From d66545ffeefc3bbf531e3cddc6c1a5493032d3a5 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Sat, 11 Apr 2026 08:43:55 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: gagan3012/llava_ar_pretrain_cc3_595k Source: Original Platform --- .gitattributes | 39 + config.json | 43 + generation_config.json | 8 + ggml-model-f16.gguf | 3 + llava.clip | 3 + llava.projector | 3 + mm_projector.bin | 3 + mmproj-model-f16.gguf | 3 + pytorch_model-00001-of-00002.bin | 3 + pytorch_model-00002-of-00002.bin | 3 + pytorch_model.bin.index.json | 725 + special_tokens_map.json | 24 + tokenizer.model | 3 + tokenizer_config.json | 36 + trainer_state.json | 111661 ++++++++++++++++++++++++++++ 15 files changed, 112560 insertions(+) create mode 100644 .gitattributes create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 ggml-model-f16.gguf create mode 100644 llava.clip create mode 100644 llava.projector create mode 100644 mm_projector.bin create mode 100644 mmproj-model-f16.gguf create mode 100644 pytorch_model-00001-of-00002.bin create mode 100644 pytorch_model-00002-of-00002.bin create mode 100644 pytorch_model.bin.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.model create mode 100644 tokenizer_config.json create mode 100644 trainer_state.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..996e806 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,39 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +ggml-model-f16.gguf filter=lfs diff=lfs merge=lfs -text +llava.clip filter=lfs diff=lfs merge=lfs -text +llava.projector filter=lfs diff=lfs merge=lfs -text +mmproj-model-f16.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/config.json b/config.json new file mode 100644 index 0000000..c7888ff --- /dev/null +++ b/config.json @@ -0,0 +1,43 @@ +{ + "_name_or_path": "/home/fakhr/scratch/models/AceGPT-7B-chat", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "bos_token_id": 1, + "dropout": 0.0, + "end_token_id": 2, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_grid_pinpoints": null, + "initializer_range": 0.02, + "intermediate_size": 11008, + "max_length": 4096, + "max_position_embeddings": 2048, + "mm_hidden_size": 1024, + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "/home/fakhr/scratch/models/clip-vit-large-patch14-336", + "model_type": "llava", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "pad_token_id": 2, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.31.0", + "tune_mm_mlp_adapter": false, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..afb9791 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,8 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "max_length": 4096, + "pad_token_id": 2, + "transformers_version": "4.31.0" +} diff --git a/ggml-model-f16.gguf b/ggml-model-f16.gguf new file mode 100644 index 0000000..c97aaa5 --- /dev/null +++ b/ggml-model-f16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a118794337345653a4d5cc419a135e660722a641b9fd9f5fc52666284278c1a5 +size 13478104768 diff --git a/llava.clip b/llava.clip new file mode 100644 index 0000000..4687884 --- /dev/null +++ b/llava.clip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ba531ae937fc2796cbab5ec5be0540c232d85a093eb04d33030315bc940ce65 +size 1214138849 diff --git a/llava.projector b/llava.projector new file mode 100644 index 0000000..9d89793 --- /dev/null +++ b/llava.projector @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3df514424a53f7012de66a02fe7bcd3b06e68121ad5a9211ca7373e9dfc5384 +size 83920211 diff --git a/mm_projector.bin b/mm_projector.bin new file mode 100644 index 0000000..654c227 --- /dev/null +++ b/mm_projector.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6a02763ce25984e4b82a59a082947b966c8ea1746f84d19bd4ccb15e64b8f51 +size 41961085 diff --git a/mmproj-model-f16.gguf b/mmproj-model-f16.gguf new file mode 100644 index 0000000..f6258eb --- /dev/null +++ b/mmproj-model-f16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d76855290f898d278e57cde99f9a91d1623a34be58e8b068a1746c4a2c922f +size 624434336 diff --git a/pytorch_model-00001-of-00002.bin b/pytorch_model-00001-of-00002.bin new file mode 100644 index 0000000..6b7703d --- /dev/null +++ b/pytorch_model-00001-of-00002.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03b7e7824955a4e2cc64308ee8143b2f183ac351c5be55bcb8aedb333cacc5a6 +size 9976634558 diff --git a/pytorch_model-00002-of-00002.bin b/pytorch_model-00002-of-00002.bin new file mode 100644 index 0000000..8d3c6f1 --- /dev/null +++ b/pytorch_model-00002-of-00002.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02df85213ab86f1d3c211bf3a571b7c8c9cdea50b6bbb69b02b2d689289bfb70 +size 3500315539 diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json new file mode 100644 index 0000000..5948742 --- /dev/null +++ b/pytorch_model.bin.index.json @@ -0,0 +1,725 @@ +{ + "metadata": { + "total_size": 14125813760 + }, + "weight_map": { + "lm_head.weight": "pytorch_model-00002-of-00002.bin", + "model.embed_tokens.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin", + "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin", + "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin", + "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin", + "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin", + "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin", + "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.30.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin", + "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin", + "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.mm_projector.0.bias": "pytorch_model-00002-of-00002.bin", + "model.mm_projector.0.weight": "pytorch_model-00002-of-00002.bin", + "model.mm_projector.2.bias": "pytorch_model-00002-of-00002.bin", + "model.mm_projector.2.weight": "pytorch_model-00002-of-00002.bin", + "model.norm.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "pytorch_model-00002-of-00002.bin", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "pytorch_model-00002-of-00002.bin" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..14761dc --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..6c00c74 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..64a0b50 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,36 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "bos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": false, + "eos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "legacy": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "use_default_system_prompt": true +} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..4525e62 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,111661 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "global_step": 18606, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.7889087656529517e-06, + "loss": 7.7188, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 3.5778175313059034e-06, + "loss": 7.875, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 5.366726296958855e-06, + "loss": 7.3438, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 7.155635062611807e-06, + "loss": 7.0938, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 8.944543828264759e-06, + "loss": 5.7812, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.073345259391771e-05, + "loss": 5.125, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 1.2522361359570662e-05, + "loss": 4.5938, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 1.4311270125223614e-05, + "loss": 3.7188, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 1.6100178890876567e-05, + "loss": 3.9844, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 1.7889087656529517e-05, + "loss": 3.875, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.967799642218247e-05, + "loss": 3.875, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 2.146690518783542e-05, + "loss": 3.7031, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 2.325581395348837e-05, + "loss": 3.4844, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 2.5044722719141324e-05, + "loss": 3.25, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 2.6833631484794274e-05, + "loss": 3.1562, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 2.8622540250447228e-05, + "loss": 3.1875, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 3.041144901610018e-05, + "loss": 3.1094, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 3.2200357781753134e-05, + "loss": 3.1094, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 3.3989266547406084e-05, + "loss": 3.0312, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 3.5778175313059034e-05, + "loss": 3.0469, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 3.756708407871199e-05, + "loss": 2.9844, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 3.935599284436494e-05, + "loss": 2.9375, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 4.114490161001789e-05, + "loss": 2.9062, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 4.293381037567084e-05, + "loss": 2.8906, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 4.47227191413238e-05, + "loss": 2.8125, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 4.651162790697674e-05, + "loss": 2.7812, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 4.830053667262969e-05, + "loss": 2.7188, + "step": 27 + }, + { + "epoch": 0.0, + "learning_rate": 5.008944543828265e-05, + "loss": 2.6719, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 5.18783542039356e-05, + "loss": 2.5781, + "step": 29 + }, + { + "epoch": 0.0, + "learning_rate": 5.366726296958855e-05, + "loss": 2.5, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 5.5456171735241505e-05, + "loss": 2.4688, + "step": 31 + }, + { + "epoch": 0.0, + "learning_rate": 5.7245080500894455e-05, + "loss": 2.4375, + "step": 32 + }, + { + "epoch": 0.0, + "learning_rate": 5.9033989266547405e-05, + "loss": 2.4219, + "step": 33 + }, + { + "epoch": 0.0, + "learning_rate": 6.082289803220036e-05, + "loss": 2.375, + "step": 34 + }, + { + "epoch": 0.0, + "learning_rate": 6.26118067978533e-05, + "loss": 2.4062, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 6.440071556350627e-05, + "loss": 2.4062, + "step": 36 + }, + { + "epoch": 0.0, + "learning_rate": 6.618962432915921e-05, + "loss": 2.2969, + "step": 37 + }, + { + "epoch": 0.0, + "learning_rate": 6.797853309481217e-05, + "loss": 2.2344, + "step": 38 + }, + { + "epoch": 0.0, + "learning_rate": 6.976744186046511e-05, + "loss": 2.2188, + "step": 39 + }, + { + "epoch": 0.0, + "learning_rate": 7.155635062611807e-05, + "loss": 2.25, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 7.334525939177101e-05, + "loss": 2.2188, + "step": 41 + }, + { + "epoch": 0.0, + "learning_rate": 7.513416815742398e-05, + "loss": 2.1875, + "step": 42 + }, + { + "epoch": 0.0, + "learning_rate": 7.692307692307693e-05, + "loss": 2.1875, + "step": 43 + }, + { + "epoch": 0.0, + "learning_rate": 7.871198568872988e-05, + "loss": 2.2188, + "step": 44 + }, + { + "epoch": 0.0, + "learning_rate": 8.050089445438283e-05, + "loss": 2.0938, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 8.228980322003578e-05, + "loss": 2.125, + "step": 46 + }, + { + "epoch": 0.0, + "learning_rate": 8.407871198568873e-05, + "loss": 2.125, + "step": 47 + }, + { + "epoch": 0.0, + "learning_rate": 8.586762075134168e-05, + "loss": 2.125, + "step": 48 + }, + { + "epoch": 0.0, + "learning_rate": 8.765652951699464e-05, + "loss": 2.0781, + "step": 49 + }, + { + "epoch": 0.0, + "learning_rate": 8.94454382826476e-05, + "loss": 2.0938, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 9.123434704830054e-05, + "loss": 2.1719, + "step": 51 + }, + { + "epoch": 0.0, + "learning_rate": 9.302325581395348e-05, + "loss": 2.0938, + "step": 52 + }, + { + "epoch": 0.0, + "learning_rate": 9.481216457960644e-05, + "loss": 2.0156, + "step": 53 + }, + { + "epoch": 0.0, + "learning_rate": 9.660107334525938e-05, + "loss": 2.0781, + "step": 54 + }, + { + "epoch": 0.0, + "learning_rate": 9.838998211091235e-05, + "loss": 2.0781, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001001788908765653, + "loss": 2.0, + "step": 56 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010196779964221825, + "loss": 2.0, + "step": 57 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001037567084078712, + "loss": 2.0312, + "step": 58 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010554561717352415, + "loss": 2.0781, + "step": 59 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001073345259391771, + "loss": 1.9453, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010912343470483005, + "loss": 2.0469, + "step": 61 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011091234347048301, + "loss": 1.9922, + "step": 62 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011270125223613597, + "loss": 1.9375, + "step": 63 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011449016100178891, + "loss": 1.9531, + "step": 64 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011627906976744187, + "loss": 1.9844, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011806797853309481, + "loss": 1.9297, + "step": 66 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011985688729874777, + "loss": 2.0312, + "step": 67 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012164579606440072, + "loss": 1.9766, + "step": 68 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012343470483005368, + "loss": 1.8828, + "step": 69 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001252236135957066, + "loss": 1.8984, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012701252236135957, + "loss": 1.9375, + "step": 71 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012880143112701254, + "loss": 1.8203, + "step": 72 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013059033989266548, + "loss": 1.8906, + "step": 73 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013237924865831842, + "loss": 1.9062, + "step": 74 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013416815742397137, + "loss": 1.8828, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013595706618962434, + "loss": 1.8984, + "step": 76 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013774597495527728, + "loss": 1.9453, + "step": 77 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013953488372093022, + "loss": 1.8047, + "step": 78 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001413237924865832, + "loss": 1.875, + "step": 79 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014311270125223614, + "loss": 1.7422, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001449016100178891, + "loss": 1.6719, + "step": 81 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014669051878354202, + "loss": 1.4531, + "step": 82 + }, + { + "epoch": 0.0, + "learning_rate": 0.000148479427549195, + "loss": 1.4609, + "step": 83 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015026833631484796, + "loss": 1.4531, + "step": 84 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015205724508050088, + "loss": 1.3281, + "step": 85 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015384615384615385, + "loss": 1.3828, + "step": 86 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001556350626118068, + "loss": 1.3828, + "step": 87 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015742397137745977, + "loss": 1.4844, + "step": 88 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015921288014311268, + "loss": 1.3906, + "step": 89 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016100178890876565, + "loss": 1.375, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016279069767441862, + "loss": 1.375, + "step": 91 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016457960644007157, + "loss": 1.5, + "step": 92 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001663685152057245, + "loss": 1.3281, + "step": 93 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016815742397137745, + "loss": 1.3125, + "step": 94 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016994633273703042, + "loss": 1.3203, + "step": 95 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017173524150268337, + "loss": 1.3125, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001735241502683363, + "loss": 1.2656, + "step": 97 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017531305903398928, + "loss": 1.3984, + "step": 98 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017710196779964222, + "loss": 1.3047, + "step": 99 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001788908765652952, + "loss": 1.3281, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001806797853309481, + "loss": 1.3594, + "step": 101 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018246869409660108, + "loss": 1.3438, + "step": 102 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018425760286225405, + "loss": 1.2969, + "step": 103 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018604651162790697, + "loss": 1.2812, + "step": 104 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018783542039355994, + "loss": 1.2031, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018962432915921288, + "loss": 1.2656, + "step": 106 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019141323792486585, + "loss": 1.3438, + "step": 107 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019320214669051877, + "loss": 1.3281, + "step": 108 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019499105545617174, + "loss": 1.2969, + "step": 109 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001967799642218247, + "loss": 1.2188, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019856887298747765, + "loss": 1.2422, + "step": 111 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002003577817531306, + "loss": 1.2422, + "step": 112 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020214669051878354, + "loss": 1.3203, + "step": 113 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002039355992844365, + "loss": 1.2969, + "step": 114 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020572450805008945, + "loss": 1.2891, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002075134168157424, + "loss": 1.1875, + "step": 116 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020930232558139536, + "loss": 1.2891, + "step": 117 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002110912343470483, + "loss": 1.2344, + "step": 118 + }, + { + "epoch": 0.01, + "learning_rate": 0.00021288014311270125, + "loss": 1.2422, + "step": 119 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002146690518783542, + "loss": 1.2188, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 0.00021645796064400716, + "loss": 1.1484, + "step": 121 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002182468694096601, + "loss": 1.2656, + "step": 122 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022003577817531305, + "loss": 1.1875, + "step": 123 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022182468694096602, + "loss": 1.2031, + "step": 124 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022361359570661896, + "loss": 1.1875, + "step": 125 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022540250447227193, + "loss": 1.2188, + "step": 126 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022719141323792485, + "loss": 1.2109, + "step": 127 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022898032200357782, + "loss": 1.2422, + "step": 128 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002307692307692308, + "loss": 1.25, + "step": 129 + }, + { + "epoch": 0.01, + "learning_rate": 0.00023255813953488373, + "loss": 1.2109, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 0.00023434704830053668, + "loss": 1.2188, + "step": 131 + }, + { + "epoch": 0.01, + "learning_rate": 0.00023613595706618962, + "loss": 1.2656, + "step": 132 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002379248658318426, + "loss": 1.2109, + "step": 133 + }, + { + "epoch": 0.01, + "learning_rate": 0.00023971377459749553, + "loss": 1.2656, + "step": 134 + }, + { + "epoch": 0.01, + "learning_rate": 0.00024150268336314848, + "loss": 1.25, + "step": 135 + }, + { + "epoch": 0.01, + "learning_rate": 0.00024329159212880145, + "loss": 1.2578, + "step": 136 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002450805008944544, + "loss": 1.1328, + "step": 137 + }, + { + "epoch": 0.01, + "learning_rate": 0.00024686940966010736, + "loss": 1.2266, + "step": 138 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002486583184257603, + "loss": 1.2188, + "step": 139 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002504472271914132, + "loss": 1.2266, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 0.00025223613595706616, + "loss": 1.2578, + "step": 141 + }, + { + "epoch": 0.01, + "learning_rate": 0.00025402504472271913, + "loss": 1.3281, + "step": 142 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002558139534883721, + "loss": 1.2812, + "step": 143 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002576028622540251, + "loss": 1.1719, + "step": 144 + }, + { + "epoch": 0.01, + "learning_rate": 0.000259391771019678, + "loss": 1.2188, + "step": 145 + }, + { + "epoch": 0.01, + "learning_rate": 0.00026118067978533096, + "loss": 1.1641, + "step": 146 + }, + { + "epoch": 0.01, + "learning_rate": 0.00026296958855098393, + "loss": 1.2656, + "step": 147 + }, + { + "epoch": 0.01, + "learning_rate": 0.00026475849731663685, + "loss": 1.2344, + "step": 148 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002665474060822898, + "loss": 1.1719, + "step": 149 + }, + { + "epoch": 0.01, + "learning_rate": 0.00026833631484794273, + "loss": 1.2344, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002701252236135957, + "loss": 1.1641, + "step": 151 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002719141323792487, + "loss": 1.2266, + "step": 152 + }, + { + "epoch": 0.01, + "learning_rate": 0.00027370304114490165, + "loss": 1.1328, + "step": 153 + }, + { + "epoch": 0.01, + "learning_rate": 0.00027549194991055456, + "loss": 1.1172, + "step": 154 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002772808586762075, + "loss": 1.1719, + "step": 155 + }, + { + "epoch": 0.01, + "learning_rate": 0.00027906976744186045, + "loss": 1.2031, + "step": 156 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002808586762075134, + "loss": 1.2031, + "step": 157 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002826475849731664, + "loss": 1.2188, + "step": 158 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002844364937388193, + "loss": 1.1719, + "step": 159 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002862254025044723, + "loss": 1.1953, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 0.00028801431127012525, + "loss": 1.2578, + "step": 161 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002898032200357782, + "loss": 1.2734, + "step": 162 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029159212880143113, + "loss": 1.1328, + "step": 163 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029338103756708405, + "loss": 1.2422, + "step": 164 + }, + { + "epoch": 0.01, + "learning_rate": 0.000295169946332737, + "loss": 1.2344, + "step": 165 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029695885509839, + "loss": 1.1719, + "step": 166 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029874776386404296, + "loss": 1.1172, + "step": 167 + }, + { + "epoch": 0.01, + "learning_rate": 0.00030053667262969593, + "loss": 1.1406, + "step": 168 + }, + { + "epoch": 0.01, + "learning_rate": 0.00030232558139534885, + "loss": 1.2734, + "step": 169 + }, + { + "epoch": 0.01, + "learning_rate": 0.00030411449016100176, + "loss": 1.2812, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 0.00030590339892665473, + "loss": 1.2031, + "step": 171 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003076923076923077, + "loss": 1.2266, + "step": 172 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003094812164579606, + "loss": 1.1406, + "step": 173 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003112701252236136, + "loss": 1.1641, + "step": 174 + }, + { + "epoch": 0.01, + "learning_rate": 0.00031305903398926656, + "loss": 1.1016, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 0.00031484794275491953, + "loss": 1.1172, + "step": 176 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003166368515205725, + "loss": 1.1875, + "step": 177 + }, + { + "epoch": 0.01, + "learning_rate": 0.00031842576028622536, + "loss": 1.1562, + "step": 178 + }, + { + "epoch": 0.01, + "learning_rate": 0.00032021466905187833, + "loss": 1.2109, + "step": 179 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003220035778175313, + "loss": 1.1953, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003237924865831843, + "loss": 1.125, + "step": 181 + }, + { + "epoch": 0.01, + "learning_rate": 0.00032558139534883724, + "loss": 1.1562, + "step": 182 + }, + { + "epoch": 0.01, + "learning_rate": 0.00032737030411449016, + "loss": 1.1719, + "step": 183 + }, + { + "epoch": 0.01, + "learning_rate": 0.00032915921288014313, + "loss": 1.1406, + "step": 184 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003309481216457961, + "loss": 1.1875, + "step": 185 + }, + { + "epoch": 0.01, + "learning_rate": 0.000332737030411449, + "loss": 1.1953, + "step": 186 + }, + { + "epoch": 0.01, + "learning_rate": 0.000334525939177102, + "loss": 1.1172, + "step": 187 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003363148479427549, + "loss": 1.2109, + "step": 188 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003381037567084079, + "loss": 1.125, + "step": 189 + }, + { + "epoch": 0.01, + "learning_rate": 0.00033989266547406084, + "loss": 1.2344, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003416815742397138, + "loss": 1.1875, + "step": 191 + }, + { + "epoch": 0.01, + "learning_rate": 0.00034347048300536673, + "loss": 1.1484, + "step": 192 + }, + { + "epoch": 0.01, + "learning_rate": 0.00034525939177101965, + "loss": 1.1484, + "step": 193 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003470483005366726, + "loss": 1.1094, + "step": 194 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003488372093023256, + "loss": 1.0859, + "step": 195 + }, + { + "epoch": 0.01, + "learning_rate": 0.00035062611806797856, + "loss": 1.125, + "step": 196 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003524150268336315, + "loss": 1.2266, + "step": 197 + }, + { + "epoch": 0.01, + "learning_rate": 0.00035420393559928444, + "loss": 1.2031, + "step": 198 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003559928443649374, + "loss": 1.1875, + "step": 199 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003577817531305904, + "loss": 1.2578, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003595706618962433, + "loss": 1.2109, + "step": 201 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003613595706618962, + "loss": 1.1484, + "step": 202 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003631484794275492, + "loss": 1.1094, + "step": 203 + }, + { + "epoch": 0.01, + "learning_rate": 0.00036493738819320216, + "loss": 1.1797, + "step": 204 + }, + { + "epoch": 0.01, + "learning_rate": 0.00036672629695885513, + "loss": 1.2188, + "step": 205 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003685152057245081, + "loss": 1.0938, + "step": 206 + }, + { + "epoch": 0.01, + "learning_rate": 0.000370304114490161, + "loss": 1.0938, + "step": 207 + }, + { + "epoch": 0.01, + "learning_rate": 0.00037209302325581393, + "loss": 1.1719, + "step": 208 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003738819320214669, + "loss": 1.125, + "step": 209 + }, + { + "epoch": 0.01, + "learning_rate": 0.00037567084078711987, + "loss": 1.0312, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003774597495527728, + "loss": 1.0859, + "step": 211 + }, + { + "epoch": 0.01, + "learning_rate": 0.00037924865831842576, + "loss": 1.1953, + "step": 212 + }, + { + "epoch": 0.01, + "learning_rate": 0.00038103756708407873, + "loss": 1.0469, + "step": 213 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003828264758497317, + "loss": 1.125, + "step": 214 + }, + { + "epoch": 0.01, + "learning_rate": 0.00038461538461538467, + "loss": 1.1719, + "step": 215 + }, + { + "epoch": 0.01, + "learning_rate": 0.00038640429338103753, + "loss": 1.0703, + "step": 216 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003881932021466905, + "loss": 1.1719, + "step": 217 + }, + { + "epoch": 0.01, + "learning_rate": 0.00038998211091234347, + "loss": 1.1016, + "step": 218 + }, + { + "epoch": 0.01, + "learning_rate": 0.00039177101967799644, + "loss": 1.2109, + "step": 219 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003935599284436494, + "loss": 1.125, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 0.00039534883720930233, + "loss": 1.0859, + "step": 221 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003971377459749553, + "loss": 1.2188, + "step": 222 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003989266547406082, + "loss": 1.2266, + "step": 223 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004007155635062612, + "loss": 1.125, + "step": 224 + }, + { + "epoch": 0.01, + "learning_rate": 0.00040250447227191416, + "loss": 1.0703, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 0.00040429338103756707, + "loss": 1.125, + "step": 226 + }, + { + "epoch": 0.01, + "learning_rate": 0.00040608228980322004, + "loss": 1.2266, + "step": 227 + }, + { + "epoch": 0.01, + "learning_rate": 0.000407871198568873, + "loss": 1.1562, + "step": 228 + }, + { + "epoch": 0.01, + "learning_rate": 0.000409660107334526, + "loss": 1.1328, + "step": 229 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004114490161001789, + "loss": 1.2266, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004132379248658318, + "loss": 1.1094, + "step": 231 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004150268336314848, + "loss": 1.1094, + "step": 232 + }, + { + "epoch": 0.01, + "learning_rate": 0.00041681574239713776, + "loss": 1.1406, + "step": 233 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004186046511627907, + "loss": 1.1719, + "step": 234 + }, + { + "epoch": 0.01, + "learning_rate": 0.00042039355992844364, + "loss": 1.1484, + "step": 235 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004221824686940966, + "loss": 1.1875, + "step": 236 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004239713774597496, + "loss": 1.1484, + "step": 237 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004257602862254025, + "loss": 1.1172, + "step": 238 + }, + { + "epoch": 0.01, + "learning_rate": 0.00042754919499105547, + "loss": 1.0703, + "step": 239 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004293381037567084, + "loss": 1.0469, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 0.00043112701252236136, + "loss": 1.0781, + "step": 241 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004329159212880143, + "loss": 1.0547, + "step": 242 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004347048300536673, + "loss": 1.0859, + "step": 243 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004364937388193202, + "loss": 1.1406, + "step": 244 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004382826475849732, + "loss": 1.1172, + "step": 245 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004400715563506261, + "loss": 1.1094, + "step": 246 + }, + { + "epoch": 0.01, + "learning_rate": 0.00044186046511627907, + "loss": 1.2109, + "step": 247 + }, + { + "epoch": 0.01, + "learning_rate": 0.00044364937388193204, + "loss": 1.2266, + "step": 248 + }, + { + "epoch": 0.01, + "learning_rate": 0.00044543828264758496, + "loss": 1.1328, + "step": 249 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004472271914132379, + "loss": 1.2188, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004490161001788909, + "loss": 1.0078, + "step": 251 + }, + { + "epoch": 0.01, + "learning_rate": 0.00045080500894454387, + "loss": 1.0938, + "step": 252 + }, + { + "epoch": 0.01, + "learning_rate": 0.00045259391771019684, + "loss": 1.1328, + "step": 253 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004543828264758497, + "loss": 1.0312, + "step": 254 + }, + { + "epoch": 0.01, + "learning_rate": 0.00045617173524150267, + "loss": 1.1172, + "step": 255 + }, + { + "epoch": 0.01, + "learning_rate": 0.00045796064400715564, + "loss": 1.125, + "step": 256 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004597495527728086, + "loss": 1.0625, + "step": 257 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004615384615384616, + "loss": 1.0938, + "step": 258 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004633273703041145, + "loss": 1.0078, + "step": 259 + }, + { + "epoch": 0.01, + "learning_rate": 0.00046511627906976747, + "loss": 1.0547, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004669051878354204, + "loss": 1.0938, + "step": 261 + }, + { + "epoch": 0.01, + "learning_rate": 0.00046869409660107335, + "loss": 1.1719, + "step": 262 + }, + { + "epoch": 0.01, + "learning_rate": 0.00047048300536672627, + "loss": 1.0703, + "step": 263 + }, + { + "epoch": 0.01, + "learning_rate": 0.00047227191413237924, + "loss": 1.2109, + "step": 264 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004740608228980322, + "loss": 1.2031, + "step": 265 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004758497316636852, + "loss": 1.1328, + "step": 266 + }, + { + "epoch": 0.01, + "learning_rate": 0.00047763864042933815, + "loss": 1.0391, + "step": 267 + }, + { + "epoch": 0.01, + "learning_rate": 0.00047942754919499107, + "loss": 1.1172, + "step": 268 + }, + { + "epoch": 0.01, + "learning_rate": 0.000481216457960644, + "loss": 1.125, + "step": 269 + }, + { + "epoch": 0.01, + "learning_rate": 0.00048300536672629695, + "loss": 1.0156, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004847942754919499, + "loss": 1.0625, + "step": 271 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004865831842576029, + "loss": 1.0859, + "step": 272 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004883720930232558, + "loss": 1.125, + "step": 273 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004901610017889088, + "loss": 1.1016, + "step": 274 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004919499105545618, + "loss": 1.1406, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004937388193202147, + "loss": 1.1094, + "step": 276 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004955277280858677, + "loss": 1.1328, + "step": 277 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004973166368515206, + "loss": 1.1016, + "step": 278 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004991055456171735, + "loss": 1.1719, + "step": 279 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005008944543828264, + "loss": 1.125, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005026833631484794, + "loss": 1.0078, + "step": 281 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005044722719141323, + "loss": 1.1094, + "step": 282 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005062611806797853, + "loss": 1.0078, + "step": 283 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005080500894454383, + "loss": 1.0078, + "step": 284 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005098389982110912, + "loss": 1.0469, + "step": 285 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005116279069767442, + "loss": 1.125, + "step": 286 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005134168157423972, + "loss": 1.1172, + "step": 287 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005152057245080502, + "loss": 1.0859, + "step": 288 + }, + { + "epoch": 0.02, + "learning_rate": 0.000516994633273703, + "loss": 1.0938, + "step": 289 + }, + { + "epoch": 0.02, + "learning_rate": 0.000518783542039356, + "loss": 1.1328, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 0.000520572450805009, + "loss": 1.0938, + "step": 291 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005223613595706619, + "loss": 1.2188, + "step": 292 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005241502683363149, + "loss": 1.1328, + "step": 293 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005259391771019679, + "loss": 1.0859, + "step": 294 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005277280858676207, + "loss": 1.0703, + "step": 295 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005295169946332737, + "loss": 1.1172, + "step": 296 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005313059033989267, + "loss": 1.0938, + "step": 297 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005330948121645796, + "loss": 1.0469, + "step": 298 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005348837209302325, + "loss": 1.2188, + "step": 299 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005366726296958855, + "loss": 1.0234, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005384615384615384, + "loss": 1.0703, + "step": 301 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005402504472271914, + "loss": 1.0234, + "step": 302 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005420393559928444, + "loss": 0.9961, + "step": 303 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005438282647584974, + "loss": 1.0469, + "step": 304 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005456171735241503, + "loss": 0.957, + "step": 305 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005474060822898033, + "loss": 1.0312, + "step": 306 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005491949910554563, + "loss": 1.1016, + "step": 307 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005509838998211091, + "loss": 1.125, + "step": 308 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005527728085867621, + "loss": 1.0938, + "step": 309 + }, + { + "epoch": 0.02, + "learning_rate": 0.000554561717352415, + "loss": 1.0547, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005563506261180679, + "loss": 1.0703, + "step": 311 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005581395348837209, + "loss": 1.0781, + "step": 312 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005599284436493739, + "loss": 1.0547, + "step": 313 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005617173524150268, + "loss": 1.0469, + "step": 314 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005635062611806798, + "loss": 1.0547, + "step": 315 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005652951699463328, + "loss": 1.0, + "step": 316 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005670840787119857, + "loss": 1.0625, + "step": 317 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005688729874776386, + "loss": 1.0312, + "step": 318 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005706618962432916, + "loss": 1.1094, + "step": 319 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005724508050089446, + "loss": 1.1641, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005742397137745975, + "loss": 0.9531, + "step": 321 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005760286225402505, + "loss": 1.0391, + "step": 322 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005778175313059035, + "loss": 1.0859, + "step": 323 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005796064400715564, + "loss": 1.0625, + "step": 324 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005813953488372093, + "loss": 1.0859, + "step": 325 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005831842576028623, + "loss": 1.0469, + "step": 326 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005849731663685151, + "loss": 1.0234, + "step": 327 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005867620751341681, + "loss": 1.1094, + "step": 328 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005885509838998211, + "loss": 1.0859, + "step": 329 + }, + { + "epoch": 0.02, + "learning_rate": 0.000590339892665474, + "loss": 1.0938, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 0.000592128801431127, + "loss": 1.1328, + "step": 331 + }, + { + "epoch": 0.02, + "learning_rate": 0.00059391771019678, + "loss": 1.0703, + "step": 332 + }, + { + "epoch": 0.02, + "learning_rate": 0.000595706618962433, + "loss": 1.0391, + "step": 333 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005974955277280859, + "loss": 1.0156, + "step": 334 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005992844364937389, + "loss": 1.125, + "step": 335 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006010733452593919, + "loss": 1.0547, + "step": 336 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006028622540250447, + "loss": 1.0625, + "step": 337 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006046511627906977, + "loss": 1.0391, + "step": 338 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006064400715563507, + "loss": 1.0312, + "step": 339 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006082289803220035, + "loss": 1.0078, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006100178890876565, + "loss": 1.1484, + "step": 341 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006118067978533095, + "loss": 1.0391, + "step": 342 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006135957066189624, + "loss": 1.0, + "step": 343 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006153846153846154, + "loss": 1.0625, + "step": 344 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006171735241502684, + "loss": 1.1094, + "step": 345 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006189624329159212, + "loss": 1.0469, + "step": 346 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006207513416815742, + "loss": 0.9883, + "step": 347 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006225402504472272, + "loss": 0.9453, + "step": 348 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006243291592128801, + "loss": 1.1094, + "step": 349 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006261180679785331, + "loss": 1.0469, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006279069767441861, + "loss": 1.0781, + "step": 351 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006296958855098391, + "loss": 1.0469, + "step": 352 + }, + { + "epoch": 0.02, + "learning_rate": 0.000631484794275492, + "loss": 1.0859, + "step": 353 + }, + { + "epoch": 0.02, + "learning_rate": 0.000633273703041145, + "loss": 1.0312, + "step": 354 + }, + { + "epoch": 0.02, + "learning_rate": 0.000635062611806798, + "loss": 0.9766, + "step": 355 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006368515205724507, + "loss": 1.0625, + "step": 356 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006386404293381037, + "loss": 1.0938, + "step": 357 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006404293381037567, + "loss": 1.1328, + "step": 358 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006422182468694096, + "loss": 1.0, + "step": 359 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006440071556350626, + "loss": 1.0312, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006457960644007156, + "loss": 1.0156, + "step": 361 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006475849731663685, + "loss": 1.0859, + "step": 362 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006493738819320215, + "loss": 1.1094, + "step": 363 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006511627906976745, + "loss": 0.9609, + "step": 364 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006529516994633273, + "loss": 1.0859, + "step": 365 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006547406082289803, + "loss": 1.0547, + "step": 366 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006565295169946333, + "loss": 1.1094, + "step": 367 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006583184257602863, + "loss": 0.8984, + "step": 368 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006601073345259392, + "loss": 1.0312, + "step": 369 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006618962432915922, + "loss": 1.0078, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006636851520572451, + "loss": 1.1016, + "step": 371 + }, + { + "epoch": 0.02, + "learning_rate": 0.000665474060822898, + "loss": 1.0781, + "step": 372 + }, + { + "epoch": 0.02, + "learning_rate": 0.000667262969588551, + "loss": 1.0625, + "step": 373 + }, + { + "epoch": 0.02, + "learning_rate": 0.000669051878354204, + "loss": 1.0234, + "step": 374 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006708407871198568, + "loss": 0.9883, + "step": 375 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006726296958855098, + "loss": 0.9844, + "step": 376 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006744186046511628, + "loss": 1.0547, + "step": 377 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006762075134168157, + "loss": 1.0391, + "step": 378 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006779964221824687, + "loss": 0.9922, + "step": 379 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006797853309481217, + "loss": 1.0, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006815742397137747, + "loss": 1.0547, + "step": 381 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006833631484794276, + "loss": 1.0938, + "step": 382 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006851520572450806, + "loss": 0.9727, + "step": 383 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006869409660107335, + "loss": 1.0078, + "step": 384 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006887298747763864, + "loss": 1.0469, + "step": 385 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006905187835420393, + "loss": 1.1875, + "step": 386 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006923076923076923, + "loss": 0.9688, + "step": 387 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006940966010733452, + "loss": 1.0156, + "step": 388 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006958855098389982, + "loss": 1.0859, + "step": 389 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006976744186046512, + "loss": 0.9922, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006994633273703041, + "loss": 1.0312, + "step": 391 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007012522361359571, + "loss": 1.0234, + "step": 392 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007030411449016101, + "loss": 0.9062, + "step": 393 + }, + { + "epoch": 0.02, + "learning_rate": 0.000704830053667263, + "loss": 1.0156, + "step": 394 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007066189624329159, + "loss": 1.0938, + "step": 395 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007084078711985689, + "loss": 1.0, + "step": 396 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007101967799642219, + "loss": 1.0234, + "step": 397 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007119856887298748, + "loss": 1.0, + "step": 398 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007137745974955278, + "loss": 1.0156, + "step": 399 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007155635062611808, + "loss": 1.0547, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007173524150268336, + "loss": 1.0547, + "step": 401 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007191413237924866, + "loss": 1.0, + "step": 402 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007209302325581395, + "loss": 0.9766, + "step": 403 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007227191413237924, + "loss": 1.0859, + "step": 404 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007245080500894454, + "loss": 1.0625, + "step": 405 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007262969588550984, + "loss": 1.0391, + "step": 406 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007280858676207513, + "loss": 1.0078, + "step": 407 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007298747763864043, + "loss": 1.0078, + "step": 408 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007316636851520573, + "loss": 1.0312, + "step": 409 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007334525939177103, + "loss": 1.0547, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007352415026833632, + "loss": 1.0078, + "step": 411 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007370304114490162, + "loss": 1.0156, + "step": 412 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007388193202146691, + "loss": 1.0469, + "step": 413 + }, + { + "epoch": 0.02, + "learning_rate": 0.000740608228980322, + "loss": 1.0234, + "step": 414 + }, + { + "epoch": 0.02, + "learning_rate": 0.000742397137745975, + "loss": 1.0312, + "step": 415 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007441860465116279, + "loss": 0.9922, + "step": 416 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007459749552772808, + "loss": 1.0859, + "step": 417 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007477638640429338, + "loss": 1.0156, + "step": 418 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007495527728085868, + "loss": 1.0859, + "step": 419 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007513416815742397, + "loss": 1.0703, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007531305903398927, + "loss": 1.0312, + "step": 421 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007549194991055456, + "loss": 1.0312, + "step": 422 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007567084078711985, + "loss": 1.0469, + "step": 423 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007584973166368515, + "loss": 1.1875, + "step": 424 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007602862254025045, + "loss": 0.9883, + "step": 425 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007620751341681575, + "loss": 1.0, + "step": 426 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007638640429338104, + "loss": 1.0391, + "step": 427 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007656529516994634, + "loss": 0.9609, + "step": 428 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007674418604651164, + "loss": 1.0703, + "step": 429 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007692307692307693, + "loss": 1.0156, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007710196779964222, + "loss": 0.9922, + "step": 431 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007728085867620751, + "loss": 0.9883, + "step": 432 + }, + { + "epoch": 0.02, + "learning_rate": 0.000774597495527728, + "loss": 1.0781, + "step": 433 + }, + { + "epoch": 0.02, + "learning_rate": 0.000776386404293381, + "loss": 1.0469, + "step": 434 + }, + { + "epoch": 0.02, + "learning_rate": 0.000778175313059034, + "loss": 1.0469, + "step": 435 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007799642218246869, + "loss": 0.9922, + "step": 436 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007817531305903399, + "loss": 1.0156, + "step": 437 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007835420393559929, + "loss": 1.0, + "step": 438 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007853309481216459, + "loss": 1.0625, + "step": 439 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007871198568872988, + "loss": 0.9492, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007889087656529517, + "loss": 1.0781, + "step": 441 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007906976744186047, + "loss": 1.0156, + "step": 442 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007924865831842576, + "loss": 0.9805, + "step": 443 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007942754919499106, + "loss": 1.0, + "step": 444 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007960644007155636, + "loss": 0.9258, + "step": 445 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007978533094812164, + "loss": 1.0391, + "step": 446 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007996422182468694, + "loss": 1.0938, + "step": 447 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008014311270125224, + "loss": 1.0, + "step": 448 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008032200357781753, + "loss": 1.0, + "step": 449 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008050089445438283, + "loss": 1.0859, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008067978533094812, + "loss": 1.1016, + "step": 451 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008085867620751341, + "loss": 0.9766, + "step": 452 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008103756708407871, + "loss": 0.9883, + "step": 453 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008121645796064401, + "loss": 0.9766, + "step": 454 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008139534883720931, + "loss": 0.9844, + "step": 455 + }, + { + "epoch": 0.02, + "learning_rate": 0.000815742397137746, + "loss": 1.1094, + "step": 456 + }, + { + "epoch": 0.02, + "learning_rate": 0.000817531305903399, + "loss": 0.957, + "step": 457 + }, + { + "epoch": 0.02, + "learning_rate": 0.000819320214669052, + "loss": 1.0547, + "step": 458 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008211091234347049, + "loss": 1.0156, + "step": 459 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008228980322003578, + "loss": 1.0312, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008246869409660107, + "loss": 0.9805, + "step": 461 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008264758497316636, + "loss": 0.9727, + "step": 462 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008282647584973166, + "loss": 1.0234, + "step": 463 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008300536672629696, + "loss": 1.0, + "step": 464 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008318425760286225, + "loss": 0.9727, + "step": 465 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008336314847942755, + "loss": 1.0391, + "step": 466 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008354203935599285, + "loss": 0.9766, + "step": 467 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008372093023255815, + "loss": 1.0703, + "step": 468 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008389982110912343, + "loss": 1.0234, + "step": 469 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008407871198568873, + "loss": 1.0391, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008425760286225403, + "loss": 1.0312, + "step": 471 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008443649373881932, + "loss": 0.9961, + "step": 472 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008461538461538462, + "loss": 0.9922, + "step": 473 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008479427549194992, + "loss": 0.9883, + "step": 474 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008497316636851521, + "loss": 0.9531, + "step": 475 + }, + { + "epoch": 0.03, + "learning_rate": 0.000851520572450805, + "loss": 0.9922, + "step": 476 + }, + { + "epoch": 0.03, + "learning_rate": 0.000853309481216458, + "loss": 0.9375, + "step": 477 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008550983899821109, + "loss": 1.0391, + "step": 478 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008568872987477638, + "loss": 1.0391, + "step": 479 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008586762075134168, + "loss": 1.0234, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008604651162790697, + "loss": 1.0078, + "step": 481 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008622540250447227, + "loss": 1.0234, + "step": 482 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008640429338103757, + "loss": 1.0625, + "step": 483 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008658318425760287, + "loss": 1.0703, + "step": 484 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008676207513416816, + "loss": 1.0703, + "step": 485 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008694096601073346, + "loss": 1.0781, + "step": 486 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008711985688729876, + "loss": 1.0234, + "step": 487 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008729874776386404, + "loss": 1.0234, + "step": 488 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008747763864042934, + "loss": 1.0312, + "step": 489 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008765652951699464, + "loss": 0.9727, + "step": 490 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008783542039355992, + "loss": 1.0391, + "step": 491 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008801431127012522, + "loss": 1.0078, + "step": 492 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008819320214669052, + "loss": 0.9883, + "step": 493 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008837209302325581, + "loss": 1.0078, + "step": 494 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008855098389982111, + "loss": 0.9727, + "step": 495 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008872987477638641, + "loss": 1.0234, + "step": 496 + }, + { + "epoch": 0.03, + "learning_rate": 0.000889087656529517, + "loss": 1.0078, + "step": 497 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008908765652951699, + "loss": 1.0156, + "step": 498 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008926654740608229, + "loss": 1.0078, + "step": 499 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008944543828264759, + "loss": 1.0703, + "step": 500 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008962432915921288, + "loss": 1.0078, + "step": 501 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008980322003577818, + "loss": 1.0, + "step": 502 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008998211091234348, + "loss": 1.0312, + "step": 503 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009016100178890877, + "loss": 1.0938, + "step": 504 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009033989266547407, + "loss": 0.9961, + "step": 505 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009051878354203937, + "loss": 0.9961, + "step": 506 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009069767441860464, + "loss": 0.9688, + "step": 507 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009087656529516994, + "loss": 1.0, + "step": 508 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009105545617173524, + "loss": 1.0391, + "step": 509 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009123434704830053, + "loss": 0.9102, + "step": 510 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009141323792486583, + "loss": 1.0, + "step": 511 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009159212880143113, + "loss": 1.0703, + "step": 512 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009177101967799643, + "loss": 0.9648, + "step": 513 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009194991055456172, + "loss": 1.0078, + "step": 514 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009212880143112702, + "loss": 0.9102, + "step": 515 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009230769230769232, + "loss": 1.0156, + "step": 516 + }, + { + "epoch": 0.03, + "learning_rate": 0.000924865831842576, + "loss": 1.0234, + "step": 517 + }, + { + "epoch": 0.03, + "learning_rate": 0.000926654740608229, + "loss": 0.9609, + "step": 518 + }, + { + "epoch": 0.03, + "learning_rate": 0.000928443649373882, + "loss": 0.9805, + "step": 519 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009302325581395349, + "loss": 0.9531, + "step": 520 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009320214669051879, + "loss": 1.0469, + "step": 521 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009338103756708408, + "loss": 0.9727, + "step": 522 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009355992844364937, + "loss": 1.0391, + "step": 523 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009373881932021467, + "loss": 0.9844, + "step": 524 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009391771019677997, + "loss": 1.0547, + "step": 525 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009409660107334525, + "loss": 0.9648, + "step": 526 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009427549194991055, + "loss": 0.9844, + "step": 527 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009445438282647585, + "loss": 0.9648, + "step": 528 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009463327370304115, + "loss": 1.0078, + "step": 529 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009481216457960644, + "loss": 0.9844, + "step": 530 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009499105545617174, + "loss": 0.9766, + "step": 531 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009516994633273704, + "loss": 0.9727, + "step": 532 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009534883720930233, + "loss": 1.0391, + "step": 533 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009552772808586763, + "loss": 0.957, + "step": 534 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009570661896243293, + "loss": 1.0547, + "step": 535 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009588550983899821, + "loss": 1.0781, + "step": 536 + }, + { + "epoch": 0.03, + "learning_rate": 0.000960644007155635, + "loss": 0.9922, + "step": 537 + }, + { + "epoch": 0.03, + "learning_rate": 0.000962432915921288, + "loss": 1.0391, + "step": 538 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009642218246869409, + "loss": 1.0, + "step": 539 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009660107334525939, + "loss": 0.9609, + "step": 540 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009677996422182469, + "loss": 1.0703, + "step": 541 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009695885509838999, + "loss": 0.8906, + "step": 542 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009713774597495528, + "loss": 0.9883, + "step": 543 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009731663685152058, + "loss": 1.0391, + "step": 544 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009749552772808587, + "loss": 0.9297, + "step": 545 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009767441860465116, + "loss": 0.9336, + "step": 546 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009785330948121646, + "loss": 0.9531, + "step": 547 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009803220035778176, + "loss": 0.9805, + "step": 548 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009821109123434705, + "loss": 1.0703, + "step": 549 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009838998211091235, + "loss": 0.9766, + "step": 550 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009856887298747765, + "loss": 0.957, + "step": 551 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009874776386404294, + "loss": 0.9648, + "step": 552 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009892665474060824, + "loss": 1.0391, + "step": 553 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009910554561717354, + "loss": 1.0078, + "step": 554 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009928443649373881, + "loss": 1.0469, + "step": 555 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009946332737030411, + "loss": 1.0781, + "step": 556 + }, + { + "epoch": 0.03, + "learning_rate": 0.000996422182468694, + "loss": 0.9492, + "step": 557 + }, + { + "epoch": 0.03, + "learning_rate": 0.000998211091234347, + "loss": 1.0156, + "step": 558 + }, + { + "epoch": 0.03, + "learning_rate": 0.001, + "loss": 0.9727, + "step": 559 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999999924241788, + "loss": 0.957, + "step": 560 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999999696967154, + "loss": 1.1094, + "step": 561 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999999318176106, + "loss": 1.0156, + "step": 562 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999998787868654, + "loss": 1.0, + "step": 563 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999998106044814, + "loss": 0.9414, + "step": 564 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999997272704608, + "loss": 1.0234, + "step": 565 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999999628784806, + "loss": 1.0469, + "step": 566 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999995151475202, + "loss": 1.0469, + "step": 567 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999993863586065, + "loss": 0.9648, + "step": 568 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999992424180692, + "loss": 0.9688, + "step": 569 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999990833259123, + "loss": 0.9609, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999998909082141, + "loss": 1.0, + "step": 571 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999987196867598, + "loss": 0.8906, + "step": 572 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999985151397754, + "loss": 1.0156, + "step": 573 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999982954411936, + "loss": 0.9688, + "step": 574 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999980605910211, + "loss": 0.9883, + "step": 575 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999978105892648, + "loss": 1.0547, + "step": 576 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999975454359324, + "loss": 1.0469, + "step": 577 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999972651310322, + "loss": 1.0859, + "step": 578 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999969696745724, + "loss": 1.1016, + "step": 579 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999996659066562, + "loss": 0.9375, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999963333070104, + "loss": 0.9453, + "step": 581 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999959923959274, + "loss": 0.9844, + "step": 582 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999956363333235, + "loss": 1.1094, + "step": 583 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999952651192096, + "loss": 0.9844, + "step": 584 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999948787535967, + "loss": 0.9883, + "step": 585 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999944772364966, + "loss": 0.957, + "step": 586 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999940605679213, + "loss": 0.9375, + "step": 587 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999936287478836, + "loss": 0.9336, + "step": 588 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999931817763967, + "loss": 0.9609, + "step": 589 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999927196534739, + "loss": 1.0391, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999922423791295, + "loss": 1.0703, + "step": 591 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999917499533776, + "loss": 0.9297, + "step": 592 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999912423762334, + "loss": 0.9258, + "step": 593 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999990719647712, + "loss": 0.9688, + "step": 594 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999901817678296, + "loss": 1.0781, + "step": 595 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999896287366025, + "loss": 1.0, + "step": 596 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999989060554047, + "loss": 0.957, + "step": 597 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999884772201805, + "loss": 0.9492, + "step": 598 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999878787350209, + "loss": 0.9453, + "step": 599 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999872650985862, + "loss": 0.9336, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999986636310895, + "loss": 1.0234, + "step": 601 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999859923719666, + "loss": 0.9922, + "step": 602 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999853332818198, + "loss": 1.0078, + "step": 603 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999846590404754, + "loss": 1.0234, + "step": 604 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999839696479534, + "loss": 0.9922, + "step": 605 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999832651042748, + "loss": 0.9648, + "step": 606 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999825454094608, + "loss": 1.0, + "step": 607 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999818105635334, + "loss": 0.9609, + "step": 608 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999810605665147, + "loss": 0.9922, + "step": 609 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999802954184277, + "loss": 0.9922, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999795151192953, + "loss": 0.9414, + "step": 611 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999787196691412, + "loss": 0.9609, + "step": 612 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999779090679897, + "loss": 0.9727, + "step": 613 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999770833158653, + "loss": 0.9883, + "step": 614 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999762424127928, + "loss": 0.9648, + "step": 615 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999753863587978, + "loss": 0.957, + "step": 616 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999745151539061, + "loss": 0.9297, + "step": 617 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999736287981446, + "loss": 0.9688, + "step": 618 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999727272915397, + "loss": 1.0234, + "step": 619 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999718106341187, + "loss": 0.957, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999708788259098, + "loss": 1.0156, + "step": 621 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999969931866941, + "loss": 0.9609, + "step": 622 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999689697572406, + "loss": 1.0, + "step": 623 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999679924968384, + "loss": 1.0859, + "step": 624 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999670000857635, + "loss": 0.957, + "step": 625 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999659925240463, + "loss": 1.0547, + "step": 626 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999649698117171, + "loss": 1.0, + "step": 627 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999639319488074, + "loss": 1.0469, + "step": 628 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999628789353482, + "loss": 0.9922, + "step": 629 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999618107713712, + "loss": 1.0625, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999607274569092, + "loss": 0.9648, + "step": 631 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999959628991995, + "loss": 0.9922, + "step": 632 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999585153766615, + "loss": 0.9414, + "step": 633 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999957386610943, + "loss": 0.9023, + "step": 634 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999562426948733, + "loss": 0.9805, + "step": 635 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999550836284872, + "loss": 0.9961, + "step": 636 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999539094118198, + "loss": 0.9688, + "step": 637 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999527200449066, + "loss": 1.0391, + "step": 638 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999515155277839, + "loss": 0.9414, + "step": 639 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999950295860488, + "loss": 1.0312, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999490610430558, + "loss": 1.0312, + "step": 641 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999478110755248, + "loss": 1.0312, + "step": 642 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999946545957933, + "loss": 1.0312, + "step": 643 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999452656903186, + "loss": 0.9688, + "step": 644 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999439702727201, + "loss": 1.0469, + "step": 645 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999426597051774, + "loss": 0.9922, + "step": 646 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999413339877301, + "loss": 0.9453, + "step": 647 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999399931204178, + "loss": 1.0234, + "step": 648 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999386371032817, + "loss": 1.0781, + "step": 649 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999372659363626, + "loss": 0.9375, + "step": 650 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999358796197021, + "loss": 0.9336, + "step": 651 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999344781533425, + "loss": 1.0078, + "step": 652 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999933061537326, + "loss": 0.9219, + "step": 653 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999316297716952, + "loss": 0.8789, + "step": 654 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999930182856494, + "loss": 1.0078, + "step": 655 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999287207917663, + "loss": 1.0391, + "step": 656 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999927243577556, + "loss": 0.9648, + "step": 657 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999925751213908, + "loss": 1.0547, + "step": 658 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999242437008676, + "loss": 1.0078, + "step": 659 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999227210384805, + "loss": 0.9688, + "step": 660 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999211832267929, + "loss": 0.9844, + "step": 661 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999196302658512, + "loss": 1.0078, + "step": 662 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999180621557025, + "loss": 0.957, + "step": 663 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999164788963945, + "loss": 0.9297, + "step": 664 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999914880487975, + "loss": 0.9414, + "step": 665 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999132669304925, + "loss": 0.9922, + "step": 666 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999116382239958, + "loss": 0.9922, + "step": 667 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999099943685346, + "loss": 0.9844, + "step": 668 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999083353641582, + "loss": 0.9609, + "step": 669 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999066612109172, + "loss": 0.9258, + "step": 670 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999049719088624, + "loss": 0.9883, + "step": 671 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999032674580448, + "loss": 1.0625, + "step": 672 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999901547858516, + "loss": 1.0625, + "step": 673 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998998131103283, + "loss": 1.0156, + "step": 674 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998980632135342, + "loss": 0.9805, + "step": 675 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998962981681867, + "loss": 1.0625, + "step": 676 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998945179743393, + "loss": 1.0469, + "step": 677 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999892722632046, + "loss": 1.0234, + "step": 678 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998909121413612, + "loss": 0.8984, + "step": 679 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998890865023395, + "loss": 0.9805, + "step": 680 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998872457150365, + "loss": 0.9805, + "step": 681 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999885389779508, + "loss": 0.9805, + "step": 682 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998835186958103, + "loss": 0.9297, + "step": 683 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998816324639996, + "loss": 0.957, + "step": 684 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998797310841337, + "loss": 1.0156, + "step": 685 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998778145562698, + "loss": 0.9844, + "step": 686 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998758828804661, + "loss": 0.9023, + "step": 687 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998739360567812, + "loss": 0.9648, + "step": 688 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998719740852743, + "loss": 0.9688, + "step": 689 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998699969660041, + "loss": 0.9453, + "step": 690 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998680046990314, + "loss": 0.9688, + "step": 691 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999865997284416, + "loss": 0.9023, + "step": 692 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999863974722219, + "loss": 0.9375, + "step": 693 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998619370125018, + "loss": 0.9336, + "step": 694 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998598841553256, + "loss": 1.0234, + "step": 695 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998578161507531, + "loss": 0.9648, + "step": 696 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999855732998847, + "loss": 0.9453, + "step": 697 + }, + { + "epoch": 0.04, + "learning_rate": 0.00099985363469967, + "loss": 1.0547, + "step": 698 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998515212532861, + "loss": 0.9609, + "step": 699 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998493926597592, + "loss": 1.0469, + "step": 700 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998472489191536, + "loss": 1.1172, + "step": 701 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998450900315346, + "loss": 0.9414, + "step": 702 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998429159969674, + "loss": 0.9727, + "step": 703 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999840726815518, + "loss": 0.9844, + "step": 704 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998385224872528, + "loss": 1.0234, + "step": 705 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998363030122384, + "loss": 0.9453, + "step": 706 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998340683905422, + "loss": 1.0391, + "step": 707 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999831818622232, + "loss": 0.9844, + "step": 708 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998295537073756, + "loss": 0.957, + "step": 709 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998272736460418, + "loss": 0.9375, + "step": 710 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998249784383, + "loss": 0.9492, + "step": 711 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998226680842195, + "loss": 1.0312, + "step": 712 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998203425838701, + "loss": 0.9883, + "step": 713 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998180019373229, + "loss": 0.957, + "step": 714 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998156461446482, + "loss": 0.9336, + "step": 715 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998132752059175, + "loss": 0.9727, + "step": 716 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999810889121203, + "loss": 1.0078, + "step": 717 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998084878905767, + "loss": 1.0391, + "step": 718 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998060715141115, + "loss": 1.0391, + "step": 719 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998036399918803, + "loss": 0.9297, + "step": 720 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998011933239574, + "loss": 0.9258, + "step": 721 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997987315104164, + "loss": 0.9922, + "step": 722 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997962545513321, + "loss": 0.9883, + "step": 723 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997937624467797, + "loss": 1.0, + "step": 724 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997912551968345, + "loss": 0.9727, + "step": 725 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997887328015724, + "loss": 1.0312, + "step": 726 + }, + { + "epoch": 0.04, + "learning_rate": 0.00099978619526107, + "loss": 1.0781, + "step": 727 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997836425754043, + "loss": 1.0469, + "step": 728 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997810747446525, + "loss": 1.0703, + "step": 729 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997784917688925, + "loss": 0.9297, + "step": 730 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997758936482024, + "loss": 0.9844, + "step": 731 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997732803826611, + "loss": 0.9805, + "step": 732 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999770651972348, + "loss": 0.9336, + "step": 733 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997680084173422, + "loss": 1.0078, + "step": 734 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999765349717724, + "loss": 0.8906, + "step": 735 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997626758735743, + "loss": 1.0, + "step": 736 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997599868849737, + "loss": 0.9922, + "step": 737 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999757282752004, + "loss": 0.9766, + "step": 738 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997545634747472, + "loss": 0.9844, + "step": 739 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997518290532853, + "loss": 1.0781, + "step": 740 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997490794877017, + "loss": 0.9297, + "step": 741 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999746314778079, + "loss": 0.9453, + "step": 742 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997435349245016, + "loss": 1.0547, + "step": 743 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997407399270535, + "loss": 1.0078, + "step": 744 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997379297858194, + "loss": 0.9766, + "step": 745 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997351045008844, + "loss": 0.9609, + "step": 746 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997322640723342, + "loss": 0.918, + "step": 747 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999729408500255, + "loss": 1.0469, + "step": 748 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997265377847332, + "loss": 0.9609, + "step": 749 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997236519258559, + "loss": 0.9141, + "step": 750 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997207509237103, + "loss": 0.9297, + "step": 751 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997178347783844, + "loss": 0.9961, + "step": 752 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999714903489967, + "loss": 0.9531, + "step": 753 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999711957058546, + "loss": 0.9375, + "step": 754 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997089954842115, + "loss": 0.9844, + "step": 755 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999706018767053, + "loss": 1.0156, + "step": 756 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997030269071608, + "loss": 1.0078, + "step": 757 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997000199046253, + "loss": 0.9844, + "step": 758 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996969977595378, + "loss": 0.918, + "step": 759 + }, + { + "epoch": 0.04, + "learning_rate": 0.00099969396047199, + "loss": 0.9141, + "step": 760 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996909080420735, + "loss": 0.9375, + "step": 761 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996878404698814, + "loss": 0.8984, + "step": 762 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996847577555062, + "loss": 0.8945, + "step": 763 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996816598990415, + "loss": 0.9648, + "step": 764 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996785469005812, + "loss": 0.9805, + "step": 765 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996754187602194, + "loss": 0.9727, + "step": 766 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996722754780512, + "loss": 0.9531, + "step": 767 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996691170541718, + "loss": 0.9531, + "step": 768 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996659434886768, + "loss": 0.9414, + "step": 769 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996627547816624, + "loss": 1.0469, + "step": 770 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996595509332252, + "loss": 0.9375, + "step": 771 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996563319434623, + "loss": 0.8711, + "step": 772 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996530978124713, + "loss": 1.0078, + "step": 773 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996498485403502, + "loss": 1.0156, + "step": 774 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996465841271974, + "loss": 1.0156, + "step": 775 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996433045731117, + "loss": 1.0, + "step": 776 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996400098781929, + "loss": 1.0, + "step": 777 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996367000425404, + "loss": 0.9141, + "step": 778 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996333750662547, + "loss": 0.9492, + "step": 779 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996300349494366, + "loss": 0.9492, + "step": 780 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996266796921873, + "loss": 0.9609, + "step": 781 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996233092946084, + "loss": 1.0078, + "step": 782 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999619923756802, + "loss": 0.8672, + "step": 783 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996165230788708, + "loss": 0.9453, + "step": 784 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996131072609175, + "loss": 0.9883, + "step": 785 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996096763030464, + "loss": 0.9141, + "step": 786 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996062302053607, + "loss": 0.9453, + "step": 787 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996027689679653, + "loss": 1.0391, + "step": 788 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995992925909648, + "loss": 1.0938, + "step": 789 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995958010744646, + "loss": 0.8867, + "step": 790 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995922944185706, + "loss": 0.9219, + "step": 791 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999588772623389, + "loss": 0.957, + "step": 792 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995852356890266, + "loss": 0.9805, + "step": 793 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995816836155904, + "loss": 0.9922, + "step": 794 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999578116403188, + "loss": 1.0469, + "step": 795 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999574534051928, + "loss": 1.0391, + "step": 796 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995709365619185, + "loss": 0.9805, + "step": 797 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995673239332685, + "loss": 0.9258, + "step": 798 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995636961660879, + "loss": 1.0781, + "step": 799 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995600532604861, + "loss": 1.0078, + "step": 800 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995563952165736, + "loss": 0.9414, + "step": 801 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995527220344616, + "loss": 1.0, + "step": 802 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995490337142612, + "loss": 0.8945, + "step": 803 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995453302560839, + "loss": 0.8867, + "step": 804 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995416116600423, + "loss": 0.8867, + "step": 805 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995378779262493, + "loss": 0.918, + "step": 806 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995341290548175, + "loss": 0.9375, + "step": 807 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995303650458605, + "loss": 1.1016, + "step": 808 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995265858994927, + "loss": 0.9688, + "step": 809 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995227916158285, + "loss": 0.9766, + "step": 810 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995189821949831, + "loss": 0.9375, + "step": 811 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995151576370714, + "loss": 0.9141, + "step": 812 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995113179422097, + "loss": 0.9492, + "step": 813 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995074631105143, + "loss": 1.0234, + "step": 814 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999503593142102, + "loss": 1.0781, + "step": 815 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994997080370902, + "loss": 1.0391, + "step": 816 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994958077955962, + "loss": 0.9453, + "step": 817 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994918924177386, + "loss": 0.9922, + "step": 818 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994879619036358, + "loss": 0.9648, + "step": 819 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994840162534074, + "loss": 0.9492, + "step": 820 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994800554671722, + "loss": 0.9648, + "step": 821 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999476079545051, + "loss": 0.9961, + "step": 822 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994720884871635, + "loss": 1.0547, + "step": 823 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994680822936312, + "loss": 0.9258, + "step": 824 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994640609645752, + "loss": 0.9023, + "step": 825 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994600245001178, + "loss": 1.0078, + "step": 826 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999455972900381, + "loss": 0.9727, + "step": 827 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994519061654874, + "loss": 1.0234, + "step": 828 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994478242955607, + "loss": 0.9648, + "step": 829 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994437272907242, + "loss": 0.9883, + "step": 830 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994396151511021, + "loss": 1.0234, + "step": 831 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994354878768194, + "loss": 1.0, + "step": 832 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994313454680004, + "loss": 0.9609, + "step": 833 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994271879247714, + "loss": 1.0625, + "step": 834 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999423015247258, + "loss": 0.9492, + "step": 835 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994188274355866, + "loss": 0.957, + "step": 836 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994146244898846, + "loss": 0.918, + "step": 837 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009994104064102787, + "loss": 0.9219, + "step": 838 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009994061731968971, + "loss": 0.8867, + "step": 839 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999401924849868, + "loss": 0.9805, + "step": 840 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993976613693202, + "loss": 0.9453, + "step": 841 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993933827553826, + "loss": 0.9141, + "step": 842 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993890890081852, + "loss": 1.0703, + "step": 843 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993847801278582, + "loss": 1.0156, + "step": 844 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993804561145318, + "loss": 0.9414, + "step": 845 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999376116968337, + "loss": 0.9062, + "step": 846 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999371762689406, + "loss": 1.0312, + "step": 847 + }, + { + "epoch": 0.05, + "learning_rate": 0.00099936739327787, + "loss": 0.9727, + "step": 848 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993630087338616, + "loss": 0.9883, + "step": 849 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993586090575137, + "loss": 0.8984, + "step": 850 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993541942489596, + "loss": 0.9844, + "step": 851 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993497643083333, + "loss": 0.9883, + "step": 852 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999345319235769, + "loss": 1.0, + "step": 853 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999340859031401, + "loss": 0.9688, + "step": 854 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993363836953647, + "loss": 0.9102, + "step": 855 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999331893227796, + "loss": 0.9961, + "step": 856 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993273876288307, + "loss": 0.9531, + "step": 857 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993228668986052, + "loss": 1.0, + "step": 858 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993183310372568, + "loss": 0.9648, + "step": 859 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993137800449225, + "loss": 0.9453, + "step": 860 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993092139217409, + "loss": 0.9414, + "step": 861 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993046326678499, + "loss": 1.0078, + "step": 862 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993000362833882, + "loss": 1.0, + "step": 863 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992954247684954, + "loss": 0.9805, + "step": 864 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999290798123311, + "loss": 1.0391, + "step": 865 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992861563479756, + "loss": 1.0312, + "step": 866 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992814994426295, + "loss": 1.0234, + "step": 867 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999276827407414, + "loss": 0.9258, + "step": 868 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992721402424705, + "loss": 1.0312, + "step": 869 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992674379479412, + "loss": 0.918, + "step": 870 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992627205239683, + "loss": 1.0234, + "step": 871 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992579879706954, + "loss": 0.9258, + "step": 872 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992532402882653, + "loss": 1.0234, + "step": 873 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992484774768219, + "loss": 0.9453, + "step": 874 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992436995365098, + "loss": 0.8711, + "step": 875 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999238906467474, + "loss": 0.9414, + "step": 876 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999234098269859, + "loss": 0.9219, + "step": 877 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999229274943811, + "loss": 1.0156, + "step": 878 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992244364894762, + "loss": 0.9336, + "step": 879 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999219582907001, + "loss": 0.918, + "step": 880 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992147141965329, + "loss": 0.8477, + "step": 881 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992098303582189, + "loss": 0.957, + "step": 882 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992049313922071, + "loss": 0.9727, + "step": 883 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992000172986465, + "loss": 0.9609, + "step": 884 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991950880776852, + "loss": 0.918, + "step": 885 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999190143729473, + "loss": 0.9062, + "step": 886 + }, + { + "epoch": 0.05, + "learning_rate": 0.00099918518425416, + "loss": 1.0078, + "step": 887 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991802096518958, + "loss": 0.9219, + "step": 888 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991752199228318, + "loss": 0.9492, + "step": 889 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991702150671188, + "loss": 0.9531, + "step": 890 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991651950849087, + "loss": 0.9023, + "step": 891 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991601599763536, + "loss": 1.0078, + "step": 892 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999155109741606, + "loss": 0.8828, + "step": 893 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991500443808188, + "loss": 1.0547, + "step": 894 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991449638941457, + "loss": 0.9375, + "step": 895 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991398682817406, + "loss": 0.9883, + "step": 896 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999134757543758, + "loss": 0.9844, + "step": 897 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991296316803524, + "loss": 0.957, + "step": 898 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991244906916797, + "loss": 1.0625, + "step": 899 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991193345778953, + "loss": 0.9258, + "step": 900 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991141633391555, + "loss": 1.0078, + "step": 901 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991089769756172, + "loss": 0.9336, + "step": 902 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991037754874374, + "loss": 0.9883, + "step": 903 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990985588747736, + "loss": 0.8984, + "step": 904 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990933271377843, + "loss": 0.9297, + "step": 905 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990880802766273, + "loss": 1.0469, + "step": 906 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990828182914625, + "loss": 0.9766, + "step": 907 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990775411824486, + "loss": 0.9766, + "step": 908 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999072248949746, + "loss": 0.9414, + "step": 909 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999066941593515, + "loss": 0.9688, + "step": 910 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999061619113916, + "loss": 0.9609, + "step": 911 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999056281511111, + "loss": 0.918, + "step": 912 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990509287852612, + "loss": 0.957, + "step": 913 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990455609365288, + "loss": 0.9805, + "step": 914 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990401779650768, + "loss": 1.0234, + "step": 915 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999034779871068, + "loss": 0.9961, + "step": 916 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990293666546663, + "loss": 0.8867, + "step": 917 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990239383160357, + "loss": 0.9492, + "step": 918 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990184948553405, + "loss": 0.8906, + "step": 919 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990130362727456, + "loss": 0.9062, + "step": 920 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990075625684164, + "loss": 0.918, + "step": 921 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990020737425192, + "loss": 0.9727, + "step": 922 + }, + { + "epoch": 0.05, + "learning_rate": 0.00099899656979522, + "loss": 0.9883, + "step": 923 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989910507266855, + "loss": 0.9531, + "step": 924 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998985516537083, + "loss": 0.9141, + "step": 925 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989799672265806, + "loss": 0.918, + "step": 926 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989744027953458, + "loss": 0.9961, + "step": 927 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989688232435478, + "loss": 0.9375, + "step": 928 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989632285713553, + "loss": 0.9141, + "step": 929 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998957618778938, + "loss": 0.9375, + "step": 930 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998951993866466, + "loss": 0.9258, + "step": 931 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989463538341095, + "loss": 1.0, + "step": 932 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989406986820396, + "loss": 0.9258, + "step": 933 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989350284104275, + "loss": 0.9648, + "step": 934 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998929343019445, + "loss": 0.9766, + "step": 935 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989236425092648, + "loss": 0.9258, + "step": 936 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989179268800592, + "loss": 0.9609, + "step": 937 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989121961320017, + "loss": 0.9688, + "step": 938 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989064502652658, + "loss": 1.0938, + "step": 939 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989006892800257, + "loss": 0.9883, + "step": 940 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988949131764558, + "loss": 0.9258, + "step": 941 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988891219547312, + "loss": 0.957, + "step": 942 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988833156150275, + "loss": 0.9492, + "step": 943 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988774941575207, + "loss": 0.8828, + "step": 944 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998871657582387, + "loss": 0.9297, + "step": 945 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988658058898035, + "loss": 1.0234, + "step": 946 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988599390799472, + "loss": 0.9883, + "step": 947 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988540571529963, + "loss": 0.9805, + "step": 948 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988481601091285, + "loss": 1.0391, + "step": 949 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988422479485233, + "loss": 0.8828, + "step": 950 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988363206713592, + "loss": 0.918, + "step": 951 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988303782778158, + "loss": 0.8672, + "step": 952 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988244207680735, + "loss": 0.918, + "step": 953 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988184481423128, + "loss": 1.0078, + "step": 954 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988124604007145, + "loss": 0.9336, + "step": 955 + }, + { + "epoch": 0.05, + "learning_rate": 0.00099880645754346, + "loss": 0.9102, + "step": 956 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988004395707316, + "loss": 1.0625, + "step": 957 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987944064827113, + "loss": 0.9141, + "step": 958 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987883582795822, + "loss": 0.9375, + "step": 959 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987822949615274, + "loss": 1.0547, + "step": 960 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987762165287303, + "loss": 0.9844, + "step": 961 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998770122981376, + "loss": 0.9766, + "step": 962 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987640143196482, + "loss": 0.9414, + "step": 963 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987578905437324, + "loss": 1.0547, + "step": 964 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987517516538145, + "loss": 0.9844, + "step": 965 + }, + { + "epoch": 0.05, + "learning_rate": 0.00099874559765008, + "loss": 1.0156, + "step": 966 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987394285327156, + "loss": 0.9492, + "step": 967 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987332443019085, + "loss": 0.9531, + "step": 968 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987270449578454, + "loss": 0.9688, + "step": 969 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987208305007149, + "loss": 0.875, + "step": 970 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998714600930705, + "loss": 0.9961, + "step": 971 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987083562480047, + "loss": 0.9531, + "step": 972 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987020964528027, + "loss": 0.9609, + "step": 973 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986958215452892, + "loss": 0.918, + "step": 974 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986895315256543, + "loss": 0.9336, + "step": 975 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986832263940884, + "loss": 0.9648, + "step": 976 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986769061507827, + "loss": 0.9688, + "step": 977 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986705707959284, + "loss": 0.9414, + "step": 978 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998664220329718, + "loss": 0.918, + "step": 979 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986578547523438, + "loss": 1.0078, + "step": 980 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986514740639985, + "loss": 0.9375, + "step": 981 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986450782648755, + "loss": 0.9727, + "step": 982 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986386673551688, + "loss": 0.9531, + "step": 983 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986322413350725, + "loss": 0.9688, + "step": 984 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986258002047813, + "loss": 0.9219, + "step": 985 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986193439644904, + "loss": 0.8867, + "step": 986 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986128726143957, + "loss": 0.8672, + "step": 987 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998606386154693, + "loss": 0.9258, + "step": 988 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998599884585579, + "loss": 0.8945, + "step": 989 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985933679072508, + "loss": 0.9375, + "step": 990 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985868361199058, + "loss": 0.9414, + "step": 991 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985802892237417, + "loss": 0.9844, + "step": 992 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985737272189571, + "loss": 0.9258, + "step": 993 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985671501057509, + "loss": 0.9141, + "step": 994 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985605578843224, + "loss": 0.9102, + "step": 995 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985539505548713, + "loss": 0.9883, + "step": 996 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985473281175978, + "loss": 1.1172, + "step": 997 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985406905727028, + "loss": 0.9375, + "step": 998 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998534037920387, + "loss": 1.0078, + "step": 999 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985273701608524, + "loss": 0.8828, + "step": 1000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985206872943008, + "loss": 0.9219, + "step": 1001 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998513989320935, + "loss": 0.8828, + "step": 1002 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985072762409579, + "loss": 0.9062, + "step": 1003 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985005480545727, + "loss": 0.9336, + "step": 1004 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984938047619832, + "loss": 0.9102, + "step": 1005 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984870463633942, + "loss": 0.9297, + "step": 1006 + }, + { + "epoch": 0.05, + "learning_rate": 0.00099848027285901, + "loss": 0.9844, + "step": 1007 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984734842490364, + "loss": 0.9062, + "step": 1008 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984666805336788, + "loss": 0.9414, + "step": 1009 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984598617131435, + "loss": 0.9883, + "step": 1010 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984530277876368, + "loss": 1.0, + "step": 1011 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984461787573662, + "loss": 1.0312, + "step": 1012 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998439314622539, + "loss": 0.9922, + "step": 1013 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984324353833633, + "loss": 0.9531, + "step": 1014 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984255410400477, + "loss": 0.8672, + "step": 1015 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984186315928007, + "loss": 0.9102, + "step": 1016 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984117070418322, + "loss": 0.9336, + "step": 1017 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984047673873516, + "loss": 1.0312, + "step": 1018 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009983978126295694, + "loss": 0.9023, + "step": 1019 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009983908427686965, + "loss": 0.9648, + "step": 1020 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009983838578049439, + "loss": 0.9375, + "step": 1021 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998376857738523, + "loss": 1.0078, + "step": 1022 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009983698425696467, + "loss": 0.9531, + "step": 1023 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009983628122985269, + "loss": 0.9609, + "step": 1024 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998355766925377, + "loss": 0.9336, + "step": 1025 + }, + { + "epoch": 0.06, + "learning_rate": 0.00099834870645041, + "loss": 0.8984, + "step": 1026 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009983416308738404, + "loss": 0.9375, + "step": 1027 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009983345401958826, + "loss": 0.9414, + "step": 1028 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998327434416751, + "loss": 0.9336, + "step": 1029 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009983203135366615, + "loss": 0.9922, + "step": 1030 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009983131775558297, + "loss": 1.0312, + "step": 1031 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009983060264744715, + "loss": 0.9414, + "step": 1032 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998298860292804, + "loss": 0.9922, + "step": 1033 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998291679011044, + "loss": 0.9219, + "step": 1034 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982844826294094, + "loss": 0.8711, + "step": 1035 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982772711481184, + "loss": 0.8594, + "step": 1036 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982700445673892, + "loss": 0.8828, + "step": 1037 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998262802887441, + "loss": 1.0234, + "step": 1038 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998255546108493, + "loss": 1.0078, + "step": 1039 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982482742307655, + "loss": 0.8828, + "step": 1040 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982409872544783, + "loss": 0.9648, + "step": 1041 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998233685179853, + "loss": 0.9883, + "step": 1042 + }, + { + "epoch": 0.06, + "learning_rate": 0.00099822636800711, + "loss": 1.0547, + "step": 1043 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982190357364718, + "loss": 0.9023, + "step": 1044 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982116883681601, + "loss": 0.9688, + "step": 1045 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982043259023979, + "loss": 1.0156, + "step": 1046 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998196948339408, + "loss": 0.9297, + "step": 1047 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998189555679414, + "loss": 0.9141, + "step": 1048 + }, + { + "epoch": 0.06, + "learning_rate": 0.00099818214792264, + "loss": 1.0625, + "step": 1049 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981747250693107, + "loss": 0.9648, + "step": 1050 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981672871196509, + "loss": 1.0078, + "step": 1051 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981598340738855, + "loss": 0.9297, + "step": 1052 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998152365932241, + "loss": 1.0234, + "step": 1053 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981448826949435, + "loss": 1.0, + "step": 1054 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981373843622196, + "loss": 0.9062, + "step": 1055 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981298709342968, + "loss": 1.0078, + "step": 1056 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981223424114026, + "loss": 0.9688, + "step": 1057 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981147987937653, + "loss": 1.0234, + "step": 1058 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981072400816133, + "loss": 0.9492, + "step": 1059 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980996662751758, + "loss": 0.9922, + "step": 1060 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998092077374682, + "loss": 0.918, + "step": 1061 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980844733803622, + "loss": 0.875, + "step": 1062 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998076854292447, + "loss": 0.9375, + "step": 1063 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980692201111668, + "loss": 0.9492, + "step": 1064 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980615708367533, + "loss": 0.9961, + "step": 1065 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998053906469438, + "loss": 0.9492, + "step": 1066 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980462270094534, + "loss": 1.0, + "step": 1067 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980385324570322, + "loss": 0.9648, + "step": 1068 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980308228124074, + "loss": 0.9102, + "step": 1069 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980230980758129, + "loss": 0.9492, + "step": 1070 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980153582474826, + "loss": 0.9297, + "step": 1071 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980076033276508, + "loss": 0.918, + "step": 1072 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997999833316553, + "loss": 0.9648, + "step": 1073 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979920482144243, + "loss": 0.9531, + "step": 1074 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979842480215006, + "loss": 0.9609, + "step": 1075 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979764327380188, + "loss": 0.9805, + "step": 1076 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979686023642148, + "loss": 1.0, + "step": 1077 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997960756900327, + "loss": 0.8281, + "step": 1078 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979528963465923, + "loss": 0.9805, + "step": 1079 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997945020703249, + "loss": 0.9453, + "step": 1080 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997937129970536, + "loss": 0.9844, + "step": 1081 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979292241486925, + "loss": 1.0, + "step": 1082 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979213032379577, + "loss": 0.9375, + "step": 1083 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997913367238572, + "loss": 1.1094, + "step": 1084 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979054161507756, + "loss": 1.0781, + "step": 1085 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978974499748098, + "loss": 1.0078, + "step": 1086 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978894687109154, + "loss": 0.9961, + "step": 1087 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978814723593347, + "loss": 0.9375, + "step": 1088 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978734609203102, + "loss": 0.9531, + "step": 1089 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978654343940842, + "loss": 1.0312, + "step": 1090 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978573927809003, + "loss": 0.8984, + "step": 1091 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978493360810019, + "loss": 0.9492, + "step": 1092 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978412642946332, + "loss": 0.9609, + "step": 1093 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978331774220391, + "loss": 0.8906, + "step": 1094 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978250754634644, + "loss": 0.9492, + "step": 1095 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978169584191545, + "loss": 0.9336, + "step": 1096 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978088262893557, + "loss": 0.8984, + "step": 1097 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978006790743142, + "loss": 0.9961, + "step": 1098 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977925167742767, + "loss": 0.957, + "step": 1099 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977843393894912, + "loss": 0.957, + "step": 1100 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977761469202049, + "loss": 0.9609, + "step": 1101 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977679393666661, + "loss": 0.9961, + "step": 1102 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997759716729124, + "loss": 0.9453, + "step": 1103 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977514790078272, + "loss": 0.9531, + "step": 1104 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977432262030254, + "loss": 0.8906, + "step": 1105 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977349583149692, + "loss": 1.0, + "step": 1106 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977266753439086, + "loss": 1.0156, + "step": 1107 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977183772900949, + "loss": 0.9414, + "step": 1108 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977100641537795, + "loss": 0.9453, + "step": 1109 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977017359352142, + "loss": 0.9258, + "step": 1110 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976933926346514, + "loss": 0.9531, + "step": 1111 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976850342523441, + "loss": 0.9766, + "step": 1112 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976766607885453, + "loss": 0.9219, + "step": 1113 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997668272243509, + "loss": 0.9336, + "step": 1114 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976598686174893, + "loss": 0.9375, + "step": 1115 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976514499107408, + "loss": 0.918, + "step": 1116 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976430161235186, + "loss": 0.9375, + "step": 1117 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976345672560785, + "loss": 0.9766, + "step": 1118 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976261033086764, + "loss": 0.9297, + "step": 1119 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976176242815688, + "loss": 0.9336, + "step": 1120 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976091301750125, + "loss": 0.9609, + "step": 1121 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997600620989265, + "loss": 0.9023, + "step": 1122 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009975920967245841, + "loss": 0.957, + "step": 1123 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009975835573812284, + "loss": 0.9961, + "step": 1124 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009975750029594561, + "loss": 0.9727, + "step": 1125 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997566433459527, + "loss": 0.8711, + "step": 1126 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009975578488817007, + "loss": 0.9414, + "step": 1127 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997549249226237, + "loss": 0.9766, + "step": 1128 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009975406344933967, + "loss": 0.8594, + "step": 1129 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997532004683441, + "loss": 0.9844, + "step": 1130 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997523359796631, + "loss": 0.9688, + "step": 1131 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997514699833229, + "loss": 0.9492, + "step": 1132 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009975060247934976, + "loss": 0.9648, + "step": 1133 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974973346776991, + "loss": 1.0156, + "step": 1134 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974886294860973, + "loss": 1.0156, + "step": 1135 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974799092189559, + "loss": 1.0078, + "step": 1136 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974711738765392, + "loss": 0.9648, + "step": 1137 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974624234591117, + "loss": 1.0234, + "step": 1138 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974536579669387, + "loss": 0.9844, + "step": 1139 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974448774002858, + "loss": 0.9336, + "step": 1140 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974360817594193, + "loss": 0.9219, + "step": 1141 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974272710446053, + "loss": 0.9375, + "step": 1142 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974184452561112, + "loss": 1.0078, + "step": 1143 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974096043942043, + "loss": 0.9297, + "step": 1144 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974007484591524, + "loss": 0.9414, + "step": 1145 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973918774512241, + "loss": 0.9609, + "step": 1146 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973829913706878, + "loss": 0.9453, + "step": 1147 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973740902178131, + "loss": 0.9531, + "step": 1148 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973651739928699, + "loss": 0.8789, + "step": 1149 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973562426961282, + "loss": 0.9844, + "step": 1150 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973472963278585, + "loss": 0.9258, + "step": 1151 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973383348883319, + "loss": 0.8633, + "step": 1152 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973293583778202, + "loss": 0.9141, + "step": 1153 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973203667965954, + "loss": 0.9805, + "step": 1154 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973113601449298, + "loss": 1.0, + "step": 1155 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973023384230964, + "loss": 0.9531, + "step": 1156 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972933016313688, + "loss": 0.9844, + "step": 1157 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972842497700204, + "loss": 0.9648, + "step": 1158 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972751828393259, + "loss": 0.9258, + "step": 1159 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972661008395597, + "loss": 1.0078, + "step": 1160 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972570037709973, + "loss": 0.9297, + "step": 1161 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972478916339144, + "loss": 0.9609, + "step": 1162 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997238764428587, + "loss": 0.8867, + "step": 1163 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972296221552918, + "loss": 1.0781, + "step": 1164 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972204648143054, + "loss": 0.9492, + "step": 1165 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972112924059057, + "loss": 0.9102, + "step": 1166 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972021049303708, + "loss": 0.9766, + "step": 1167 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009971929023879788, + "loss": 0.9258, + "step": 1168 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997183684779009, + "loss": 0.9219, + "step": 1169 + }, + { + "epoch": 0.06, + "learning_rate": 0.00099717445210374, + "loss": 0.9414, + "step": 1170 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009971652043624523, + "loss": 0.9375, + "step": 1171 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009971559415554254, + "loss": 0.9219, + "step": 1172 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009971466636829406, + "loss": 1.0156, + "step": 1173 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009971373707452788, + "loss": 0.9922, + "step": 1174 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997128062742722, + "loss": 1.0234, + "step": 1175 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009971187396755515, + "loss": 0.9336, + "step": 1176 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009971094015440505, + "loss": 0.9062, + "step": 1177 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009971000483485015, + "loss": 0.9258, + "step": 1178 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009970906800891883, + "loss": 0.8789, + "step": 1179 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009970812967663948, + "loss": 0.9648, + "step": 1180 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997071898380405, + "loss": 0.9102, + "step": 1181 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997062484931504, + "loss": 0.9023, + "step": 1182 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997053056419977, + "loss": 0.9414, + "step": 1183 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009970436128461093, + "loss": 1.0625, + "step": 1184 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009970341542101877, + "loss": 0.9766, + "step": 1185 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009970246805124986, + "loss": 1.0781, + "step": 1186 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997015191753329, + "loss": 1.0469, + "step": 1187 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009970056879329665, + "loss": 0.9492, + "step": 1188 + }, + { + "epoch": 0.06, + "learning_rate": 0.000996996169051699, + "loss": 0.918, + "step": 1189 + }, + { + "epoch": 0.06, + "learning_rate": 0.000996986635109815, + "loss": 0.9688, + "step": 1190 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009969770861076037, + "loss": 0.9531, + "step": 1191 + }, + { + "epoch": 0.06, + "learning_rate": 0.000996967522045354, + "loss": 0.9648, + "step": 1192 + }, + { + "epoch": 0.06, + "learning_rate": 0.000996957942923356, + "loss": 0.9883, + "step": 1193 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009969483487419, + "loss": 0.9805, + "step": 1194 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009969387395012766, + "loss": 0.8867, + "step": 1195 + }, + { + "epoch": 0.06, + "learning_rate": 0.000996929115201777, + "loss": 0.9219, + "step": 1196 + }, + { + "epoch": 0.06, + "learning_rate": 0.000996919475843693, + "loss": 1.0078, + "step": 1197 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009969098214273164, + "loss": 0.9258, + "step": 1198 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009969001519529402, + "loss": 0.9023, + "step": 1199 + }, + { + "epoch": 0.06, + "learning_rate": 0.000996890467420857, + "loss": 1.0156, + "step": 1200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009968807678313605, + "loss": 0.957, + "step": 1201 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009968710531847447, + "loss": 0.918, + "step": 1202 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009968613234813035, + "loss": 0.8672, + "step": 1203 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009968515787213326, + "loss": 1.0078, + "step": 1204 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009968418189051265, + "loss": 1.0, + "step": 1205 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009968320440329813, + "loss": 1.0312, + "step": 1206 + }, + { + "epoch": 0.06, + "learning_rate": 0.000996822254105193, + "loss": 0.9844, + "step": 1207 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009968124491220587, + "loss": 1.0, + "step": 1208 + }, + { + "epoch": 0.06, + "learning_rate": 0.000996802629083875, + "loss": 0.9727, + "step": 1209 + }, + { + "epoch": 0.07, + "learning_rate": 0.00099679279399094, + "loss": 0.9805, + "step": 1210 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009967829438435514, + "loss": 0.9141, + "step": 1211 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009967730786420077, + "loss": 0.9336, + "step": 1212 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009967631983866078, + "loss": 0.9297, + "step": 1213 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009967533030776515, + "loss": 0.9141, + "step": 1214 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996743392715438, + "loss": 0.8984, + "step": 1215 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009967334673002684, + "loss": 0.8672, + "step": 1216 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009967235268324428, + "loss": 0.9258, + "step": 1217 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009967135713122629, + "loss": 1.0, + "step": 1218 + }, + { + "epoch": 0.07, + "learning_rate": 0.00099670360074003, + "loss": 1.0078, + "step": 1219 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966936151160465, + "loss": 0.9766, + "step": 1220 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966836144406149, + "loss": 0.9141, + "step": 1221 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966735987140382, + "loss": 0.9297, + "step": 1222 + }, + { + "epoch": 0.07, + "learning_rate": 0.00099666356793662, + "loss": 0.9141, + "step": 1223 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966535221086646, + "loss": 0.9297, + "step": 1224 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966434612304757, + "loss": 0.8242, + "step": 1225 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966333853023586, + "loss": 0.9102, + "step": 1226 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966232943246186, + "loss": 0.9414, + "step": 1227 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966131882975615, + "loss": 0.9492, + "step": 1228 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966030672214933, + "loss": 0.957, + "step": 1229 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009965929310967211, + "loss": 0.9102, + "step": 1230 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996582779923552, + "loss": 0.9492, + "step": 1231 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009965726137022935, + "loss": 1.0156, + "step": 1232 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009965624324332534, + "loss": 0.9062, + "step": 1233 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009965522361167405, + "loss": 0.8633, + "step": 1234 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996542024753064, + "loss": 1.0, + "step": 1235 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009965317983425329, + "loss": 0.9258, + "step": 1236 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009965215568854572, + "loss": 1.0, + "step": 1237 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009965113003821476, + "loss": 0.9648, + "step": 1238 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009965010288329147, + "loss": 0.9727, + "step": 1239 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009964907422380697, + "loss": 0.8867, + "step": 1240 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996480440597924, + "loss": 0.9258, + "step": 1241 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009964701239127904, + "loss": 0.8945, + "step": 1242 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009964597921829812, + "loss": 0.9453, + "step": 1243 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009964494454088094, + "loss": 0.9727, + "step": 1244 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009964390835905887, + "loss": 0.9727, + "step": 1245 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009964287067286332, + "loss": 0.9258, + "step": 1246 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996418314823257, + "loss": 1.0, + "step": 1247 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009964079078747755, + "loss": 0.9961, + "step": 1248 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009963974858835036, + "loss": 0.9531, + "step": 1249 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009963870488497572, + "loss": 0.9805, + "step": 1250 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009963765967738528, + "loss": 0.9102, + "step": 1251 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996366129656107, + "loss": 0.9102, + "step": 1252 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009963556474968372, + "loss": 0.8906, + "step": 1253 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009963451502963605, + "loss": 0.8555, + "step": 1254 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009963346380549955, + "loss": 0.8945, + "step": 1255 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009963241107730607, + "loss": 0.9766, + "step": 1256 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009963135684508749, + "loss": 0.9727, + "step": 1257 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009963030110887578, + "loss": 0.875, + "step": 1258 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009962924386870292, + "loss": 0.8906, + "step": 1259 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009962818512460091, + "loss": 0.9727, + "step": 1260 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009962712487660192, + "loss": 0.9297, + "step": 1261 + }, + { + "epoch": 0.07, + "learning_rate": 0.00099626063124738, + "loss": 1.0156, + "step": 1262 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009962499986904138, + "loss": 0.9844, + "step": 1263 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009962393510954423, + "loss": 0.8984, + "step": 1264 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009962286884627888, + "loss": 0.9141, + "step": 1265 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009962180107927756, + "loss": 0.9297, + "step": 1266 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009962073180857269, + "loss": 1.0156, + "step": 1267 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009961966103419664, + "loss": 0.9688, + "step": 1268 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009961858875618189, + "loss": 0.9688, + "step": 1269 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996175149745609, + "loss": 1.0469, + "step": 1270 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009961643968936622, + "loss": 0.9219, + "step": 1271 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009961536290063043, + "loss": 1.0078, + "step": 1272 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009961428460838618, + "loss": 1.0312, + "step": 1273 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009961320481266613, + "loss": 0.8477, + "step": 1274 + }, + { + "epoch": 0.07, + "learning_rate": 0.00099612123513503, + "loss": 0.9727, + "step": 1275 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009961104071092956, + "loss": 0.9141, + "step": 1276 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960995640497863, + "loss": 0.9453, + "step": 1277 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960887059568308, + "loss": 0.8516, + "step": 1278 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960778328307576, + "loss": 0.9609, + "step": 1279 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960669446718966, + "loss": 1.0078, + "step": 1280 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960560414805778, + "loss": 0.9922, + "step": 1281 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960451232571316, + "loss": 0.9414, + "step": 1282 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960341900018886, + "loss": 1.0781, + "step": 1283 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960232417151802, + "loss": 1.125, + "step": 1284 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960122783973383, + "loss": 0.9258, + "step": 1285 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960013000486951, + "loss": 0.9805, + "step": 1286 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009959903066695832, + "loss": 0.8906, + "step": 1287 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009959792982603358, + "loss": 1.0312, + "step": 1288 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009959682748212865, + "loss": 0.8984, + "step": 1289 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009959572363527692, + "loss": 0.9883, + "step": 1290 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009959461828551186, + "loss": 0.9453, + "step": 1291 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009959351143286697, + "loss": 0.9453, + "step": 1292 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009959240307737577, + "loss": 0.957, + "step": 1293 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009959129321907183, + "loss": 0.9453, + "step": 1294 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009959018185798883, + "loss": 0.957, + "step": 1295 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009958906899416042, + "loss": 1.0469, + "step": 1296 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009958795462762034, + "loss": 0.8555, + "step": 1297 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009958683875840233, + "loss": 0.8711, + "step": 1298 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009958572138654023, + "loss": 0.9531, + "step": 1299 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009958460251206789, + "loss": 0.8789, + "step": 1300 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009958348213501922, + "loss": 1.0547, + "step": 1301 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009958236025542817, + "loss": 0.9336, + "step": 1302 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009958123687332874, + "loss": 0.9492, + "step": 1303 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009958011198875497, + "loss": 0.8555, + "step": 1304 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009957898560174093, + "loss": 0.9492, + "step": 1305 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009957785771232078, + "loss": 0.9258, + "step": 1306 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995767283205287, + "loss": 0.8945, + "step": 1307 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009957559742639888, + "loss": 0.9141, + "step": 1308 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009957446502996562, + "loss": 0.9766, + "step": 1309 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009957333113126323, + "loss": 0.9531, + "step": 1310 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009957219573032605, + "loss": 0.8359, + "step": 1311 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009957105882718851, + "loss": 0.8672, + "step": 1312 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009956992042188507, + "loss": 0.8555, + "step": 1313 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009956878051445022, + "loss": 0.918, + "step": 1314 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009956763910491848, + "loss": 0.9922, + "step": 1315 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009956649619332445, + "loss": 1.0156, + "step": 1316 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995653517797028, + "loss": 0.9609, + "step": 1317 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009956420586408814, + "loss": 1.0625, + "step": 1318 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009956305844651524, + "loss": 0.9102, + "step": 1319 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009956190952701888, + "loss": 0.9492, + "step": 1320 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009956075910563386, + "loss": 0.9727, + "step": 1321 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009955960718239503, + "loss": 0.9922, + "step": 1322 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995584537573373, + "loss": 0.9297, + "step": 1323 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009955729883049564, + "loss": 0.9492, + "step": 1324 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009955614240190504, + "loss": 0.9062, + "step": 1325 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009955498447160056, + "loss": 0.9297, + "step": 1326 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009955382503961726, + "loss": 0.9453, + "step": 1327 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995526641059903, + "loss": 0.9727, + "step": 1328 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995515016707548, + "loss": 1.0391, + "step": 1329 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009955033773394607, + "loss": 0.9961, + "step": 1330 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009954917229559935, + "loss": 1.0, + "step": 1331 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009954800535574993, + "loss": 0.9219, + "step": 1332 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009954683691443319, + "loss": 1.0234, + "step": 1333 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009954566697168454, + "loss": 0.9414, + "step": 1334 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009954449552753944, + "loss": 1.0938, + "step": 1335 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995433225820334, + "loss": 0.9805, + "step": 1336 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009954214813520192, + "loss": 0.957, + "step": 1337 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009954097218708062, + "loss": 0.9922, + "step": 1338 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009953979473770515, + "loss": 0.9609, + "step": 1339 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009953861578711114, + "loss": 0.9531, + "step": 1340 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009953743533533434, + "loss": 0.8828, + "step": 1341 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009953625338241056, + "loss": 0.9883, + "step": 1342 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009953506992837558, + "loss": 0.8906, + "step": 1343 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009953388497326526, + "loss": 0.8633, + "step": 1344 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995326985171155, + "loss": 0.8594, + "step": 1345 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009953151055996229, + "loss": 0.9258, + "step": 1346 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995303211018416, + "loss": 0.9688, + "step": 1347 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995291301427895, + "loss": 0.8555, + "step": 1348 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009952793768284204, + "loss": 0.9375, + "step": 1349 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009952674372203539, + "loss": 0.8789, + "step": 1350 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009952554826040572, + "loss": 0.8906, + "step": 1351 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009952435129798925, + "loss": 0.9844, + "step": 1352 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009952315283482227, + "loss": 0.9453, + "step": 1353 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009952195287094108, + "loss": 0.9883, + "step": 1354 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009952075140638203, + "loss": 0.9336, + "step": 1355 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009951954844118157, + "loss": 0.9023, + "step": 1356 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009951834397537611, + "loss": 0.918, + "step": 1357 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995171380090022, + "loss": 0.8984, + "step": 1358 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995159305420963, + "loss": 0.9453, + "step": 1359 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995147215746951, + "loss": 1.0156, + "step": 1360 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009951351110683519, + "loss": 0.9023, + "step": 1361 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009951229913855322, + "loss": 0.9297, + "step": 1362 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009951108566988596, + "loss": 0.9883, + "step": 1363 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009950987070087018, + "loss": 0.9492, + "step": 1364 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995086542315427, + "loss": 0.9414, + "step": 1365 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009950743626194033, + "loss": 0.9844, + "step": 1366 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009950621679210006, + "loss": 0.9102, + "step": 1367 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995049958220588, + "loss": 0.9453, + "step": 1368 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009950377335185352, + "loss": 0.9023, + "step": 1369 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995025493815213, + "loss": 0.9609, + "step": 1370 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009950132391109926, + "loss": 0.9727, + "step": 1371 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009950009694062447, + "loss": 0.9141, + "step": 1372 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009949886847013417, + "loss": 0.9219, + "step": 1373 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009949763849966555, + "loss": 0.8203, + "step": 1374 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009949640702925588, + "loss": 0.9375, + "step": 1375 + }, + { + "epoch": 0.07, + "learning_rate": 0.000994951740589425, + "loss": 0.8516, + "step": 1376 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009949393958876278, + "loss": 0.9688, + "step": 1377 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009949270361875409, + "loss": 0.9219, + "step": 1378 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009949146614895393, + "loss": 0.918, + "step": 1379 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009949022717939974, + "loss": 0.8633, + "step": 1380 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009948898671012913, + "loss": 0.9922, + "step": 1381 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009948774474117965, + "loss": 0.9102, + "step": 1382 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009948650127258895, + "loss": 0.9258, + "step": 1383 + }, + { + "epoch": 0.07, + "learning_rate": 0.000994852563043947, + "loss": 0.9414, + "step": 1384 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009948400983663464, + "loss": 0.9648, + "step": 1385 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009948276186934656, + "loss": 0.8672, + "step": 1386 + }, + { + "epoch": 0.07, + "learning_rate": 0.000994815124025682, + "loss": 0.9766, + "step": 1387 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009948026143633752, + "loss": 0.9492, + "step": 1388 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009947900897069237, + "loss": 0.9727, + "step": 1389 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009947775500567073, + "loss": 0.8867, + "step": 1390 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009947649954131059, + "loss": 0.9727, + "step": 1391 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009947524257764998, + "loss": 0.9883, + "step": 1392 + }, + { + "epoch": 0.07, + "learning_rate": 0.00099473984114727, + "loss": 0.8906, + "step": 1393 + }, + { + "epoch": 0.07, + "learning_rate": 0.000994727241525798, + "loss": 0.9375, + "step": 1394 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009947146269124654, + "loss": 0.9766, + "step": 1395 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009947019973076546, + "loss": 0.9805, + "step": 1396 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009946893527117483, + "loss": 0.8789, + "step": 1397 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009946766931251297, + "loss": 0.8281, + "step": 1398 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009946640185481824, + "loss": 0.9414, + "step": 1399 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009946513289812904, + "loss": 1.0156, + "step": 1400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009946386244248382, + "loss": 1.0, + "step": 1401 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994625904879211, + "loss": 0.9336, + "step": 1402 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009946131703447941, + "loss": 0.8672, + "step": 1403 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009946004208219734, + "loss": 0.9219, + "step": 1404 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009945876563111353, + "loss": 0.9961, + "step": 1405 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009945748768126665, + "loss": 0.9688, + "step": 1406 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009945620823269545, + "loss": 0.8828, + "step": 1407 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009945492728543868, + "loss": 0.9453, + "step": 1408 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009945364483953515, + "loss": 0.9531, + "step": 1409 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009945236089502377, + "loss": 1.0, + "step": 1410 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009945107545194338, + "loss": 1.0, + "step": 1411 + }, + { + "epoch": 0.08, + "learning_rate": 0.00099449788510333, + "loss": 0.8828, + "step": 1412 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009944850007023156, + "loss": 0.8359, + "step": 1413 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009944721013167815, + "loss": 0.9961, + "step": 1414 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009944591869471186, + "loss": 0.9883, + "step": 1415 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994446257593718, + "loss": 0.9258, + "step": 1416 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009944333132569717, + "loss": 0.9883, + "step": 1417 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009944203539372718, + "loss": 1.0312, + "step": 1418 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009944073796350112, + "loss": 0.8867, + "step": 1419 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994394390350583, + "loss": 0.9414, + "step": 1420 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009943813860843807, + "loss": 0.918, + "step": 1421 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009943683668367984, + "loss": 0.9531, + "step": 1422 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009943553326082308, + "loss": 0.9961, + "step": 1423 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009943422833990727, + "loss": 0.9219, + "step": 1424 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009943292192097196, + "loss": 1.0, + "step": 1425 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009943161400405673, + "loss": 0.9727, + "step": 1426 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009943030458920124, + "loss": 0.9609, + "step": 1427 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009942899367644514, + "loss": 0.9297, + "step": 1428 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009942768126582817, + "loss": 0.9531, + "step": 1429 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009942636735739009, + "loss": 0.8633, + "step": 1430 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009942505195117072, + "loss": 0.8711, + "step": 1431 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009942373504720995, + "loss": 0.9844, + "step": 1432 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009942241664554763, + "loss": 0.957, + "step": 1433 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009942109674622374, + "loss": 1.0, + "step": 1434 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994197753492783, + "loss": 0.9961, + "step": 1435 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009941845245475135, + "loss": 0.875, + "step": 1436 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009941712806268294, + "loss": 0.9961, + "step": 1437 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994158021731132, + "loss": 0.9805, + "step": 1438 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009941447478608236, + "loss": 0.9727, + "step": 1439 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009941314590163062, + "loss": 0.9141, + "step": 1440 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009941181551979823, + "loss": 0.9844, + "step": 1441 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009941048364062552, + "loss": 0.9375, + "step": 1442 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009940915026415288, + "loss": 0.8945, + "step": 1443 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009940781539042065, + "loss": 0.9023, + "step": 1444 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009940647901946933, + "loss": 0.9141, + "step": 1445 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009940514115133942, + "loss": 0.9688, + "step": 1446 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009940380178607145, + "loss": 0.8555, + "step": 1447 + }, + { + "epoch": 0.08, + "learning_rate": 0.00099402460923706, + "loss": 0.9219, + "step": 1448 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994011185642837, + "loss": 0.957, + "step": 1449 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009939977470784522, + "loss": 1.0, + "step": 1450 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993984293544313, + "loss": 0.9258, + "step": 1451 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009939708250408272, + "loss": 0.9688, + "step": 1452 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009939573415684026, + "loss": 0.9961, + "step": 1453 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009939438431274482, + "loss": 0.9492, + "step": 1454 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993930329718373, + "loss": 0.9766, + "step": 1455 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993916801341586, + "loss": 0.8828, + "step": 1456 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009939032579974977, + "loss": 0.8945, + "step": 1457 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009938896996865181, + "loss": 0.9141, + "step": 1458 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009938761264090587, + "loss": 0.875, + "step": 1459 + }, + { + "epoch": 0.08, + "learning_rate": 0.00099386253816553, + "loss": 0.9336, + "step": 1460 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009938489349563442, + "loss": 0.9141, + "step": 1461 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009938353167819134, + "loss": 0.9023, + "step": 1462 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009938216836426505, + "loss": 0.8555, + "step": 1463 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009938080355389686, + "loss": 0.9258, + "step": 1464 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009937943724712807, + "loss": 0.8828, + "step": 1465 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009937806944400017, + "loss": 0.9727, + "step": 1466 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009937670014455455, + "loss": 0.9922, + "step": 1467 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009937532934883274, + "loss": 0.9609, + "step": 1468 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009937395705687624, + "loss": 0.9453, + "step": 1469 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009937258326872668, + "loss": 0.9727, + "step": 1470 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009937120798442565, + "loss": 0.8555, + "step": 1471 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009936983120401484, + "loss": 0.9727, + "step": 1472 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009936845292753598, + "loss": 0.918, + "step": 1473 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009936707315503085, + "loss": 0.9531, + "step": 1474 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993656918865412, + "loss": 0.8594, + "step": 1475 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009936430912210897, + "loss": 0.918, + "step": 1476 + }, + { + "epoch": 0.08, + "learning_rate": 0.00099362924861776, + "loss": 0.8945, + "step": 1477 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009936153910558426, + "loss": 0.9648, + "step": 1478 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009936015185357575, + "loss": 0.9375, + "step": 1479 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993587631057925, + "loss": 0.9727, + "step": 1480 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993573728622766, + "loss": 0.9219, + "step": 1481 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009935598112307015, + "loss": 0.918, + "step": 1482 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009935458788821537, + "loss": 0.9375, + "step": 1483 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009935319315775445, + "loss": 0.9336, + "step": 1484 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009935179693172965, + "loss": 0.875, + "step": 1485 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993503992101833, + "loss": 1.0078, + "step": 1486 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009934899999315774, + "loss": 0.8945, + "step": 1487 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009934759928069538, + "loss": 0.8633, + "step": 1488 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009934619707283867, + "loss": 0.8867, + "step": 1489 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993447933696301, + "loss": 0.9297, + "step": 1490 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993433881711122, + "loss": 0.9453, + "step": 1491 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009934198147732753, + "loss": 0.9609, + "step": 1492 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009934057328831879, + "loss": 0.9414, + "step": 1493 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009933916360412858, + "loss": 0.875, + "step": 1494 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009933775242479963, + "loss": 0.9766, + "step": 1495 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009933633975037473, + "loss": 0.9492, + "step": 1496 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009933492558089668, + "loss": 1.0391, + "step": 1497 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009933350991640831, + "loss": 0.9492, + "step": 1498 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009933209275695255, + "loss": 1.0312, + "step": 1499 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009933067410257234, + "loss": 0.9609, + "step": 1500 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009932925395331066, + "loss": 0.9102, + "step": 1501 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009932783230921054, + "loss": 0.9062, + "step": 1502 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009932640917031507, + "loss": 1.0156, + "step": 1503 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009932498453666739, + "loss": 1.0078, + "step": 1504 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009932355840831064, + "loss": 0.9023, + "step": 1505 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009932213078528804, + "loss": 0.9961, + "step": 1506 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993207016676429, + "loss": 0.9102, + "step": 1507 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009931927105541847, + "loss": 0.9258, + "step": 1508 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009931783894865812, + "loss": 0.8555, + "step": 1509 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009931640534740524, + "loss": 0.9805, + "step": 1510 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009931497025170329, + "loss": 0.9258, + "step": 1511 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009931353366159576, + "loss": 0.9297, + "step": 1512 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009931209557712617, + "loss": 0.875, + "step": 1513 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993106559983381, + "loss": 0.9297, + "step": 1514 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009930921492527518, + "loss": 0.9453, + "step": 1515 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009930777235798108, + "loss": 0.9492, + "step": 1516 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993063282964995, + "loss": 0.9219, + "step": 1517 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009930488274087423, + "loss": 0.9219, + "step": 1518 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009930343569114904, + "loss": 0.9336, + "step": 1519 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993019871473678, + "loss": 0.9844, + "step": 1520 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009930053710957441, + "loss": 0.8281, + "step": 1521 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009929908557781279, + "loss": 0.9023, + "step": 1522 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009929763255212695, + "loss": 1.0156, + "step": 1523 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992961780325609, + "loss": 0.9805, + "step": 1524 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009929472201915873, + "loss": 0.9648, + "step": 1525 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009929326451196454, + "loss": 0.9492, + "step": 1526 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009929180551102254, + "loss": 0.8867, + "step": 1527 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009929034501637692, + "loss": 0.9219, + "step": 1528 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009928888302807193, + "loss": 0.8711, + "step": 1529 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009928741954615189, + "loss": 0.8516, + "step": 1530 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009928595457066112, + "loss": 0.8711, + "step": 1531 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009928448810164405, + "loss": 0.9609, + "step": 1532 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992830201391451, + "loss": 0.8164, + "step": 1533 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009928155068320873, + "loss": 0.9961, + "step": 1534 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992800797338795, + "loss": 0.9727, + "step": 1535 + }, + { + "epoch": 0.08, + "learning_rate": 0.00099278607291202, + "loss": 0.9883, + "step": 1536 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009927713335522081, + "loss": 0.8711, + "step": 1537 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009927565792598062, + "loss": 0.9336, + "step": 1538 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009927418100352614, + "loss": 0.8867, + "step": 1539 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992727025879021, + "loss": 0.8984, + "step": 1540 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009927122267915334, + "loss": 0.9805, + "step": 1541 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009926974127732469, + "loss": 0.9922, + "step": 1542 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009926825838246101, + "loss": 0.9062, + "step": 1543 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009926677399460728, + "loss": 0.9062, + "step": 1544 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009926528811380847, + "loss": 0.9102, + "step": 1545 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992638007401096, + "loss": 0.9531, + "step": 1546 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009926231187355574, + "loss": 0.8633, + "step": 1547 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009926082151419202, + "loss": 1.0156, + "step": 1548 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992593296620636, + "loss": 0.9453, + "step": 1549 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009925783631721569, + "loss": 1.0547, + "step": 1550 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009925634147969352, + "loss": 0.8672, + "step": 1551 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992548451495424, + "loss": 1.0156, + "step": 1552 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992533473268077, + "loss": 0.9727, + "step": 1553 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009925184801153476, + "loss": 0.9688, + "step": 1554 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009925034720376907, + "loss": 0.9688, + "step": 1555 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009924884490355606, + "loss": 0.8203, + "step": 1556 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992473411109413, + "loss": 0.918, + "step": 1557 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992458358259703, + "loss": 0.9375, + "step": 1558 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009924432904868873, + "loss": 0.957, + "step": 1559 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009924282077914226, + "loss": 0.9336, + "step": 1560 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009924131101737654, + "loss": 0.9531, + "step": 1561 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009923979976343734, + "loss": 0.9883, + "step": 1562 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009923828701737048, + "loss": 0.9414, + "step": 1563 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009923677277922178, + "loss": 0.9492, + "step": 1564 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009923525704903713, + "loss": 0.9258, + "step": 1565 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009923373982686247, + "loss": 0.9961, + "step": 1566 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009923222111274377, + "loss": 0.9375, + "step": 1567 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009923070090672704, + "loss": 0.9727, + "step": 1568 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009922917920885837, + "loss": 0.8008, + "step": 1569 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009922765601918385, + "loss": 0.8281, + "step": 1570 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009922613133774966, + "loss": 0.8906, + "step": 1571 + }, + { + "epoch": 0.08, + "learning_rate": 0.00099224605164602, + "loss": 0.957, + "step": 1572 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009922307749978708, + "loss": 0.9258, + "step": 1573 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009922154834335125, + "loss": 0.9297, + "step": 1574 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009922001769534082, + "loss": 0.957, + "step": 1575 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009921848555580215, + "loss": 0.9805, + "step": 1576 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009921695192478173, + "loss": 0.9336, + "step": 1577 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009921541680232597, + "loss": 0.9453, + "step": 1578 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009921388018848142, + "loss": 0.8828, + "step": 1579 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009921234208329465, + "loss": 0.9062, + "step": 1580 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009921080248681228, + "loss": 0.9531, + "step": 1581 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009920926139908091, + "loss": 0.793, + "step": 1582 + }, + { + "epoch": 0.09, + "learning_rate": 0.000992077188201473, + "loss": 0.9375, + "step": 1583 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009920617475005819, + "loss": 0.9688, + "step": 1584 + }, + { + "epoch": 0.09, + "learning_rate": 0.000992046291888603, + "loss": 0.9062, + "step": 1585 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009920308213660057, + "loss": 0.957, + "step": 1586 + }, + { + "epoch": 0.09, + "learning_rate": 0.000992015335933258, + "loss": 0.9336, + "step": 1587 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009919998355908293, + "loss": 0.8789, + "step": 1588 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009919843203391899, + "loss": 0.875, + "step": 1589 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991968790178809, + "loss": 1.0156, + "step": 1590 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009919532451101581, + "loss": 0.9961, + "step": 1591 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009919376851337078, + "loss": 1.0312, + "step": 1592 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009919221102499297, + "loss": 0.8828, + "step": 1593 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009919065204592958, + "loss": 0.957, + "step": 1594 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009918909157622786, + "loss": 0.9805, + "step": 1595 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009918752961593509, + "loss": 0.8711, + "step": 1596 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009918596616509858, + "loss": 1.0312, + "step": 1597 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009918440122376575, + "loss": 0.9766, + "step": 1598 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009918283479198398, + "loss": 0.8555, + "step": 1599 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991812668698008, + "loss": 0.8164, + "step": 1600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009917969745726362, + "loss": 0.9648, + "step": 1601 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009917812655442012, + "loss": 0.9766, + "step": 1602 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991765541613178, + "loss": 0.8672, + "step": 1603 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009917498027800437, + "loss": 0.9727, + "step": 1604 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009917340490452754, + "loss": 0.9336, + "step": 1605 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009917182804093498, + "loss": 1.0938, + "step": 1606 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009917024968727451, + "loss": 0.8984, + "step": 1607 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009916866984359396, + "loss": 0.875, + "step": 1608 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009916708850994122, + "loss": 0.9258, + "step": 1609 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009916550568636418, + "loss": 0.9297, + "step": 1610 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009916392137291083, + "loss": 1.0078, + "step": 1611 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009916233556962915, + "loss": 0.8789, + "step": 1612 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009916074827656723, + "loss": 0.9688, + "step": 1613 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009915915949377315, + "loss": 0.9102, + "step": 1614 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009915756922129506, + "loss": 0.9297, + "step": 1615 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009915597745918114, + "loss": 0.9648, + "step": 1616 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009915438420747965, + "loss": 0.9453, + "step": 1617 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009915278946623885, + "loss": 0.9375, + "step": 1618 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009915119323550706, + "loss": 0.8984, + "step": 1619 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009914959551533267, + "loss": 0.9375, + "step": 1620 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991479963057641, + "loss": 1.0156, + "step": 1621 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009914639560684977, + "loss": 0.8789, + "step": 1622 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009914479341863824, + "loss": 0.9531, + "step": 1623 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009914318974117804, + "loss": 0.9844, + "step": 1624 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009914158457451777, + "loss": 0.9375, + "step": 1625 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009913997791870604, + "loss": 0.9375, + "step": 1626 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009913836977379158, + "loss": 0.9023, + "step": 1627 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991367601398231, + "loss": 0.8633, + "step": 1628 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009913514901684938, + "loss": 0.9258, + "step": 1629 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009913353640491926, + "loss": 0.9453, + "step": 1630 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991319223040816, + "loss": 0.9219, + "step": 1631 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009913030671438528, + "loss": 0.8672, + "step": 1632 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991286896358793, + "loss": 0.8711, + "step": 1633 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009912707106861263, + "loss": 1.0469, + "step": 1634 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009912545101263435, + "loss": 0.9141, + "step": 1635 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009912382946799354, + "loss": 0.9375, + "step": 1636 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009912220643473932, + "loss": 0.8984, + "step": 1637 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991205819129209, + "loss": 0.8867, + "step": 1638 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009911895590258748, + "loss": 0.9492, + "step": 1639 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009911732840378836, + "loss": 0.9023, + "step": 1640 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009911569941657285, + "loss": 0.8516, + "step": 1641 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991140689409903, + "loss": 0.8828, + "step": 1642 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009911243697709013, + "loss": 0.8203, + "step": 1643 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009911080352492181, + "loss": 0.9531, + "step": 1644 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009910916858453483, + "loss": 0.918, + "step": 1645 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009910753215597872, + "loss": 0.8906, + "step": 1646 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009910589423930305, + "loss": 0.9648, + "step": 1647 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009910425483455752, + "loss": 0.9336, + "step": 1648 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009910261394179174, + "loss": 0.9961, + "step": 1649 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991009715610555, + "loss": 0.9492, + "step": 1650 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990993276923985, + "loss": 0.918, + "step": 1651 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990976823358706, + "loss": 0.9219, + "step": 1652 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009909603549152165, + "loss": 0.9062, + "step": 1653 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009909438715940155, + "loss": 0.8984, + "step": 1654 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009909273733956026, + "loss": 0.9492, + "step": 1655 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009909108603204776, + "loss": 0.9766, + "step": 1656 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990894332369141, + "loss": 0.8672, + "step": 1657 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009908777895420936, + "loss": 0.9492, + "step": 1658 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990861231839837, + "loss": 0.9102, + "step": 1659 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009908446592628724, + "loss": 0.9844, + "step": 1660 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009908280718117024, + "loss": 0.8828, + "step": 1661 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009908114694868295, + "loss": 0.9375, + "step": 1662 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990794852288757, + "loss": 0.9297, + "step": 1663 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009907782202179883, + "loss": 0.9375, + "step": 1664 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009907615732750272, + "loss": 0.9102, + "step": 1665 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009907449114603785, + "loss": 0.9062, + "step": 1666 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990728234774547, + "loss": 1.0, + "step": 1667 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990711543218038, + "loss": 0.9648, + "step": 1668 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009906948367913572, + "loss": 0.9805, + "step": 1669 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009906781154950114, + "loss": 0.8203, + "step": 1670 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009906613793295065, + "loss": 1.0469, + "step": 1671 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009906446282953504, + "loss": 0.9258, + "step": 1672 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009906278623930502, + "loss": 0.9414, + "step": 1673 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990611081623114, + "loss": 0.9258, + "step": 1674 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009905942859860508, + "loss": 0.9102, + "step": 1675 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990577475482369, + "loss": 0.9453, + "step": 1676 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009905606501125783, + "loss": 0.9219, + "step": 1677 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009905438098771885, + "loss": 0.9336, + "step": 1678 + }, + { + "epoch": 0.09, + "learning_rate": 0.00099052695477671, + "loss": 0.8555, + "step": 1679 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009905100848116534, + "loss": 0.9727, + "step": 1680 + }, + { + "epoch": 0.09, + "learning_rate": 0.00099049319998253, + "loss": 0.9961, + "step": 1681 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009904763002898515, + "loss": 0.9258, + "step": 1682 + }, + { + "epoch": 0.09, + "learning_rate": 0.00099045938573413, + "loss": 0.9531, + "step": 1683 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009904424563158781, + "loss": 0.9453, + "step": 1684 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009904255120356084, + "loss": 0.9336, + "step": 1685 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009904085528938352, + "loss": 0.8555, + "step": 1686 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009903915788910718, + "loss": 0.8594, + "step": 1687 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009903745900278324, + "loss": 1.0078, + "step": 1688 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009903575863046325, + "loss": 0.8945, + "step": 1689 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990340567721987, + "loss": 0.9453, + "step": 1690 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009903235342804116, + "loss": 0.9883, + "step": 1691 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009903064859804222, + "loss": 0.8867, + "step": 1692 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009902894228225362, + "loss": 1.0312, + "step": 1693 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009902723448072697, + "loss": 0.9023, + "step": 1694 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990255251935141, + "loss": 0.8828, + "step": 1695 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009902381442066677, + "loss": 0.9648, + "step": 1696 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990221021622368, + "loss": 0.9336, + "step": 1697 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009902038841827616, + "loss": 0.8984, + "step": 1698 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990186731888367, + "loss": 0.9883, + "step": 1699 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009901695647397042, + "loss": 1.0078, + "step": 1700 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009901523827372937, + "loss": 0.8555, + "step": 1701 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009901351858816556, + "loss": 0.9531, + "step": 1702 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009901179741733117, + "loss": 0.8555, + "step": 1703 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990100747612783, + "loss": 0.8984, + "step": 1704 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990083506200592, + "loss": 0.9453, + "step": 1705 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009900662499372607, + "loss": 0.8477, + "step": 1706 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009900489788233124, + "loss": 0.9453, + "step": 1707 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009900316928592701, + "loss": 0.918, + "step": 1708 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990014392045658, + "loss": 0.9336, + "step": 1709 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009899970763830005, + "loss": 0.8672, + "step": 1710 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009899797458718215, + "loss": 0.9219, + "step": 1711 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009899624005126472, + "loss": 1.0078, + "step": 1712 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009899450403060023, + "loss": 0.918, + "step": 1713 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009899276652524135, + "loss": 0.875, + "step": 1714 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009899102753524072, + "loss": 0.9102, + "step": 1715 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009898928706065103, + "loss": 1.0, + "step": 1716 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009898754510152502, + "loss": 0.9531, + "step": 1717 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009898580165791546, + "loss": 0.9531, + "step": 1718 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009898405672987523, + "loss": 0.918, + "step": 1719 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009898231031745715, + "loss": 0.8477, + "step": 1720 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009898056242071418, + "loss": 0.9727, + "step": 1721 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009897881303969928, + "loss": 0.8555, + "step": 1722 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009897706217446545, + "loss": 0.9219, + "step": 1723 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009897530982506577, + "loss": 0.8516, + "step": 1724 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009897355599155333, + "loss": 0.918, + "step": 1725 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009897180067398126, + "loss": 0.8594, + "step": 1726 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009897004387240276, + "loss": 0.9375, + "step": 1727 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989682855868711, + "loss": 1.0156, + "step": 1728 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009896652581743953, + "loss": 0.9375, + "step": 1729 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009896476456416135, + "loss": 0.8438, + "step": 1730 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009896300182709, + "loss": 0.9727, + "step": 1731 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009896123760627885, + "loss": 1.0078, + "step": 1732 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009895947190178137, + "loss": 0.8203, + "step": 1733 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009895770471365106, + "loss": 0.9883, + "step": 1734 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989559360419415, + "loss": 0.8633, + "step": 1735 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009895416588670625, + "loss": 0.9219, + "step": 1736 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009895239424799896, + "loss": 0.9141, + "step": 1737 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009895062112587334, + "loss": 0.9102, + "step": 1738 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009894884652038311, + "loss": 1.0078, + "step": 1739 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009894707043158204, + "loss": 0.8594, + "step": 1740 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009894529285952396, + "loss": 1.0469, + "step": 1741 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009894351380426271, + "loss": 0.9883, + "step": 1742 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009894173326585225, + "loss": 0.9609, + "step": 1743 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009893995124434648, + "loss": 0.9141, + "step": 1744 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009893816773979944, + "loss": 0.8594, + "step": 1745 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009893638275226518, + "loss": 1.0469, + "step": 1746 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009893459628179775, + "loss": 0.8945, + "step": 1747 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009893280832845132, + "loss": 0.918, + "step": 1748 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009893101889228008, + "loss": 0.9648, + "step": 1749 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009892922797333822, + "loss": 0.8789, + "step": 1750 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009892743557168003, + "loss": 0.9492, + "step": 1751 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009892564168735983, + "loss": 1.0, + "step": 1752 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009892384632043197, + "loss": 0.9297, + "step": 1753 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009892204947095087, + "loss": 1.0, + "step": 1754 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009892025113897097, + "loss": 0.9766, + "step": 1755 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009891845132454676, + "loss": 0.918, + "step": 1756 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989166500277328, + "loss": 0.9609, + "step": 1757 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009891484724858363, + "loss": 0.875, + "step": 1758 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009891304298715394, + "loss": 0.9062, + "step": 1759 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009891123724349838, + "loss": 0.9375, + "step": 1760 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009890943001767166, + "loss": 1.0, + "step": 1761 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009890762130972855, + "loss": 0.8984, + "step": 1762 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009890581111972386, + "loss": 0.9258, + "step": 1763 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009890399944771245, + "loss": 0.8906, + "step": 1764 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009890218629374922, + "loss": 0.8945, + "step": 1765 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989003716578891, + "loss": 0.8555, + "step": 1766 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009889855554018711, + "loss": 0.9531, + "step": 1767 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009889673794069825, + "loss": 0.9102, + "step": 1768 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009889491885947761, + "loss": 0.8203, + "step": 1769 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009889309829658032, + "loss": 0.9609, + "step": 1770 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009889127625206157, + "loss": 0.957, + "step": 1771 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009888945272597653, + "loss": 0.9609, + "step": 1772 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009888762771838048, + "loss": 0.9336, + "step": 1773 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009888580122932873, + "loss": 0.8867, + "step": 1774 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009888397325887662, + "loss": 0.8867, + "step": 1775 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009888214380707953, + "loss": 0.9023, + "step": 1776 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009888031287399294, + "loss": 0.9102, + "step": 1777 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988784804596723, + "loss": 0.9727, + "step": 1778 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009887664656417314, + "loss": 0.9531, + "step": 1779 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009887481118755104, + "loss": 0.9141, + "step": 1780 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009887297432986163, + "loss": 0.9102, + "step": 1781 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009887113599116053, + "loss": 0.957, + "step": 1782 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988692961715035, + "loss": 0.8281, + "step": 1783 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009886745487094626, + "loss": 0.9805, + "step": 1784 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009886561208954463, + "loss": 1.0312, + "step": 1785 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009886376782735442, + "loss": 1.0234, + "step": 1786 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009886192208443155, + "loss": 0.9062, + "step": 1787 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009886007486083193, + "loss": 1.0, + "step": 1788 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009885822615661156, + "loss": 0.8555, + "step": 1789 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009885637597182645, + "loss": 0.957, + "step": 1790 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009885452430653263, + "loss": 1.0156, + "step": 1791 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009885267116078627, + "loss": 0.8711, + "step": 1792 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009885081653464352, + "loss": 0.9648, + "step": 1793 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009884896042816054, + "loss": 0.9727, + "step": 1794 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988471028413936, + "loss": 0.9219, + "step": 1795 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009884524377439898, + "loss": 0.9141, + "step": 1796 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009884338322723306, + "loss": 0.9766, + "step": 1797 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009884152119995215, + "loss": 0.9453, + "step": 1798 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009883965769261274, + "loss": 0.9414, + "step": 1799 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009883779270527127, + "loss": 0.9336, + "step": 1800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009883592623798423, + "loss": 1.0156, + "step": 1801 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009883405829080824, + "loss": 0.9336, + "step": 1802 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009883218886379986, + "loss": 0.8516, + "step": 1803 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009883031795701576, + "loss": 0.8672, + "step": 1804 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009882844557051263, + "loss": 1.0234, + "step": 1805 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009882657170434718, + "loss": 0.9062, + "step": 1806 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009882469635857625, + "loss": 0.9805, + "step": 1807 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009882281953325662, + "loss": 0.9141, + "step": 1808 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009882094122844521, + "loss": 0.8828, + "step": 1809 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988190614441989, + "loss": 0.8945, + "step": 1810 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009881718018057467, + "loss": 0.832, + "step": 1811 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988152974376295, + "loss": 1.0078, + "step": 1812 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988134132154205, + "loss": 0.9258, + "step": 1813 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009881152751400474, + "loss": 0.875, + "step": 1814 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009880964033343934, + "loss": 0.8867, + "step": 1815 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988077516737815, + "loss": 0.9414, + "step": 1816 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009880586153508848, + "loss": 0.9688, + "step": 1817 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009880396991741754, + "loss": 0.9883, + "step": 1818 + }, + { + "epoch": 0.1, + "learning_rate": 0.00098802076820826, + "loss": 0.8984, + "step": 1819 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009880018224537122, + "loss": 0.9844, + "step": 1820 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009879828619111062, + "loss": 0.8945, + "step": 1821 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009879638865810164, + "loss": 0.8672, + "step": 1822 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009879448964640182, + "loss": 0.9805, + "step": 1823 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009879258915606868, + "loss": 0.9531, + "step": 1824 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987906871871598, + "loss": 0.9492, + "step": 1825 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009878878373973284, + "loss": 0.9062, + "step": 1826 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009878687881384545, + "loss": 0.9414, + "step": 1827 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987849724095554, + "loss": 0.9375, + "step": 1828 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009878306452692043, + "loss": 0.9844, + "step": 1829 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009878115516599837, + "loss": 0.9961, + "step": 1830 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009877924432684707, + "loss": 0.9531, + "step": 1831 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009877733200952443, + "loss": 0.9297, + "step": 1832 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987754182140884, + "loss": 0.9961, + "step": 1833 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009877350294059698, + "loss": 0.9844, + "step": 1834 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009877158618910823, + "loss": 0.9805, + "step": 1835 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987696679596802, + "loss": 0.9219, + "step": 1836 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009876774825237102, + "loss": 0.9023, + "step": 1837 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987658270672389, + "loss": 0.9453, + "step": 1838 + }, + { + "epoch": 0.1, + "learning_rate": 0.00098763904404342, + "loss": 0.918, + "step": 1839 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009876198026373864, + "loss": 0.9219, + "step": 1840 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009876005464548709, + "loss": 0.8789, + "step": 1841 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009875812754964574, + "loss": 1.0156, + "step": 1842 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009875619897627295, + "loss": 0.9766, + "step": 1843 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009875426892542718, + "loss": 0.9609, + "step": 1844 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987523373971669, + "loss": 0.957, + "step": 1845 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009875040439155068, + "loss": 0.8984, + "step": 1846 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009874846990863705, + "loss": 0.9062, + "step": 1847 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009874653394848466, + "loss": 0.9023, + "step": 1848 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009874459651115217, + "loss": 1.0391, + "step": 1849 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009874265759669827, + "loss": 0.8906, + "step": 1850 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009874071720518177, + "loss": 0.9609, + "step": 1851 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987387753366614, + "loss": 1.0469, + "step": 1852 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009873683199119606, + "loss": 1.0078, + "step": 1853 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987348871688446, + "loss": 0.9609, + "step": 1854 + }, + { + "epoch": 0.1, + "learning_rate": 0.00098732940869666, + "loss": 0.8828, + "step": 1855 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987309930937192, + "loss": 0.8789, + "step": 1856 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009872904384106323, + "loss": 0.9453, + "step": 1857 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009872709311175719, + "loss": 0.9648, + "step": 1858 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009872514090586014, + "loss": 0.918, + "step": 1859 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009872318722343129, + "loss": 0.9297, + "step": 1860 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009872123206452982, + "loss": 0.9219, + "step": 1861 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009871927542921496, + "loss": 0.8281, + "step": 1862 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009871731731754603, + "loss": 0.8789, + "step": 1863 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009871535772958235, + "loss": 0.9727, + "step": 1864 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009871339666538332, + "loss": 0.9609, + "step": 1865 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009871143412500836, + "loss": 0.9805, + "step": 1866 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009870947010851695, + "loss": 0.9258, + "step": 1867 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009870750461596857, + "loss": 0.8828, + "step": 1868 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009870553764742282, + "loss": 0.8242, + "step": 1869 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009870356920293928, + "loss": 0.9297, + "step": 1870 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009870159928257762, + "loss": 0.8516, + "step": 1871 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009869962788639753, + "loss": 1.0, + "step": 1872 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009869765501445873, + "loss": 0.9414, + "step": 1873 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009869568066682103, + "loss": 0.9102, + "step": 1874 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009869370484354425, + "loss": 1.0, + "step": 1875 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009869172754468827, + "loss": 0.8594, + "step": 1876 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009868974877031299, + "loss": 0.9141, + "step": 1877 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986877685204784, + "loss": 0.9531, + "step": 1878 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009868578679524448, + "loss": 0.9023, + "step": 1879 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986838035946713, + "loss": 0.8398, + "step": 1880 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009868181891881894, + "loss": 0.957, + "step": 1881 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009867983276774758, + "loss": 0.8945, + "step": 1882 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009867784514151737, + "loss": 0.9492, + "step": 1883 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009867585604018856, + "loss": 0.9844, + "step": 1884 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009867386546382143, + "loss": 0.9102, + "step": 1885 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009867187341247629, + "loss": 0.8906, + "step": 1886 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986698798862135, + "loss": 0.9688, + "step": 1887 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009866788488509346, + "loss": 0.9258, + "step": 1888 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009866588840917667, + "loss": 0.9336, + "step": 1889 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986638904585236, + "loss": 0.8555, + "step": 1890 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986618910331948, + "loss": 0.957, + "step": 1891 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009865989013325082, + "loss": 0.9219, + "step": 1892 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009865788775875238, + "loss": 1.0312, + "step": 1893 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009865588390976009, + "loss": 0.8438, + "step": 1894 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009865387858633467, + "loss": 0.9492, + "step": 1895 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009865187178853695, + "loss": 0.9531, + "step": 1896 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009864986351642768, + "loss": 0.8789, + "step": 1897 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009864785377006772, + "loss": 0.8945, + "step": 1898 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009864584254951803, + "loss": 1.0, + "step": 1899 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986438298548395, + "loss": 0.8828, + "step": 1900 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009864181568609316, + "loss": 0.9648, + "step": 1901 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009863980004334, + "loss": 0.9688, + "step": 1902 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009863778292664115, + "loss": 0.8477, + "step": 1903 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009863576433605768, + "loss": 0.9141, + "step": 1904 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986337442716508, + "loss": 0.8633, + "step": 1905 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009863172273348174, + "loss": 0.9883, + "step": 1906 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009862969972161172, + "loss": 0.8398, + "step": 1907 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009862767523610206, + "loss": 1.0703, + "step": 1908 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986256492770141, + "loss": 0.9727, + "step": 1909 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009862362184440924, + "loss": 0.9375, + "step": 1910 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009862159293834894, + "loss": 0.9375, + "step": 1911 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009861956255889464, + "loss": 0.9648, + "step": 1912 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009861753070610788, + "loss": 0.9414, + "step": 1913 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009861549738005026, + "loss": 0.9219, + "step": 1914 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009861346258078338, + "loss": 0.9805, + "step": 1915 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986114263083689, + "loss": 0.8789, + "step": 1916 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986093885628685, + "loss": 0.8828, + "step": 1917 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009860734934434396, + "loss": 0.9297, + "step": 1918 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009860530865285707, + "loss": 0.9258, + "step": 1919 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986032664884697, + "loss": 0.9609, + "step": 1920 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009860122285124367, + "loss": 0.9102, + "step": 1921 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009859917774124095, + "loss": 0.8906, + "step": 1922 + }, + { + "epoch": 0.1, + "learning_rate": 0.000985971311585235, + "loss": 0.9375, + "step": 1923 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009859508310315335, + "loss": 0.8555, + "step": 1924 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009859303357519255, + "loss": 0.8633, + "step": 1925 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009859098257470323, + "loss": 0.9336, + "step": 1926 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009858893010174753, + "loss": 0.9805, + "step": 1927 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009858687615638762, + "loss": 0.9609, + "step": 1928 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009858482073868576, + "loss": 0.8555, + "step": 1929 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009858276384870426, + "loss": 0.8984, + "step": 1930 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009858070548650541, + "loss": 0.8789, + "step": 1931 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009857864565215161, + "loss": 0.8867, + "step": 1932 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009857658434570529, + "loss": 0.957, + "step": 1933 + }, + { + "epoch": 0.1, + "learning_rate": 0.000985745215672289, + "loss": 0.918, + "step": 1934 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009857245731678494, + "loss": 0.9258, + "step": 1935 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009857039159443597, + "loss": 0.9023, + "step": 1936 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009856832440024459, + "loss": 0.9609, + "step": 1937 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009856625573427345, + "loss": 1.0, + "step": 1938 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009856418559658522, + "loss": 0.957, + "step": 1939 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009856211398724265, + "loss": 0.9805, + "step": 1940 + }, + { + "epoch": 0.1, + "learning_rate": 0.000985600409063085, + "loss": 0.9531, + "step": 1941 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009855796635384563, + "loss": 0.9219, + "step": 1942 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009855589032991684, + "loss": 0.8984, + "step": 1943 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009855381283458509, + "loss": 0.9766, + "step": 1944 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009855173386791331, + "loss": 0.8945, + "step": 1945 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009854965342996453, + "loss": 0.9609, + "step": 1946 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009854757152080176, + "loss": 0.9258, + "step": 1947 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009854548814048811, + "loss": 0.9258, + "step": 1948 + }, + { + "epoch": 0.1, + "learning_rate": 0.000985434032890867, + "loss": 0.9414, + "step": 1949 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009854131696666072, + "loss": 0.9023, + "step": 1950 + }, + { + "epoch": 0.1, + "learning_rate": 0.000985392291732734, + "loss": 0.9219, + "step": 1951 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009853713990898797, + "loss": 0.9609, + "step": 1952 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009853504917386778, + "loss": 0.9258, + "step": 1953 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009853295696797615, + "loss": 0.8438, + "step": 1954 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009853086329137653, + "loss": 0.9336, + "step": 1955 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009852876814413233, + "loss": 0.9219, + "step": 1956 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009852667152630706, + "loss": 0.9844, + "step": 1957 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009852457343796422, + "loss": 0.9375, + "step": 1958 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009852247387916741, + "loss": 0.8711, + "step": 1959 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009852037284998026, + "loss": 0.9336, + "step": 1960 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009851827035046643, + "loss": 0.9258, + "step": 1961 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009851616638068966, + "loss": 0.9297, + "step": 1962 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009851406094071365, + "loss": 0.9414, + "step": 1963 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009851195403060224, + "loss": 0.832, + "step": 1964 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009850984565041927, + "loss": 0.9648, + "step": 1965 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009850773580022865, + "loss": 0.8828, + "step": 1966 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009850562448009428, + "loss": 0.9336, + "step": 1967 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009850351169008015, + "loss": 0.9492, + "step": 1968 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009850139743025031, + "loss": 0.9102, + "step": 1969 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984992817006688, + "loss": 0.8984, + "step": 1970 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009849716450139976, + "loss": 0.8281, + "step": 1971 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984950458325073, + "loss": 1.0078, + "step": 1972 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009849292569405568, + "loss": 0.9375, + "step": 1973 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984908040861091, + "loss": 0.9805, + "step": 1974 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984886810087319, + "loss": 0.9414, + "step": 1975 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009848655646198837, + "loss": 0.9219, + "step": 1976 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009848443044594294, + "loss": 0.9883, + "step": 1977 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009848230296065996, + "loss": 0.9414, + "step": 1978 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009848017400620396, + "loss": 0.8867, + "step": 1979 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009847804358263945, + "loss": 0.9648, + "step": 1980 + }, + { + "epoch": 0.11, + "learning_rate": 0.00098475911690031, + "loss": 0.9844, + "step": 1981 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009847377832844315, + "loss": 1.0078, + "step": 1982 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984716434979406, + "loss": 1.0312, + "step": 1983 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009846950719858802, + "loss": 0.8867, + "step": 1984 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009846736943045018, + "loss": 1.0078, + "step": 1985 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009846523019359185, + "loss": 0.8164, + "step": 1986 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009846308948807782, + "loss": 0.9922, + "step": 1987 + }, + { + "epoch": 0.11, + "learning_rate": 0.00098460947313973, + "loss": 0.9375, + "step": 1988 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984588036713423, + "loss": 0.8945, + "step": 1989 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009845665856025065, + "loss": 0.9805, + "step": 1990 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984545119807631, + "loss": 0.9336, + "step": 1991 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009845236393294465, + "loss": 0.9609, + "step": 1992 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984502144168604, + "loss": 0.9336, + "step": 1993 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009844806343257556, + "loss": 0.9453, + "step": 1994 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984459109801552, + "loss": 0.8906, + "step": 1995 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009844375705966464, + "loss": 0.8516, + "step": 1996 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984416016711691, + "loss": 0.7812, + "step": 1997 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984394448147339, + "loss": 0.875, + "step": 1998 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984372864904244, + "loss": 0.832, + "step": 1999 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009843512669830603, + "loss": 0.875, + "step": 2000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009843296543844422, + "loss": 0.8984, + "step": 2001 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009843080271090445, + "loss": 0.9375, + "step": 2002 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984286385157523, + "loss": 0.9844, + "step": 2003 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984264728530533, + "loss": 0.9023, + "step": 2004 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009842430572287312, + "loss": 0.9141, + "step": 2005 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984221371252774, + "loss": 0.918, + "step": 2006 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009841996706033186, + "loss": 0.957, + "step": 2007 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009841779552810226, + "loss": 0.9648, + "step": 2008 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009841562252865443, + "loss": 0.9648, + "step": 2009 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984134480620542, + "loss": 0.8984, + "step": 2010 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009841127212836746, + "loss": 0.8555, + "step": 2011 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009840909472766015, + "loss": 0.9102, + "step": 2012 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009840691585999827, + "loss": 0.8984, + "step": 2013 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009840473552544782, + "loss": 0.8203, + "step": 2014 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984025537240749, + "loss": 0.8633, + "step": 2015 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984003704559456, + "loss": 1.0156, + "step": 2016 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009839818572112606, + "loss": 1.0625, + "step": 2017 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009839599951968255, + "loss": 0.8555, + "step": 2018 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009839381185168126, + "loss": 0.9883, + "step": 2019 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983916227171885, + "loss": 1.0469, + "step": 2020 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009838943211627066, + "loss": 0.957, + "step": 2021 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009838724004899405, + "loss": 0.918, + "step": 2022 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983850465154251, + "loss": 0.875, + "step": 2023 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009838285151563034, + "loss": 0.9062, + "step": 2024 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009838065504967624, + "loss": 1.0234, + "step": 2025 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009837845711762937, + "loss": 0.9688, + "step": 2026 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009837625771955633, + "loss": 0.9414, + "step": 2027 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009837405685552377, + "loss": 0.9453, + "step": 2028 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983718545255984, + "loss": 0.8945, + "step": 2029 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009836965072984693, + "loss": 0.9414, + "step": 2030 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009836744546833618, + "loss": 1.0, + "step": 2031 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009836523874113294, + "loss": 0.9062, + "step": 2032 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983630305483041, + "loss": 0.9062, + "step": 2033 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009836082088991657, + "loss": 0.9336, + "step": 2034 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009835860976603731, + "loss": 0.9062, + "step": 2035 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009835639717673333, + "loss": 0.8984, + "step": 2036 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009835418312207167, + "loss": 0.9219, + "step": 2037 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009835196760211943, + "loss": 0.9141, + "step": 2038 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009834975061694374, + "loss": 0.9688, + "step": 2039 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983475321666118, + "loss": 0.957, + "step": 2040 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983453122511908, + "loss": 0.9883, + "step": 2041 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009834309087074803, + "loss": 0.8594, + "step": 2042 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009834086802535083, + "loss": 0.9102, + "step": 2043 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009833864371506654, + "loss": 0.8984, + "step": 2044 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009833641793996256, + "loss": 0.9492, + "step": 2045 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009833419070010632, + "loss": 0.9219, + "step": 2046 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009833196199556535, + "loss": 0.9453, + "step": 2047 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009832973182640717, + "loss": 0.8906, + "step": 2048 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009832750019269935, + "loss": 0.8516, + "step": 2049 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009832526709450955, + "loss": 0.8789, + "step": 2050 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983230325319054, + "loss": 0.9844, + "step": 2051 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009832079650495464, + "loss": 0.9141, + "step": 2052 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009831855901372502, + "loss": 0.9453, + "step": 2053 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009831632005828433, + "loss": 0.875, + "step": 2054 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009831407963870045, + "loss": 0.9336, + "step": 2055 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009831183775504124, + "loss": 0.9297, + "step": 2056 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009830959440737467, + "loss": 0.8594, + "step": 2057 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983073495957687, + "loss": 0.8477, + "step": 2058 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009830510332029135, + "loss": 0.9023, + "step": 2059 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009830285558101069, + "loss": 0.9141, + "step": 2060 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009830060637799486, + "loss": 0.8633, + "step": 2061 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009829835571131195, + "loss": 0.9062, + "step": 2062 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009829610358103025, + "loss": 0.9688, + "step": 2063 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009829384998721797, + "loss": 0.9805, + "step": 2064 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982915949299434, + "loss": 0.8828, + "step": 2065 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009828933840927486, + "loss": 0.8672, + "step": 2066 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009828708042528076, + "loss": 0.9883, + "step": 2067 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982848209780295, + "loss": 0.9141, + "step": 2068 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009828256006758956, + "loss": 0.9414, + "step": 2069 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009828029769402944, + "loss": 0.8555, + "step": 2070 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009827803385741773, + "loss": 0.9141, + "step": 2071 + }, + { + "epoch": 0.11, + "learning_rate": 0.00098275768557823, + "loss": 0.9453, + "step": 2072 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982735017953139, + "loss": 0.9102, + "step": 2073 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009827123356995913, + "loss": 0.9336, + "step": 2074 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009826896388182742, + "loss": 1.0391, + "step": 2075 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009826669273098757, + "loss": 0.8594, + "step": 2076 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009826442011750835, + "loss": 0.9453, + "step": 2077 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009826214604145868, + "loss": 0.8789, + "step": 2078 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009825987050290745, + "loss": 0.8984, + "step": 2079 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009825759350192362, + "loss": 0.9805, + "step": 2080 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982553150385762, + "loss": 0.8789, + "step": 2081 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009825303511293419, + "loss": 0.9727, + "step": 2082 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009825075372506674, + "loss": 1.0078, + "step": 2083 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009824847087504297, + "loss": 0.9727, + "step": 2084 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009824618656293202, + "loss": 0.9375, + "step": 2085 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009824390078880315, + "loss": 1.0156, + "step": 2086 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009824161355272561, + "loss": 0.8242, + "step": 2087 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009823932485476872, + "loss": 0.9766, + "step": 2088 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009823703469500182, + "loss": 0.9062, + "step": 2089 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009823474307349433, + "loss": 0.9258, + "step": 2090 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009823244999031568, + "loss": 0.8828, + "step": 2091 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009823015544553535, + "loss": 0.8438, + "step": 2092 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982278594392229, + "loss": 0.8984, + "step": 2093 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009822556197144788, + "loss": 0.9648, + "step": 2094 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009822326304227992, + "loss": 0.8359, + "step": 2095 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982209626517887, + "loss": 0.8789, + "step": 2096 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982186608000439, + "loss": 0.8867, + "step": 2097 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009821635748711527, + "loss": 0.8828, + "step": 2098 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009821405271307267, + "loss": 0.8984, + "step": 2099 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009821174647798587, + "loss": 0.9609, + "step": 2100 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982094387819248, + "loss": 0.918, + "step": 2101 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009820712962495936, + "loss": 0.8672, + "step": 2102 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009820481900715955, + "loss": 1.0312, + "step": 2103 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009820250692859538, + "loss": 0.9648, + "step": 2104 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009820019338933692, + "loss": 0.9336, + "step": 2105 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009819787838945427, + "loss": 0.9141, + "step": 2106 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009819556192901756, + "loss": 1.0312, + "step": 2107 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009819324400809704, + "loss": 0.8438, + "step": 2108 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009819092462676293, + "loss": 0.8945, + "step": 2109 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009818860378508548, + "loss": 1.0156, + "step": 2110 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009818628148313506, + "loss": 0.9766, + "step": 2111 + }, + { + "epoch": 0.11, + "learning_rate": 0.00098183957720982, + "loss": 0.9219, + "step": 2112 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009818163249869678, + "loss": 0.9375, + "step": 2113 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009817930581634982, + "loss": 0.9141, + "step": 2114 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009817697767401162, + "loss": 0.9531, + "step": 2115 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009817464807175276, + "loss": 0.8867, + "step": 2116 + }, + { + "epoch": 0.11, + "learning_rate": 0.000981723170096438, + "loss": 0.8359, + "step": 2117 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009816998448775541, + "loss": 0.9219, + "step": 2118 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009816765050615825, + "loss": 0.9766, + "step": 2119 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009816531506492308, + "loss": 0.9844, + "step": 2120 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009816297816412063, + "loss": 0.875, + "step": 2121 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009816063980382175, + "loss": 0.8789, + "step": 2122 + }, + { + "epoch": 0.11, + "learning_rate": 0.000981582999840973, + "loss": 0.9023, + "step": 2123 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009815595870501812, + "loss": 0.9414, + "step": 2124 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009815361596665524, + "loss": 0.8867, + "step": 2125 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009815127176907963, + "loss": 0.9023, + "step": 2126 + }, + { + "epoch": 0.11, + "learning_rate": 0.000981489261123623, + "loss": 0.8359, + "step": 2127 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009814657899657436, + "loss": 0.9336, + "step": 2128 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009814423042178691, + "loss": 0.9141, + "step": 2129 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009814188038807113, + "loss": 0.9102, + "step": 2130 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009813952889549827, + "loss": 0.9219, + "step": 2131 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009813717594413952, + "loss": 0.8906, + "step": 2132 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009813482153406623, + "loss": 0.8359, + "step": 2133 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009813246566534974, + "loss": 0.9492, + "step": 2134 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009813010833806142, + "loss": 0.9102, + "step": 2135 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009812774955227273, + "loss": 0.9453, + "step": 2136 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009812538930805514, + "loss": 0.9727, + "step": 2137 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009812302760548017, + "loss": 0.9961, + "step": 2138 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009812066444461938, + "loss": 0.8789, + "step": 2139 + }, + { + "epoch": 0.12, + "learning_rate": 0.000981182998255444, + "loss": 0.832, + "step": 2140 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009811593374832689, + "loss": 0.9102, + "step": 2141 + }, + { + "epoch": 0.12, + "learning_rate": 0.000981135662130385, + "loss": 0.9023, + "step": 2142 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009811119721975104, + "loss": 0.8633, + "step": 2143 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009810882676853626, + "loss": 0.9766, + "step": 2144 + }, + { + "epoch": 0.12, + "learning_rate": 0.00098106454859466, + "loss": 0.9375, + "step": 2145 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009810408149261212, + "loss": 0.9883, + "step": 2146 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009810170666804657, + "loss": 0.9297, + "step": 2147 + }, + { + "epoch": 0.12, + "learning_rate": 0.000980993303858413, + "loss": 0.9727, + "step": 2148 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009809695264606834, + "loss": 0.9688, + "step": 2149 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009809457344879971, + "loss": 0.8945, + "step": 2150 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009809219279410752, + "loss": 0.9297, + "step": 2151 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009808981068206392, + "loss": 0.8633, + "step": 2152 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009808742711274107, + "loss": 0.8984, + "step": 2153 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009808504208621124, + "loss": 0.9375, + "step": 2154 + }, + { + "epoch": 0.12, + "learning_rate": 0.000980826556025467, + "loss": 0.8008, + "step": 2155 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009808026766181975, + "loss": 0.8594, + "step": 2156 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009807787826410274, + "loss": 0.9609, + "step": 2157 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009807548740946809, + "loss": 0.9609, + "step": 2158 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009807309509798825, + "loss": 0.8828, + "step": 2159 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009807070132973573, + "loss": 0.9609, + "step": 2160 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009806830610478306, + "loss": 0.9844, + "step": 2161 + }, + { + "epoch": 0.12, + "learning_rate": 0.000980659094232028, + "loss": 0.9102, + "step": 2162 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009806351128506762, + "loss": 0.9219, + "step": 2163 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009806111169045016, + "loss": 0.9141, + "step": 2164 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009805871063942313, + "loss": 0.8867, + "step": 2165 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009805630813205933, + "loss": 0.9688, + "step": 2166 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009805390416843152, + "loss": 0.9023, + "step": 2167 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009805149874861257, + "loss": 0.8398, + "step": 2168 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009804909187267536, + "loss": 0.9219, + "step": 2169 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009804668354069285, + "loss": 0.9609, + "step": 2170 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009804427375273799, + "loss": 0.9922, + "step": 2171 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009804186250888384, + "loss": 0.8438, + "step": 2172 + }, + { + "epoch": 0.12, + "learning_rate": 0.000980394498092034, + "loss": 0.9258, + "step": 2173 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009803703565376988, + "loss": 0.9219, + "step": 2174 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009803462004265638, + "loss": 0.8398, + "step": 2175 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009803220297593608, + "loss": 0.8516, + "step": 2176 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009802978445368228, + "loss": 0.9219, + "step": 2177 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009802736447596823, + "loss": 0.8828, + "step": 2178 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009802494304286728, + "loss": 0.8945, + "step": 2179 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009802252015445282, + "loss": 0.9062, + "step": 2180 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009802009581079824, + "loss": 0.8906, + "step": 2181 + }, + { + "epoch": 0.12, + "learning_rate": 0.00098017670011977, + "loss": 0.9062, + "step": 2182 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009801524275806265, + "loss": 0.8867, + "step": 2183 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009801281404912873, + "loss": 0.9062, + "step": 2184 + }, + { + "epoch": 0.12, + "learning_rate": 0.000980103838852488, + "loss": 0.9492, + "step": 2185 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009800795226649658, + "loss": 0.918, + "step": 2186 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009800551919294568, + "loss": 0.8672, + "step": 2187 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009800308466466985, + "loss": 0.9219, + "step": 2188 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009800064868174289, + "loss": 0.8945, + "step": 2189 + }, + { + "epoch": 0.12, + "learning_rate": 0.000979982112442386, + "loss": 0.8867, + "step": 2190 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009799577235223085, + "loss": 0.8203, + "step": 2191 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009799333200579351, + "loss": 0.9805, + "step": 2192 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009799089020500061, + "loss": 0.8125, + "step": 2193 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009798844694992605, + "loss": 0.9141, + "step": 2194 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009798600224064395, + "loss": 0.8633, + "step": 2195 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009798355607722835, + "loss": 0.9375, + "step": 2196 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009798110845975338, + "loss": 0.8633, + "step": 2197 + }, + { + "epoch": 0.12, + "learning_rate": 0.000979786593882932, + "loss": 0.9414, + "step": 2198 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009797620886292206, + "loss": 0.9688, + "step": 2199 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009797375688371417, + "loss": 0.9258, + "step": 2200 + }, + { + "epoch": 0.12, + "learning_rate": 0.000979713034507439, + "loss": 0.8828, + "step": 2201 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009796884856408555, + "loss": 0.9258, + "step": 2202 + }, + { + "epoch": 0.12, + "learning_rate": 0.000979663922238135, + "loss": 0.8672, + "step": 2203 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009796393443000221, + "loss": 0.8711, + "step": 2204 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009796147518272617, + "loss": 0.8203, + "step": 2205 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009795901448205989, + "loss": 0.9219, + "step": 2206 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009795655232807791, + "loss": 0.9414, + "step": 2207 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009795408872085487, + "loss": 0.9727, + "step": 2208 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009795162366046544, + "loss": 0.9102, + "step": 2209 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009794915714698429, + "loss": 0.9102, + "step": 2210 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009794668918048617, + "loss": 0.9102, + "step": 2211 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009794421976104589, + "loss": 0.9297, + "step": 2212 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009794174888873826, + "loss": 0.8711, + "step": 2213 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009793927656363815, + "loss": 0.9844, + "step": 2214 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009793680278582049, + "loss": 0.8867, + "step": 2215 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009793432755536024, + "loss": 0.8242, + "step": 2216 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009793185087233242, + "loss": 0.9375, + "step": 2217 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009792937273681206, + "loss": 0.9648, + "step": 2218 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009792689314887426, + "loss": 1.0, + "step": 2219 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009792441210859418, + "loss": 0.9727, + "step": 2220 + }, + { + "epoch": 0.12, + "learning_rate": 0.00097921929616047, + "loss": 0.9844, + "step": 2221 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009791944567130793, + "loss": 0.8828, + "step": 2222 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009791696027445225, + "loss": 0.8516, + "step": 2223 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009791447342555526, + "loss": 0.8867, + "step": 2224 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009791198512469235, + "loss": 0.8633, + "step": 2225 + }, + { + "epoch": 0.12, + "learning_rate": 0.000979094953719389, + "loss": 0.9297, + "step": 2226 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009790700416737036, + "loss": 0.8555, + "step": 2227 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009790451151106226, + "loss": 0.9609, + "step": 2228 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009790201740309007, + "loss": 0.9297, + "step": 2229 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009789952184352945, + "loss": 0.9453, + "step": 2230 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009789702483245593, + "loss": 0.8789, + "step": 2231 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009789452636994524, + "loss": 0.8516, + "step": 2232 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009789202645607308, + "loss": 0.9648, + "step": 2233 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009788952509091522, + "loss": 0.8438, + "step": 2234 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009788702227454741, + "loss": 0.875, + "step": 2235 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009788451800704554, + "loss": 0.9531, + "step": 2236 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009788201228848547, + "loss": 0.9336, + "step": 2237 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009787950511894318, + "loss": 0.9609, + "step": 2238 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009787699649849457, + "loss": 0.9258, + "step": 2239 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009787448642721572, + "loss": 0.8203, + "step": 2240 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009787197490518265, + "loss": 0.8477, + "step": 2241 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009786946193247152, + "loss": 0.8789, + "step": 2242 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009786694750915843, + "loss": 0.8633, + "step": 2243 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009786443163531961, + "loss": 0.8945, + "step": 2244 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978619143110313, + "loss": 0.8789, + "step": 2245 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009785939553636973, + "loss": 0.8828, + "step": 2246 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978568753114113, + "loss": 0.9492, + "step": 2247 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009785435363623232, + "loss": 0.875, + "step": 2248 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009785183051090926, + "loss": 0.8359, + "step": 2249 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009784930593551852, + "loss": 0.8672, + "step": 2250 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009784677991013667, + "loss": 0.9336, + "step": 2251 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009784425243484019, + "loss": 0.8945, + "step": 2252 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009784172350970571, + "loss": 0.8203, + "step": 2253 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009783919313480986, + "loss": 0.9062, + "step": 2254 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009783666131022932, + "loss": 0.9922, + "step": 2255 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978341280360408, + "loss": 0.9297, + "step": 2256 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009783159331232109, + "loss": 0.8711, + "step": 2257 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009782905713914697, + "loss": 0.9023, + "step": 2258 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009782651951659531, + "loss": 0.8242, + "step": 2259 + }, + { + "epoch": 0.12, + "learning_rate": 0.00097823980444743, + "loss": 0.9141, + "step": 2260 + }, + { + "epoch": 0.12, + "learning_rate": 0.00097821439923667, + "loss": 0.9961, + "step": 2261 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978188979534443, + "loss": 0.7969, + "step": 2262 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009781635453415191, + "loss": 1.0469, + "step": 2263 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978138096658669, + "loss": 0.8359, + "step": 2264 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009781126334866641, + "loss": 0.9766, + "step": 2265 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009780871558262758, + "loss": 0.9375, + "step": 2266 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009780616636782763, + "loss": 0.9414, + "step": 2267 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978036157043438, + "loss": 0.918, + "step": 2268 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978010635922534, + "loss": 0.9023, + "step": 2269 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009779851003163373, + "loss": 0.9922, + "step": 2270 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977959550225622, + "loss": 0.8906, + "step": 2271 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009779339856511625, + "loss": 0.9336, + "step": 2272 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009779084065937333, + "loss": 0.8984, + "step": 2273 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009778828130541095, + "loss": 0.8711, + "step": 2274 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009778572050330666, + "loss": 0.9023, + "step": 2275 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977831582531381, + "loss": 0.9023, + "step": 2276 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009778059455498285, + "loss": 0.9375, + "step": 2277 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009777802940891867, + "loss": 0.8789, + "step": 2278 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009777546281502323, + "loss": 0.9258, + "step": 2279 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009777289477337435, + "loss": 0.9062, + "step": 2280 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009777032528404982, + "loss": 0.8828, + "step": 2281 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009776775434712753, + "loss": 0.8867, + "step": 2282 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009776518196268538, + "loss": 0.9062, + "step": 2283 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977626081308013, + "loss": 0.9727, + "step": 2284 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977600328515533, + "loss": 0.8633, + "step": 2285 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009775745612501945, + "loss": 0.8984, + "step": 2286 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009775487795127779, + "loss": 0.8711, + "step": 2287 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009775229833040645, + "loss": 0.9531, + "step": 2288 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009774971726248362, + "loss": 0.8828, + "step": 2289 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009774713474758753, + "loss": 0.9727, + "step": 2290 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009774455078579638, + "loss": 0.8047, + "step": 2291 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009774196537718854, + "loss": 1.0, + "step": 2292 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977393785218423, + "loss": 0.918, + "step": 2293 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977367902198361, + "loss": 0.9062, + "step": 2294 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009773420047124834, + "loss": 0.9297, + "step": 2295 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009773160927615749, + "loss": 0.9102, + "step": 2296 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009772901663464212, + "loss": 0.9453, + "step": 2297 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009772642254678074, + "loss": 1.0703, + "step": 2298 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009772382701265202, + "loss": 0.957, + "step": 2299 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009772123003233455, + "loss": 0.9453, + "step": 2300 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009771863160590705, + "loss": 0.8633, + "step": 2301 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009771603173344828, + "loss": 0.9531, + "step": 2302 + }, + { + "epoch": 0.12, + "learning_rate": 0.00097713430415037, + "loss": 0.9531, + "step": 2303 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009771082765075206, + "loss": 0.8789, + "step": 2304 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009770822344067232, + "loss": 0.9414, + "step": 2305 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977056177848767, + "loss": 0.8906, + "step": 2306 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009770301068344413, + "loss": 1.0156, + "step": 2307 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009770040213645367, + "loss": 0.9414, + "step": 2308 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009769779214398432, + "loss": 0.9688, + "step": 2309 + }, + { + "epoch": 0.12, + "learning_rate": 0.000976951807061152, + "loss": 0.8789, + "step": 2310 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009769256782292545, + "loss": 0.9219, + "step": 2311 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009768995349449421, + "loss": 0.9023, + "step": 2312 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009768733772090076, + "loss": 0.8359, + "step": 2313 + }, + { + "epoch": 0.12, + "learning_rate": 0.000976847205022243, + "loss": 0.8828, + "step": 2314 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009768210183854417, + "loss": 0.8906, + "step": 2315 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009767948172993974, + "loss": 0.9336, + "step": 2316 + }, + { + "epoch": 0.12, + "learning_rate": 0.000976768601764904, + "loss": 0.7773, + "step": 2317 + }, + { + "epoch": 0.12, + "learning_rate": 0.000976742371782756, + "loss": 0.7344, + "step": 2318 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009767161273537476, + "loss": 0.9102, + "step": 2319 + }, + { + "epoch": 0.12, + "learning_rate": 0.000976689868478675, + "loss": 0.8945, + "step": 2320 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009766635951583337, + "loss": 0.9258, + "step": 2321 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009766373073935195, + "loss": 1.0078, + "step": 2322 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009766110051850291, + "loss": 0.9297, + "step": 2323 + }, + { + "epoch": 0.12, + "learning_rate": 0.00097658468853366, + "loss": 0.9258, + "step": 2324 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009765583574402093, + "loss": 0.9219, + "step": 2325 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009765320119054748, + "loss": 0.9883, + "step": 2326 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009765056519302551, + "loss": 1.0312, + "step": 2327 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009764792775153488, + "loss": 0.8906, + "step": 2328 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009764528886615553, + "loss": 0.8672, + "step": 2329 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009764264853696742, + "loss": 0.8984, + "step": 2330 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009764000676405058, + "loss": 0.9375, + "step": 2331 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009763736354748503, + "loss": 0.8281, + "step": 2332 + }, + { + "epoch": 0.13, + "learning_rate": 0.000976347188873509, + "loss": 0.9648, + "step": 2333 + }, + { + "epoch": 0.13, + "learning_rate": 0.000976320727837283, + "loss": 0.9609, + "step": 2334 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009762942523669744, + "loss": 0.9531, + "step": 2335 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009762677624633854, + "loss": 0.7852, + "step": 2336 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009762412581273185, + "loss": 0.8203, + "step": 2337 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009762147393595775, + "loss": 0.9414, + "step": 2338 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009761882061609655, + "loss": 0.8633, + "step": 2339 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009761616585322867, + "loss": 0.918, + "step": 2340 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009761350964743454, + "loss": 0.8906, + "step": 2341 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009761085199879469, + "loss": 0.9297, + "step": 2342 + }, + { + "epoch": 0.13, + "learning_rate": 0.000976081929073896, + "loss": 0.9102, + "step": 2343 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009760553237329989, + "loss": 0.8984, + "step": 2344 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009760287039660621, + "loss": 1.0078, + "step": 2345 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009760020697738916, + "loss": 0.9375, + "step": 2346 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009759754211572947, + "loss": 0.9141, + "step": 2347 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009759487581170792, + "loss": 0.8398, + "step": 2348 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009759220806540529, + "loss": 0.7812, + "step": 2349 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009758953887690243, + "loss": 0.9766, + "step": 2350 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009758686824628023, + "loss": 0.8672, + "step": 2351 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009758419617361959, + "loss": 0.9258, + "step": 2352 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009758152265900151, + "loss": 0.8789, + "step": 2353 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009757884770250698, + "loss": 0.9844, + "step": 2354 + }, + { + "epoch": 0.13, + "learning_rate": 0.000975761713042171, + "loss": 0.9023, + "step": 2355 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009757349346421293, + "loss": 0.8828, + "step": 2356 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009757081418257565, + "loss": 0.8164, + "step": 2357 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009756813345938644, + "loss": 0.9023, + "step": 2358 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009756545129472653, + "loss": 0.9453, + "step": 2359 + }, + { + "epoch": 0.13, + "learning_rate": 0.000975627676886772, + "loss": 0.9375, + "step": 2360 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009756008264131979, + "loss": 0.8906, + "step": 2361 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009755739615273563, + "loss": 0.9258, + "step": 2362 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009755470822300616, + "loss": 0.9727, + "step": 2363 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009755201885221283, + "loss": 0.8867, + "step": 2364 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009754932804043712, + "loss": 0.9297, + "step": 2365 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009754663578776057, + "loss": 0.875, + "step": 2366 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009754394209426479, + "loss": 1.0, + "step": 2367 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009754124696003139, + "loss": 0.9922, + "step": 2368 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009753855038514204, + "loss": 0.9023, + "step": 2369 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009753585236967845, + "loss": 0.9375, + "step": 2370 + }, + { + "epoch": 0.13, + "learning_rate": 0.000975331529137224, + "loss": 0.9766, + "step": 2371 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009753045201735567, + "loss": 0.8945, + "step": 2372 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009752774968066012, + "loss": 0.8633, + "step": 2373 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009752504590371762, + "loss": 0.8906, + "step": 2374 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009752234068661015, + "loss": 0.9062, + "step": 2375 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009751963402941964, + "loss": 0.9375, + "step": 2376 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009751692593222812, + "loss": 0.8945, + "step": 2377 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009751421639511767, + "loss": 0.9844, + "step": 2378 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009751150541817038, + "loss": 0.8711, + "step": 2379 + }, + { + "epoch": 0.13, + "learning_rate": 0.000975087930014684, + "loss": 0.8867, + "step": 2380 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009750607914509395, + "loss": 0.875, + "step": 2381 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009750336384912924, + "loss": 0.9609, + "step": 2382 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009750064711365658, + "loss": 0.9219, + "step": 2383 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009749792893875829, + "loss": 0.8477, + "step": 2384 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009749520932451671, + "loss": 0.9805, + "step": 2385 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009749248827101429, + "loss": 1.0234, + "step": 2386 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009748976577833346, + "loss": 0.8867, + "step": 2387 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009748704184655674, + "loss": 0.9414, + "step": 2388 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009748431647576666, + "loss": 0.9492, + "step": 2389 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009748158966604581, + "loss": 0.8867, + "step": 2390 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009747886141747684, + "loss": 0.8672, + "step": 2391 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009747613173014241, + "loss": 0.8984, + "step": 2392 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009747340060412521, + "loss": 0.9297, + "step": 2393 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009747066803950805, + "loss": 0.9531, + "step": 2394 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009746793403637373, + "loss": 0.9453, + "step": 2395 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009746519859480507, + "loss": 0.9102, + "step": 2396 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009746246171488498, + "loss": 0.9023, + "step": 2397 + }, + { + "epoch": 0.13, + "learning_rate": 0.000974597233966964, + "loss": 0.9102, + "step": 2398 + }, + { + "epoch": 0.13, + "learning_rate": 0.000974569836403223, + "loss": 0.9336, + "step": 2399 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009745424244584572, + "loss": 0.8906, + "step": 2400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009745149981334971, + "loss": 0.875, + "step": 2401 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009744875574291738, + "loss": 0.8906, + "step": 2402 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009744601023463188, + "loss": 1.0156, + "step": 2403 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009744326328857644, + "loss": 0.8789, + "step": 2404 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009744051490483428, + "loss": 0.9375, + "step": 2405 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009743776508348868, + "loss": 1.0312, + "step": 2406 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009743501382462297, + "loss": 0.8945, + "step": 2407 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009743226112832052, + "loss": 0.8359, + "step": 2408 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009742950699466477, + "loss": 0.9023, + "step": 2409 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009742675142373915, + "loss": 0.9023, + "step": 2410 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009742399441562717, + "loss": 0.9766, + "step": 2411 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009742123597041239, + "loss": 0.9258, + "step": 2412 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009741847608817838, + "loss": 0.9297, + "step": 2413 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009741571476900879, + "loss": 1.0312, + "step": 2414 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009741295201298729, + "loss": 0.8828, + "step": 2415 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009741018782019761, + "loss": 0.8906, + "step": 2416 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009740742219072349, + "loss": 0.918, + "step": 2417 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009740465512464876, + "loss": 0.918, + "step": 2418 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009740188662205727, + "loss": 0.9688, + "step": 2419 + }, + { + "epoch": 0.13, + "learning_rate": 0.000973991166830329, + "loss": 0.8516, + "step": 2420 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009739634530765961, + "loss": 0.8867, + "step": 2421 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009739357249602135, + "loss": 0.9336, + "step": 2422 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009739079824820218, + "loss": 0.8789, + "step": 2423 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009738802256428615, + "loss": 1.0078, + "step": 2424 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009738524544435737, + "loss": 0.8516, + "step": 2425 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009738246688850001, + "loss": 0.8438, + "step": 2426 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009737968689679826, + "loss": 0.8906, + "step": 2427 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009737690546933635, + "loss": 0.8828, + "step": 2428 + }, + { + "epoch": 0.13, + "learning_rate": 0.000973741226061986, + "loss": 0.9766, + "step": 2429 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009737133830746931, + "loss": 0.9297, + "step": 2430 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009736855257323287, + "loss": 0.918, + "step": 2431 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009736576540357368, + "loss": 0.8906, + "step": 2432 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009736297679857622, + "loss": 0.8672, + "step": 2433 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009736018675832498, + "loss": 0.9219, + "step": 2434 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009735739528290451, + "loss": 0.9727, + "step": 2435 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009735460237239941, + "loss": 0.9453, + "step": 2436 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009735180802689429, + "loss": 0.8828, + "step": 2437 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009734901224647387, + "loss": 0.9648, + "step": 2438 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009734621503122283, + "loss": 0.957, + "step": 2439 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009734341638122595, + "loss": 0.957, + "step": 2440 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009734061629656806, + "loss": 0.8789, + "step": 2441 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009733781477733397, + "loss": 1.0391, + "step": 2442 + }, + { + "epoch": 0.13, + "learning_rate": 0.000973350118236086, + "loss": 0.9375, + "step": 2443 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009733220743547689, + "loss": 0.9141, + "step": 2444 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009732940161302382, + "loss": 0.9961, + "step": 2445 + }, + { + "epoch": 0.13, + "learning_rate": 0.000973265943563344, + "loss": 0.9219, + "step": 2446 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009732378566549372, + "loss": 0.9883, + "step": 2447 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009732097554058689, + "loss": 0.9258, + "step": 2448 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009731816398169906, + "loss": 0.8984, + "step": 2449 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009731535098891542, + "loss": 0.8672, + "step": 2450 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009731253656232122, + "loss": 0.8945, + "step": 2451 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009730972070200177, + "loss": 0.9141, + "step": 2452 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009730690340804236, + "loss": 0.918, + "step": 2453 + }, + { + "epoch": 0.13, + "learning_rate": 0.000973040846805284, + "loss": 0.957, + "step": 2454 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009730126451954527, + "loss": 0.8555, + "step": 2455 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009729844292517847, + "loss": 0.9141, + "step": 2456 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009729561989751346, + "loss": 1.0156, + "step": 2457 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009729279543663582, + "loss": 0.9062, + "step": 2458 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009728996954263114, + "loss": 0.9609, + "step": 2459 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009728714221558504, + "loss": 0.9062, + "step": 2460 + }, + { + "epoch": 0.13, + "learning_rate": 0.000972843134555832, + "loss": 0.8594, + "step": 2461 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009728148326271133, + "loss": 0.9258, + "step": 2462 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009727865163705522, + "loss": 0.9492, + "step": 2463 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009727581857870067, + "loss": 0.8867, + "step": 2464 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009727298408773351, + "loss": 0.9336, + "step": 2465 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009727014816423967, + "loss": 0.8867, + "step": 2466 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009726731080830504, + "loss": 0.9219, + "step": 2467 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009726447202001565, + "loss": 0.9648, + "step": 2468 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009726163179945748, + "loss": 0.8633, + "step": 2469 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009725879014671664, + "loss": 0.9336, + "step": 2470 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009725594706187922, + "loss": 0.8398, + "step": 2471 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009725310254503138, + "loss": 0.9023, + "step": 2472 + }, + { + "epoch": 0.13, + "learning_rate": 0.000972502565962593, + "loss": 0.9375, + "step": 2473 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009724740921564925, + "loss": 0.9062, + "step": 2474 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009724456040328751, + "loss": 0.9531, + "step": 2475 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009724171015926038, + "loss": 0.9258, + "step": 2476 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009723885848365427, + "loss": 0.9727, + "step": 2477 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009723600537655557, + "loss": 0.9609, + "step": 2478 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009723315083805074, + "loss": 0.8867, + "step": 2479 + }, + { + "epoch": 0.13, + "learning_rate": 0.000972302948682263, + "loss": 0.8633, + "step": 2480 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009722743746716877, + "loss": 0.9492, + "step": 2481 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009722457863496477, + "loss": 0.9453, + "step": 2482 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009722171837170089, + "loss": 0.832, + "step": 2483 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009721885667746384, + "loss": 0.9492, + "step": 2484 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009721599355234033, + "loss": 0.9453, + "step": 2485 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009721312899641713, + "loss": 0.9102, + "step": 2486 + }, + { + "epoch": 0.13, + "learning_rate": 0.00097210263009781, + "loss": 0.9258, + "step": 2487 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009720739559251886, + "loss": 0.9492, + "step": 2488 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009720452674471756, + "loss": 0.9531, + "step": 2489 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009720165646646403, + "loss": 1.0, + "step": 2490 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009719878475784526, + "loss": 0.9062, + "step": 2491 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009719591161894827, + "loss": 0.875, + "step": 2492 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009719303704986015, + "loss": 0.9023, + "step": 2493 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009719016105066797, + "loss": 0.8594, + "step": 2494 + }, + { + "epoch": 0.13, + "learning_rate": 0.000971872836214589, + "loss": 0.8633, + "step": 2495 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009718440476232014, + "loss": 0.9023, + "step": 2496 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009718152447333893, + "loss": 0.9141, + "step": 2497 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009717864275460253, + "loss": 0.9258, + "step": 2498 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009717575960619829, + "loss": 0.918, + "step": 2499 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009717287502821357, + "loss": 0.8906, + "step": 2500 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009716998902073579, + "loss": 0.8516, + "step": 2501 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009716710158385239, + "loss": 1.0156, + "step": 2502 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009716421271765088, + "loss": 0.9766, + "step": 2503 + }, + { + "epoch": 0.13, + "learning_rate": 0.000971613224222188, + "loss": 0.9922, + "step": 2504 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009715843069764373, + "loss": 0.8477, + "step": 2505 + }, + { + "epoch": 0.13, + "learning_rate": 0.000971555375440133, + "loss": 0.9062, + "step": 2506 + }, + { + "epoch": 0.13, + "learning_rate": 0.000971526429614152, + "loss": 0.9141, + "step": 2507 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009714974694993712, + "loss": 0.9023, + "step": 2508 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009714684950966684, + "loss": 0.918, + "step": 2509 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009714395064069213, + "loss": 0.8672, + "step": 2510 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009714105034310088, + "loss": 0.9805, + "step": 2511 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009713814861698094, + "loss": 0.875, + "step": 2512 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009713524546242026, + "loss": 0.9609, + "step": 2513 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009713234087950682, + "loss": 0.9648, + "step": 2514 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009712943486832863, + "loss": 0.918, + "step": 2515 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009712652742897374, + "loss": 0.8828, + "step": 2516 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009712361856153026, + "loss": 0.9883, + "step": 2517 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009712070826608638, + "loss": 0.832, + "step": 2518 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009711779654273023, + "loss": 0.9258, + "step": 2519 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009711488339155008, + "loss": 0.8867, + "step": 2520 + }, + { + "epoch": 0.14, + "learning_rate": 0.000971119688126342, + "loss": 0.9922, + "step": 2521 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009710905280607091, + "loss": 0.8398, + "step": 2522 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009710613537194857, + "loss": 0.8906, + "step": 2523 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009710321651035561, + "loss": 0.9805, + "step": 2524 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009710029622138045, + "loss": 1.0156, + "step": 2525 + }, + { + "epoch": 0.14, + "learning_rate": 0.000970973745051116, + "loss": 0.8516, + "step": 2526 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009709445136163759, + "loss": 0.8984, + "step": 2527 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009709152679104702, + "loss": 0.9258, + "step": 2528 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009708860079342851, + "loss": 0.9844, + "step": 2529 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009708567336887071, + "loss": 0.8477, + "step": 2530 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009708274451746232, + "loss": 0.8398, + "step": 2531 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009707981423929213, + "loss": 0.9688, + "step": 2532 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009707688253444893, + "loss": 0.8398, + "step": 2533 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009707394940302155, + "loss": 0.9414, + "step": 2534 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009707101484509885, + "loss": 0.9219, + "step": 2535 + }, + { + "epoch": 0.14, + "learning_rate": 0.000970680788607698, + "loss": 0.8359, + "step": 2536 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009706514145012335, + "loss": 0.9336, + "step": 2537 + }, + { + "epoch": 0.14, + "learning_rate": 0.000970622026132485, + "loss": 0.8633, + "step": 2538 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009705926235023434, + "loss": 0.9023, + "step": 2539 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009705632066116993, + "loss": 0.8477, + "step": 2540 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009705337754614445, + "loss": 0.9062, + "step": 2541 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009705043300524707, + "loss": 0.9062, + "step": 2542 + }, + { + "epoch": 0.14, + "learning_rate": 0.00097047487038567, + "loss": 0.8789, + "step": 2543 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009704453964619356, + "loss": 0.9023, + "step": 2544 + }, + { + "epoch": 0.14, + "learning_rate": 0.00097041590828216, + "loss": 0.8008, + "step": 2545 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009703864058472374, + "loss": 0.9531, + "step": 2546 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009703568891580614, + "loss": 0.8867, + "step": 2547 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009703273582155268, + "loss": 1.0312, + "step": 2548 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009702978130205282, + "loss": 0.8789, + "step": 2549 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009702682535739611, + "loss": 0.8477, + "step": 2550 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009702386798767212, + "loss": 0.9453, + "step": 2551 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009702090919297046, + "loss": 0.8789, + "step": 2552 + }, + { + "epoch": 0.14, + "learning_rate": 0.000970179489733808, + "loss": 0.918, + "step": 2553 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009701498732899283, + "loss": 0.875, + "step": 2554 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009701202425989633, + "loss": 0.918, + "step": 2555 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009700905976618104, + "loss": 0.9141, + "step": 2556 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009700609384793683, + "loss": 0.8906, + "step": 2557 + }, + { + "epoch": 0.14, + "learning_rate": 0.000970031265052536, + "loss": 0.9688, + "step": 2558 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009700015773822122, + "loss": 0.8477, + "step": 2559 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009699718754692968, + "loss": 0.8711, + "step": 2560 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009699421593146897, + "loss": 1.0078, + "step": 2561 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009699124289192915, + "loss": 0.9336, + "step": 2562 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009698826842840032, + "loss": 0.9531, + "step": 2563 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009698529254097261, + "loss": 0.9141, + "step": 2564 + }, + { + "epoch": 0.14, + "learning_rate": 0.000969823152297362, + "loss": 0.9062, + "step": 2565 + }, + { + "epoch": 0.14, + "learning_rate": 0.000969793364947813, + "loss": 0.9375, + "step": 2566 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009697635633619819, + "loss": 0.9648, + "step": 2567 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009697337475407717, + "loss": 1.0078, + "step": 2568 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009697039174850861, + "loss": 0.875, + "step": 2569 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009696740731958288, + "loss": 0.8789, + "step": 2570 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009696442146739044, + "loss": 0.8594, + "step": 2571 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009696143419202174, + "loss": 0.8672, + "step": 2572 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009695844549356733, + "loss": 0.9844, + "step": 2573 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009695545537211779, + "loss": 0.8984, + "step": 2574 + }, + { + "epoch": 0.14, + "learning_rate": 0.000969524638277637, + "loss": 0.832, + "step": 2575 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009694947086059573, + "loss": 1.0312, + "step": 2576 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009694647647070458, + "loss": 0.8281, + "step": 2577 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009694348065818097, + "loss": 0.9023, + "step": 2578 + }, + { + "epoch": 0.14, + "learning_rate": 0.000969404834231157, + "loss": 0.8438, + "step": 2579 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009693748476559959, + "loss": 0.9961, + "step": 2580 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009693448468572352, + "loss": 0.957, + "step": 2581 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009693148318357838, + "loss": 0.9219, + "step": 2582 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009692848025925515, + "loss": 0.9492, + "step": 2583 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009692547591284482, + "loss": 0.8906, + "step": 2584 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009692247014443841, + "loss": 0.8398, + "step": 2585 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009691946295412704, + "loss": 0.8711, + "step": 2586 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009691645434200181, + "loss": 0.8906, + "step": 2587 + }, + { + "epoch": 0.14, + "learning_rate": 0.000969134443081539, + "loss": 0.9688, + "step": 2588 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009691043285267453, + "loss": 0.9336, + "step": 2589 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009690741997565495, + "loss": 0.8906, + "step": 2590 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009690440567718648, + "loss": 0.9414, + "step": 2591 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009690138995736043, + "loss": 0.8828, + "step": 2592 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009689837281626821, + "loss": 0.8359, + "step": 2593 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009689535425400123, + "loss": 0.9805, + "step": 2594 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009689233427065099, + "loss": 0.8633, + "step": 2595 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009688931286630898, + "loss": 0.8359, + "step": 2596 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009688629004106676, + "loss": 0.9102, + "step": 2597 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009688326579501594, + "loss": 0.9609, + "step": 2598 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009688024012824818, + "loss": 0.8555, + "step": 2599 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009687721304085514, + "loss": 0.8945, + "step": 2600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009687418453292857, + "loss": 0.7695, + "step": 2601 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009687115460456022, + "loss": 0.9688, + "step": 2602 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009686812325584193, + "loss": 0.8984, + "step": 2603 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009686509048686556, + "loss": 0.9414, + "step": 2604 + }, + { + "epoch": 0.14, + "learning_rate": 0.00096862056297723, + "loss": 0.9102, + "step": 2605 + }, + { + "epoch": 0.14, + "learning_rate": 0.000968590206885062, + "loss": 1.0391, + "step": 2606 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009685598365930715, + "loss": 0.8398, + "step": 2607 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009685294521021789, + "loss": 0.9258, + "step": 2608 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009684990534133049, + "loss": 0.8555, + "step": 2609 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009684686405273705, + "loss": 0.9922, + "step": 2610 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009684382134452976, + "loss": 0.8359, + "step": 2611 + }, + { + "epoch": 0.14, + "learning_rate": 0.000968407772168008, + "loss": 0.8867, + "step": 2612 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009683773166964243, + "loss": 0.9414, + "step": 2613 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009683468470314694, + "loss": 0.9727, + "step": 2614 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009683163631740665, + "loss": 0.918, + "step": 2615 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009682858651251396, + "loss": 0.8633, + "step": 2616 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009682553528856127, + "loss": 1.0, + "step": 2617 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009682248264564104, + "loss": 0.9414, + "step": 2618 + }, + { + "epoch": 0.14, + "learning_rate": 0.000968194285838458, + "loss": 0.875, + "step": 2619 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009681637310326806, + "loss": 0.8555, + "step": 2620 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009681331620400045, + "loss": 0.8477, + "step": 2621 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009681025788613557, + "loss": 0.8516, + "step": 2622 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009680719814976613, + "loss": 0.9766, + "step": 2623 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009680413699498483, + "loss": 0.8828, + "step": 2624 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009680107442188444, + "loss": 0.9062, + "step": 2625 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009679801043055775, + "loss": 0.9375, + "step": 2626 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009679494502109762, + "loss": 0.8594, + "step": 2627 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009679187819359697, + "loss": 0.8555, + "step": 2628 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009678880994814867, + "loss": 0.9102, + "step": 2629 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009678574028484575, + "loss": 0.7891, + "step": 2630 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009678266920378122, + "loss": 0.8477, + "step": 2631 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009677959670504816, + "loss": 0.9414, + "step": 2632 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009677652278873963, + "loss": 0.8945, + "step": 2633 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009677344745494883, + "loss": 0.8906, + "step": 2634 + }, + { + "epoch": 0.14, + "learning_rate": 0.000967703707037689, + "loss": 0.8945, + "step": 2635 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009676729253529314, + "loss": 0.9453, + "step": 2636 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009676421294961477, + "loss": 0.832, + "step": 2637 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009676113194682716, + "loss": 0.8281, + "step": 2638 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009675804952702363, + "loss": 0.8398, + "step": 2639 + }, + { + "epoch": 0.14, + "learning_rate": 0.000967549656902976, + "loss": 0.9102, + "step": 2640 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009675188043674254, + "loss": 0.9688, + "step": 2641 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009674879376645194, + "loss": 0.9414, + "step": 2642 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009674570567951932, + "loss": 0.9375, + "step": 2643 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009674261617603828, + "loss": 0.8203, + "step": 2644 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009673952525610241, + "loss": 0.8828, + "step": 2645 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009673643291980541, + "loss": 0.9023, + "step": 2646 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009673333916724096, + "loss": 0.9531, + "step": 2647 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009673024399850283, + "loss": 0.9453, + "step": 2648 + }, + { + "epoch": 0.14, + "learning_rate": 0.000967271474136848, + "loss": 0.9375, + "step": 2649 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009672404941288073, + "loss": 0.9727, + "step": 2650 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009672094999618446, + "loss": 0.9414, + "step": 2651 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009671784916368996, + "loss": 0.8789, + "step": 2652 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009671474691549114, + "loss": 0.8711, + "step": 2653 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009671164325168207, + "loss": 0.9375, + "step": 2654 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009670853817235676, + "loss": 0.8711, + "step": 2655 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009670543167760932, + "loss": 0.9297, + "step": 2656 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009670232376753387, + "loss": 0.8516, + "step": 2657 + }, + { + "epoch": 0.14, + "learning_rate": 0.000966992144422246, + "loss": 0.9648, + "step": 2658 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009669610370177574, + "loss": 0.8164, + "step": 2659 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009669299154628155, + "loss": 0.957, + "step": 2660 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009668987797583635, + "loss": 1.0156, + "step": 2661 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009668676299053446, + "loss": 0.9688, + "step": 2662 + }, + { + "epoch": 0.14, + "learning_rate": 0.000966836465904703, + "loss": 0.8945, + "step": 2663 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009668052877573832, + "loss": 0.9141, + "step": 2664 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009667740954643296, + "loss": 0.8711, + "step": 2665 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009667428890264877, + "loss": 1.0625, + "step": 2666 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009667116684448031, + "loss": 1.0156, + "step": 2667 + }, + { + "epoch": 0.14, + "learning_rate": 0.000966680433720222, + "loss": 0.9688, + "step": 2668 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009666491848536906, + "loss": 0.9961, + "step": 2669 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009666179218461562, + "loss": 0.9648, + "step": 2670 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009665866446985659, + "loss": 0.9219, + "step": 2671 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009665553534118676, + "loss": 0.8594, + "step": 2672 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009665240479870096, + "loss": 0.8984, + "step": 2673 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009664927284249406, + "loss": 0.8516, + "step": 2674 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009664613947266095, + "loss": 0.8672, + "step": 2675 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009664300468929659, + "loss": 0.918, + "step": 2676 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009663986849249598, + "loss": 0.9531, + "step": 2677 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009663673088235415, + "loss": 0.9375, + "step": 2678 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009663359185896618, + "loss": 0.8203, + "step": 2679 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009663045142242719, + "loss": 0.918, + "step": 2680 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009662730957283235, + "loss": 0.8438, + "step": 2681 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009662416631027688, + "loss": 0.9062, + "step": 2682 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009662102163485601, + "loss": 1.0625, + "step": 2683 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009661787554666503, + "loss": 0.8906, + "step": 2684 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009661472804579932, + "loss": 0.8906, + "step": 2685 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009661157913235421, + "loss": 0.9258, + "step": 2686 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009660842880642516, + "loss": 0.957, + "step": 2687 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009660527706810759, + "loss": 0.8125, + "step": 2688 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009660212391749706, + "loss": 0.8789, + "step": 2689 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009659896935468908, + "loss": 0.9141, + "step": 2690 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009659581337977926, + "loss": 0.7891, + "step": 2691 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009659265599286324, + "loss": 0.9805, + "step": 2692 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009658949719403669, + "loss": 0.9258, + "step": 2693 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009658633698339535, + "loss": 0.8828, + "step": 2694 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009658317536103497, + "loss": 0.9336, + "step": 2695 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009658001232705135, + "loss": 0.9023, + "step": 2696 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009657684788154035, + "loss": 0.9336, + "step": 2697 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009657368202459788, + "loss": 0.957, + "step": 2698 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009657051475631984, + "loss": 0.9453, + "step": 2699 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009656734607680224, + "loss": 0.8711, + "step": 2700 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009656417598614109, + "loss": 0.8555, + "step": 2701 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009656100448443243, + "loss": 0.9922, + "step": 2702 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009655783157177242, + "loss": 0.9219, + "step": 2703 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009655465724825716, + "loss": 0.9258, + "step": 2704 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009655148151398288, + "loss": 0.8906, + "step": 2705 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009654830436904578, + "loss": 0.8906, + "step": 2706 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009654512581354217, + "loss": 0.8633, + "step": 2707 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009654194584756834, + "loss": 0.9492, + "step": 2708 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009653876447122067, + "loss": 0.9102, + "step": 2709 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009653558168459559, + "loss": 1.0, + "step": 2710 + }, + { + "epoch": 0.15, + "learning_rate": 0.000965323974877895, + "loss": 0.9141, + "step": 2711 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009652921188089893, + "loss": 0.8945, + "step": 2712 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009652602486402039, + "loss": 0.9727, + "step": 2713 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009652283643725048, + "loss": 0.9219, + "step": 2714 + }, + { + "epoch": 0.15, + "learning_rate": 0.000965196466006858, + "loss": 0.8906, + "step": 2715 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009651645535442301, + "loss": 0.9258, + "step": 2716 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009651326269855884, + "loss": 0.875, + "step": 2717 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009651006863319, + "loss": 0.9922, + "step": 2718 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009650687315841334, + "loss": 0.9062, + "step": 2719 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009650367627432563, + "loss": 0.957, + "step": 2720 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009650047798102378, + "loss": 0.9648, + "step": 2721 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009649727827860471, + "loss": 0.9062, + "step": 2722 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009649407716716536, + "loss": 0.918, + "step": 2723 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009649087464680275, + "loss": 0.8711, + "step": 2724 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009648767071761393, + "loss": 0.9805, + "step": 2725 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009648446537969599, + "loss": 0.8555, + "step": 2726 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009648125863314604, + "loss": 0.9844, + "step": 2727 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009647805047806129, + "loss": 0.8906, + "step": 2728 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009647484091453892, + "loss": 0.9648, + "step": 2729 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009647162994267622, + "loss": 0.8203, + "step": 2730 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009646841756257048, + "loss": 0.9141, + "step": 2731 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009646520377431904, + "loss": 1.0234, + "step": 2732 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009646198857801931, + "loss": 0.9961, + "step": 2733 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009645877197376871, + "loss": 1.0234, + "step": 2734 + }, + { + "epoch": 0.15, + "learning_rate": 0.000964555539616647, + "loss": 0.8828, + "step": 2735 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009645233454180481, + "loss": 0.8164, + "step": 2736 + }, + { + "epoch": 0.15, + "learning_rate": 0.000964491137142866, + "loss": 0.8438, + "step": 2737 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009644589147920766, + "loss": 0.8555, + "step": 2738 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009644266783666565, + "loss": 0.918, + "step": 2739 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009643944278675824, + "loss": 0.9336, + "step": 2740 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009643621632958317, + "loss": 0.9258, + "step": 2741 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009643298846523823, + "loss": 0.9141, + "step": 2742 + }, + { + "epoch": 0.15, + "learning_rate": 0.000964297591938212, + "loss": 0.9531, + "step": 2743 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009642652851542994, + "loss": 0.9375, + "step": 2744 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009642329643016239, + "loss": 0.9141, + "step": 2745 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009642006293811646, + "loss": 0.918, + "step": 2746 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009641682803939014, + "loss": 0.9336, + "step": 2747 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009641359173408145, + "loss": 0.9844, + "step": 2748 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009641035402228848, + "loss": 0.9492, + "step": 2749 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009640711490410934, + "loss": 0.9336, + "step": 2750 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009640387437964217, + "loss": 1.0156, + "step": 2751 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009640063244898518, + "loss": 0.9453, + "step": 2752 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009639738911223661, + "loss": 0.9023, + "step": 2753 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009639414436949474, + "loss": 0.8398, + "step": 2754 + }, + { + "epoch": 0.15, + "learning_rate": 0.000963908982208579, + "loss": 0.8516, + "step": 2755 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009638765066642446, + "loss": 0.9688, + "step": 2756 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009638440170629284, + "loss": 0.9336, + "step": 2757 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009638115134056146, + "loss": 0.918, + "step": 2758 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009637789956932888, + "loss": 0.9297, + "step": 2759 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009637464639269356, + "loss": 0.8672, + "step": 2760 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009637139181075415, + "loss": 0.8984, + "step": 2761 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009636813582360924, + "loss": 0.9375, + "step": 2762 + }, + { + "epoch": 0.15, + "learning_rate": 0.000963648784313575, + "loss": 0.8633, + "step": 2763 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009636161963409764, + "loss": 0.9375, + "step": 2764 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009635835943192842, + "loss": 0.8242, + "step": 2765 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009635509782494864, + "loss": 0.8828, + "step": 2766 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009635183481325711, + "loss": 0.9727, + "step": 2767 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009634857039695275, + "loss": 0.9414, + "step": 2768 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009634530457613444, + "loss": 0.9414, + "step": 2769 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009634203735090118, + "loss": 0.9297, + "step": 2770 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009633876872135195, + "loss": 0.9453, + "step": 2771 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009633549868758582, + "loss": 0.8516, + "step": 2772 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009633222724970188, + "loss": 0.9453, + "step": 2773 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009632895440779927, + "loss": 0.8672, + "step": 2774 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009632568016197714, + "loss": 0.8984, + "step": 2775 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009632240451233474, + "loss": 0.9336, + "step": 2776 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009631912745897133, + "loss": 0.9961, + "step": 2777 + }, + { + "epoch": 0.15, + "learning_rate": 0.000963158490019862, + "loss": 0.8672, + "step": 2778 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009631256914147871, + "loss": 0.9141, + "step": 2779 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009630928787754824, + "loss": 0.8984, + "step": 2780 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009630600521029425, + "loss": 0.8711, + "step": 2781 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009630272113981618, + "loss": 0.8438, + "step": 2782 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009629943566621355, + "loss": 0.8906, + "step": 2783 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009629614878958595, + "loss": 0.9727, + "step": 2784 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009629286051003297, + "loss": 0.8516, + "step": 2785 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009628957082765425, + "loss": 0.8984, + "step": 2786 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009628627974254949, + "loss": 0.9727, + "step": 2787 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009628298725481839, + "loss": 0.9219, + "step": 2788 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009627969336456077, + "loss": 0.8984, + "step": 2789 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009627639807187641, + "loss": 0.8828, + "step": 2790 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009627310137686518, + "loss": 0.9414, + "step": 2791 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009626980327962698, + "loss": 0.8984, + "step": 2792 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009626650378026175, + "loss": 0.9258, + "step": 2793 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009626320287886948, + "loss": 0.8398, + "step": 2794 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009625990057555019, + "loss": 0.9648, + "step": 2795 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009625659687040398, + "loss": 0.8711, + "step": 2796 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009625329176353093, + "loss": 0.9062, + "step": 2797 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009624998525503119, + "loss": 0.9141, + "step": 2798 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009624667734500498, + "loss": 0.9102, + "step": 2799 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009624336803355256, + "loss": 0.9336, + "step": 2800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009624005732077417, + "loss": 0.8633, + "step": 2801 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009623674520677015, + "loss": 0.8906, + "step": 2802 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009623343169164087, + "loss": 0.8789, + "step": 2803 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009623011677548674, + "loss": 0.9453, + "step": 2804 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009622680045840824, + "loss": 0.9492, + "step": 2805 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009622348274050581, + "loss": 0.8359, + "step": 2806 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009622016362188003, + "loss": 0.8984, + "step": 2807 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009621684310263147, + "loss": 0.8711, + "step": 2808 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009621352118286074, + "loss": 0.9492, + "step": 2809 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009621019786266853, + "loss": 0.9766, + "step": 2810 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009620687314215552, + "loss": 0.8594, + "step": 2811 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009620354702142248, + "loss": 0.957, + "step": 2812 + }, + { + "epoch": 0.15, + "learning_rate": 0.000962002195005702, + "loss": 0.9648, + "step": 2813 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009619689057969949, + "loss": 0.9023, + "step": 2814 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009619356025891127, + "loss": 0.9766, + "step": 2815 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009619022853830643, + "loss": 0.9492, + "step": 2816 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009618689541798593, + "loss": 0.9258, + "step": 2817 + }, + { + "epoch": 0.15, + "learning_rate": 0.000961835608980508, + "loss": 0.9414, + "step": 2818 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009618022497860206, + "loss": 0.8984, + "step": 2819 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009617688765974082, + "loss": 1.0156, + "step": 2820 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009617354894156818, + "loss": 0.8477, + "step": 2821 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009617020882418535, + "loss": 0.9414, + "step": 2822 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009616686730769354, + "loss": 0.9141, + "step": 2823 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009616352439219399, + "loss": 0.8867, + "step": 2824 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009616018007778802, + "loss": 0.9258, + "step": 2825 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009615683436457696, + "loss": 0.9062, + "step": 2826 + }, + { + "epoch": 0.15, + "learning_rate": 0.000961534872526622, + "loss": 0.8125, + "step": 2827 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009615013874214518, + "loss": 0.9492, + "step": 2828 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009614678883312735, + "loss": 0.9258, + "step": 2829 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009614343752571025, + "loss": 0.9023, + "step": 2830 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009614008481999541, + "loss": 0.9609, + "step": 2831 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009613673071608444, + "loss": 0.8555, + "step": 2832 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009613337521407897, + "loss": 0.8867, + "step": 2833 + }, + { + "epoch": 0.15, + "learning_rate": 0.000961300183140807, + "loss": 0.8516, + "step": 2834 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009612666001619136, + "loss": 0.7891, + "step": 2835 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009612330032051268, + "loss": 0.9219, + "step": 2836 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009611993922714652, + "loss": 0.875, + "step": 2837 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009611657673619469, + "loss": 0.9219, + "step": 2838 + }, + { + "epoch": 0.15, + "learning_rate": 0.000961132128477591, + "loss": 0.9805, + "step": 2839 + }, + { + "epoch": 0.15, + "learning_rate": 0.000961098475619417, + "loss": 0.8086, + "step": 2840 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009610648087884444, + "loss": 0.9648, + "step": 2841 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009610311279856937, + "loss": 0.9023, + "step": 2842 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009609974332121853, + "loss": 0.8438, + "step": 2843 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009609637244689406, + "loss": 0.9375, + "step": 2844 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009609300017569807, + "loss": 0.9375, + "step": 2845 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009608962650773276, + "loss": 0.9219, + "step": 2846 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009608625144310039, + "loss": 0.793, + "step": 2847 + }, + { + "epoch": 0.15, + "learning_rate": 0.000960828749819032, + "loss": 0.9336, + "step": 2848 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009607949712424354, + "loss": 0.9531, + "step": 2849 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009607611787022374, + "loss": 0.9609, + "step": 2850 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009607273721994624, + "loss": 0.9922, + "step": 2851 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009606935517351346, + "loss": 0.9414, + "step": 2852 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009606597173102787, + "loss": 0.8438, + "step": 2853 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009606258689259201, + "loss": 1.0078, + "step": 2854 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009605920065830849, + "loss": 0.8906, + "step": 2855 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009605581302827988, + "loss": 0.9688, + "step": 2856 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009605242400260886, + "loss": 0.9023, + "step": 2857 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009604903358139809, + "loss": 0.8398, + "step": 2858 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009604564176475037, + "loss": 0.8242, + "step": 2859 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009604224855276844, + "loss": 0.9062, + "step": 2860 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009603885394555515, + "loss": 0.9141, + "step": 2861 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009603545794321335, + "loss": 0.8828, + "step": 2862 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009603206054584595, + "loss": 0.9062, + "step": 2863 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009602866175355591, + "loss": 0.9297, + "step": 2864 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009602526156644624, + "loss": 0.9531, + "step": 2865 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009602185998461994, + "loss": 0.8789, + "step": 2866 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009601845700818013, + "loss": 0.9219, + "step": 2867 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009601505263722989, + "loss": 0.8984, + "step": 2868 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009601164687187241, + "loss": 0.8711, + "step": 2869 + }, + { + "epoch": 0.15, + "learning_rate": 0.000960082397122109, + "loss": 0.918, + "step": 2870 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009600483115834859, + "loss": 0.8867, + "step": 2871 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009600142121038879, + "loss": 0.957, + "step": 2872 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009599800986843481, + "loss": 0.8711, + "step": 2873 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009599459713259004, + "loss": 0.9219, + "step": 2874 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009599118300295788, + "loss": 0.8477, + "step": 2875 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009598776747964182, + "loss": 0.9141, + "step": 2876 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009598435056274535, + "loss": 0.8633, + "step": 2877 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009598093225237199, + "loss": 0.832, + "step": 2878 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009597751254862535, + "loss": 0.9727, + "step": 2879 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009597409145160905, + "loss": 0.9414, + "step": 2880 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009597066896142677, + "loss": 0.9375, + "step": 2881 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009596724507818222, + "loss": 0.8672, + "step": 2882 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009596381980197914, + "loss": 0.8555, + "step": 2883 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009596039313292134, + "loss": 0.9609, + "step": 2884 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009595696507111264, + "loss": 0.8398, + "step": 2885 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009595353561665696, + "loss": 0.8164, + "step": 2886 + }, + { + "epoch": 0.16, + "learning_rate": 0.000959501047696582, + "loss": 0.9297, + "step": 2887 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009594667253022034, + "loss": 0.8672, + "step": 2888 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009594323889844736, + "loss": 0.9805, + "step": 2889 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009593980387444332, + "loss": 0.8555, + "step": 2890 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009593636745831234, + "loss": 0.9062, + "step": 2891 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009593292965015852, + "loss": 0.8203, + "step": 2892 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009592949045008606, + "loss": 0.918, + "step": 2893 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009592604985819915, + "loss": 0.8984, + "step": 2894 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009592260787460208, + "loss": 0.8828, + "step": 2895 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009591916449939915, + "loss": 0.9453, + "step": 2896 + }, + { + "epoch": 0.16, + "learning_rate": 0.000959157197326947, + "loss": 0.9023, + "step": 2897 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009591227357459312, + "loss": 0.8477, + "step": 2898 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009590882602519882, + "loss": 0.875, + "step": 2899 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009590537708461631, + "loss": 0.8945, + "step": 2900 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009590192675295007, + "loss": 0.9336, + "step": 2901 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009589847503030469, + "loss": 0.9531, + "step": 2902 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009589502191678471, + "loss": 0.9062, + "step": 2903 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009589156741249485, + "loss": 0.8789, + "step": 2904 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009588811151753973, + "loss": 0.8633, + "step": 2905 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009588465423202412, + "loss": 0.9258, + "step": 2906 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009588119555605275, + "loss": 0.918, + "step": 2907 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009587773548973043, + "loss": 0.9414, + "step": 2908 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009587427403316205, + "loss": 1.0078, + "step": 2909 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009587081118645247, + "loss": 0.9922, + "step": 2910 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009586734694970663, + "loss": 0.8945, + "step": 2911 + }, + { + "epoch": 0.16, + "learning_rate": 0.000958638813230295, + "loss": 0.9414, + "step": 2912 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009586041430652612, + "loss": 1.0, + "step": 2913 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009585694590030157, + "loss": 0.9414, + "step": 2914 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009585347610446089, + "loss": 0.8633, + "step": 2915 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009585000491910928, + "loss": 0.9297, + "step": 2916 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009584653234435191, + "loss": 1.0156, + "step": 2917 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009584305838029402, + "loss": 0.875, + "step": 2918 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009583958302704086, + "loss": 0.9727, + "step": 2919 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009583610628469778, + "loss": 0.9258, + "step": 2920 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009583262815337011, + "loss": 1.0, + "step": 2921 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009582914863316325, + "loss": 0.918, + "step": 2922 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009582566772418264, + "loss": 0.875, + "step": 2923 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009582218542653378, + "loss": 0.9961, + "step": 2924 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009581870174032219, + "loss": 0.9453, + "step": 2925 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009581521666565343, + "loss": 0.8359, + "step": 2926 + }, + { + "epoch": 0.16, + "learning_rate": 0.000958117302026331, + "loss": 0.9531, + "step": 2927 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009580824235136687, + "loss": 0.9727, + "step": 2928 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009580475311196042, + "loss": 0.9023, + "step": 2929 + }, + { + "epoch": 0.16, + "learning_rate": 0.000958012624845195, + "loss": 0.8242, + "step": 2930 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009579777046914986, + "loss": 0.8164, + "step": 2931 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009579427706595736, + "loss": 0.8984, + "step": 2932 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009579078227504782, + "loss": 0.8945, + "step": 2933 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009578728609652719, + "loss": 0.875, + "step": 2934 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009578378853050136, + "loss": 0.9453, + "step": 2935 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009578028957707636, + "loss": 0.8438, + "step": 2936 + }, + { + "epoch": 0.16, + "learning_rate": 0.000957767892363582, + "loss": 0.918, + "step": 2937 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009577328750845296, + "loss": 0.9297, + "step": 2938 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009576978439346676, + "loss": 0.9297, + "step": 2939 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009576627989150573, + "loss": 0.9102, + "step": 2940 + }, + { + "epoch": 0.16, + "learning_rate": 0.000957627740026761, + "loss": 0.9219, + "step": 2941 + }, + { + "epoch": 0.16, + "learning_rate": 0.000957592667270841, + "loss": 0.8398, + "step": 2942 + }, + { + "epoch": 0.16, + "learning_rate": 0.00095755758064836, + "loss": 0.9414, + "step": 2943 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009575224801603812, + "loss": 0.9023, + "step": 2944 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009574873658079686, + "loss": 0.9102, + "step": 2945 + }, + { + "epoch": 0.16, + "learning_rate": 0.000957452237592186, + "loss": 0.8242, + "step": 2946 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009574170955140978, + "loss": 0.9102, + "step": 2947 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009573819395747693, + "loss": 0.9062, + "step": 2948 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009573467697752654, + "loss": 0.9375, + "step": 2949 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009573115861166523, + "loss": 0.8906, + "step": 2950 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009572763885999958, + "loss": 0.9141, + "step": 2951 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009572411772263628, + "loss": 1.0078, + "step": 2952 + }, + { + "epoch": 0.16, + "learning_rate": 0.00095720595199682, + "loss": 0.9336, + "step": 2953 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009571707129124352, + "loss": 0.8398, + "step": 2954 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009571354599742759, + "loss": 0.7695, + "step": 2955 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009571001931834107, + "loss": 0.8359, + "step": 2956 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009570649125409081, + "loss": 0.9688, + "step": 2957 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009570296180478373, + "loss": 0.8555, + "step": 2958 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009569943097052678, + "loss": 0.9219, + "step": 2959 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009569589875142695, + "loss": 0.8984, + "step": 2960 + }, + { + "epoch": 0.16, + "learning_rate": 0.000956923651475913, + "loss": 0.8906, + "step": 2961 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009568883015912688, + "loss": 0.9297, + "step": 2962 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009568529378614083, + "loss": 0.9688, + "step": 2963 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009568175602874033, + "loss": 0.8984, + "step": 2964 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009567821688703256, + "loss": 1.0312, + "step": 2965 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009567467636112475, + "loss": 0.9219, + "step": 2966 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009567113445112424, + "loss": 1.0547, + "step": 2967 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009566759115713832, + "loss": 0.8789, + "step": 2968 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009566404647927439, + "loss": 1.0234, + "step": 2969 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009566050041763985, + "loss": 0.8516, + "step": 2970 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009565695297234216, + "loss": 0.9414, + "step": 2971 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009565340414348882, + "loss": 0.9766, + "step": 2972 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009564985393118737, + "loss": 0.8359, + "step": 2973 + }, + { + "epoch": 0.16, + "learning_rate": 0.000956463023355454, + "loss": 0.8633, + "step": 2974 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009564274935667052, + "loss": 0.9453, + "step": 2975 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009563919499467042, + "loss": 0.8906, + "step": 2976 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009563563924965279, + "loss": 0.8867, + "step": 2977 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009563208212172538, + "loss": 0.9102, + "step": 2978 + }, + { + "epoch": 0.16, + "learning_rate": 0.00095628523610996, + "loss": 0.8633, + "step": 2979 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009562496371757248, + "loss": 0.9023, + "step": 2980 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009562140244156267, + "loss": 0.9375, + "step": 2981 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009561783978307451, + "loss": 0.8594, + "step": 2982 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009561427574221597, + "loss": 0.9414, + "step": 2983 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009561071031909502, + "loss": 0.8906, + "step": 2984 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009560714351381976, + "loss": 0.8984, + "step": 2985 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009560357532649821, + "loss": 0.9688, + "step": 2986 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009560000575723855, + "loss": 0.8164, + "step": 2987 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009559643480614893, + "loss": 0.8906, + "step": 2988 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009559286247333754, + "loss": 0.957, + "step": 2989 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009558928875891266, + "loss": 0.9219, + "step": 2990 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009558571366298258, + "loss": 0.8359, + "step": 2991 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009558213718565562, + "loss": 0.8398, + "step": 2992 + }, + { + "epoch": 0.16, + "learning_rate": 0.000955785593270402, + "loss": 0.9258, + "step": 2993 + }, + { + "epoch": 0.16, + "learning_rate": 0.000955749800872447, + "loss": 0.9492, + "step": 2994 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009557139946637761, + "loss": 0.8672, + "step": 2995 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009556781746454741, + "loss": 0.8711, + "step": 2996 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009556423408186268, + "loss": 0.9258, + "step": 2997 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009556064931843196, + "loss": 0.8906, + "step": 2998 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009555706317436393, + "loss": 0.8477, + "step": 2999 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009555347564976723, + "loss": 0.8516, + "step": 3000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009554988674475058, + "loss": 0.875, + "step": 3001 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009554629645942274, + "loss": 0.9531, + "step": 3002 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009554270479389251, + "loss": 0.875, + "step": 3003 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009553911174826873, + "loss": 0.8047, + "step": 3004 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009553551732266027, + "loss": 0.9414, + "step": 3005 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009553192151717607, + "loss": 0.9531, + "step": 3006 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009552832433192506, + "loss": 0.918, + "step": 3007 + }, + { + "epoch": 0.16, + "learning_rate": 0.000955247257670163, + "loss": 0.9062, + "step": 3008 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009552112582255878, + "loss": 0.9297, + "step": 3009 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009551752449866164, + "loss": 1.0547, + "step": 3010 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009551392179543399, + "loss": 0.9648, + "step": 3011 + }, + { + "epoch": 0.16, + "learning_rate": 0.00095510317712985, + "loss": 0.8672, + "step": 3012 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009550671225142389, + "loss": 0.8516, + "step": 3013 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009550310541085992, + "loss": 0.918, + "step": 3014 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009549949719140239, + "loss": 1.0, + "step": 3015 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009549588759316063, + "loss": 0.8984, + "step": 3016 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009549227661624404, + "loss": 0.9453, + "step": 3017 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009548866426076204, + "loss": 0.918, + "step": 3018 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009548505052682407, + "loss": 0.8359, + "step": 3019 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009548143541453966, + "loss": 0.9727, + "step": 3020 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009547781892401837, + "loss": 0.8633, + "step": 3021 + }, + { + "epoch": 0.16, + "learning_rate": 0.000954742010553698, + "loss": 0.8906, + "step": 3022 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009547058180870354, + "loss": 0.9766, + "step": 3023 + }, + { + "epoch": 0.16, + "learning_rate": 0.000954669611841293, + "loss": 0.9141, + "step": 3024 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009546333918175677, + "loss": 0.9258, + "step": 3025 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009545971580169573, + "loss": 0.9375, + "step": 3026 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009545609104405598, + "loss": 0.9727, + "step": 3027 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009545246490894736, + "loss": 0.9531, + "step": 3028 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009544883739647974, + "loss": 0.9258, + "step": 3029 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009544520850676306, + "loss": 0.8633, + "step": 3030 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009544157823990728, + "loss": 0.9531, + "step": 3031 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009543794659602243, + "loss": 0.8945, + "step": 3032 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009543431357521853, + "loss": 0.9219, + "step": 3033 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009543067917760569, + "loss": 0.8711, + "step": 3034 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009542704340329402, + "loss": 0.8359, + "step": 3035 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009542340625239374, + "loss": 0.9492, + "step": 3036 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009541976772501504, + "loss": 0.9062, + "step": 3037 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009541612782126818, + "loss": 0.8945, + "step": 3038 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009541248654126346, + "loss": 0.9258, + "step": 3039 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009540884388511122, + "loss": 0.9258, + "step": 3040 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009540519985292187, + "loss": 0.8711, + "step": 3041 + }, + { + "epoch": 0.16, + "learning_rate": 0.000954015544448058, + "loss": 0.8555, + "step": 3042 + }, + { + "epoch": 0.16, + "learning_rate": 0.000953979076608735, + "loss": 0.8945, + "step": 3043 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009539425950123546, + "loss": 0.9297, + "step": 3044 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009539060996600228, + "loss": 0.8711, + "step": 3045 + }, + { + "epoch": 0.16, + "learning_rate": 0.000953869590552845, + "loss": 0.8828, + "step": 3046 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009538330676919275, + "loss": 0.8281, + "step": 3047 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009537965310783776, + "loss": 0.9688, + "step": 3048 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009537599807133021, + "loss": 0.8594, + "step": 3049 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009537234165978087, + "loss": 0.8945, + "step": 3050 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009536868387330053, + "loss": 0.8984, + "step": 3051 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009536502471200005, + "loss": 0.918, + "step": 3052 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009536136417599031, + "loss": 0.9258, + "step": 3053 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009535770226538221, + "loss": 0.8672, + "step": 3054 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009535403898028676, + "loss": 0.875, + "step": 3055 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009535037432081494, + "loss": 0.8672, + "step": 3056 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009534670828707781, + "loss": 0.8047, + "step": 3057 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009534304087918647, + "loss": 0.9297, + "step": 3058 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009533937209725204, + "loss": 0.8164, + "step": 3059 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009533570194138571, + "loss": 0.9766, + "step": 3060 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009533203041169869, + "loss": 0.957, + "step": 3061 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009532835750830224, + "loss": 0.9375, + "step": 3062 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009532468323130768, + "loss": 0.8984, + "step": 3063 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009532100758082631, + "loss": 0.9102, + "step": 3064 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009531733055696955, + "loss": 0.8555, + "step": 3065 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009531365215984882, + "loss": 1.0547, + "step": 3066 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009530997238957557, + "loss": 0.9258, + "step": 3067 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009530629124626134, + "loss": 0.9297, + "step": 3068 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009530260873001765, + "loss": 0.957, + "step": 3069 + }, + { + "epoch": 0.17, + "learning_rate": 0.000952989248409561, + "loss": 0.8711, + "step": 3070 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009529523957918833, + "loss": 0.9531, + "step": 3071 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009529155294482603, + "loss": 0.8867, + "step": 3072 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009528786493798089, + "loss": 0.9297, + "step": 3073 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009528417555876469, + "loss": 0.9414, + "step": 3074 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009528048480728921, + "loss": 0.9492, + "step": 3075 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009527679268366631, + "loss": 0.8711, + "step": 3076 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009527309918800787, + "loss": 0.9141, + "step": 3077 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009526940432042579, + "loss": 0.918, + "step": 3078 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009526570808103208, + "loss": 0.9062, + "step": 3079 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009526201046993871, + "loss": 0.9453, + "step": 3080 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009525831148725775, + "loss": 0.9023, + "step": 3081 + }, + { + "epoch": 0.17, + "learning_rate": 0.000952546111331013, + "loss": 0.9336, + "step": 3082 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009525090940758146, + "loss": 0.9219, + "step": 3083 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009524720631081044, + "loss": 0.8828, + "step": 3084 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009524350184290043, + "loss": 0.9141, + "step": 3085 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009523979600396371, + "loss": 0.8711, + "step": 3086 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009523608879411255, + "loss": 0.8477, + "step": 3087 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009523238021345933, + "loss": 0.8828, + "step": 3088 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009522867026211639, + "loss": 0.8828, + "step": 3089 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009522495894019618, + "loss": 0.8555, + "step": 3090 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009522124624781115, + "loss": 0.8906, + "step": 3091 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009521753218507383, + "loss": 0.9023, + "step": 3092 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009521381675209675, + "loss": 0.8594, + "step": 3093 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009521009994899251, + "loss": 0.9297, + "step": 3094 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009520638177587371, + "loss": 0.9414, + "step": 3095 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009520266223285306, + "loss": 0.957, + "step": 3096 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009519894132004327, + "loss": 0.9297, + "step": 3097 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009519521903755709, + "loss": 0.9258, + "step": 3098 + }, + { + "epoch": 0.17, + "learning_rate": 0.000951914953855073, + "loss": 0.8867, + "step": 3099 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009518777036400676, + "loss": 0.9492, + "step": 3100 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009518404397316836, + "loss": 0.9141, + "step": 3101 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009518031621310499, + "loss": 0.9648, + "step": 3102 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009517658708392964, + "loss": 0.9492, + "step": 3103 + }, + { + "epoch": 0.17, + "learning_rate": 0.000951728565857553, + "loss": 0.8594, + "step": 3104 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009516912471869503, + "loss": 0.8828, + "step": 3105 + }, + { + "epoch": 0.17, + "learning_rate": 0.000951653914828619, + "loss": 0.9492, + "step": 3106 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009516165687836904, + "loss": 0.9219, + "step": 3107 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009515792090532964, + "loss": 0.8477, + "step": 3108 + }, + { + "epoch": 0.17, + "learning_rate": 0.000951541835638569, + "loss": 0.8477, + "step": 3109 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009515044485406408, + "loss": 0.8867, + "step": 3110 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009514670477606446, + "loss": 0.8594, + "step": 3111 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009514296332997138, + "loss": 0.8477, + "step": 3112 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009513922051589825, + "loss": 0.8398, + "step": 3113 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009513547633395845, + "loss": 0.9414, + "step": 3114 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009513173078426547, + "loss": 1.0156, + "step": 3115 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009512798386693277, + "loss": 0.9648, + "step": 3116 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009512423558207394, + "loss": 0.8945, + "step": 3117 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009512048592980256, + "loss": 0.8203, + "step": 3118 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009511673491023223, + "loss": 0.8906, + "step": 3119 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009511298252347663, + "loss": 0.9023, + "step": 3120 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009510922876964947, + "loss": 0.8906, + "step": 3121 + }, + { + "epoch": 0.17, + "learning_rate": 0.000951054736488645, + "loss": 0.957, + "step": 3122 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009510171716123553, + "loss": 0.9023, + "step": 3123 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009509795930687637, + "loss": 0.8398, + "step": 3124 + }, + { + "epoch": 0.17, + "learning_rate": 0.000950942000859009, + "loss": 0.9492, + "step": 3125 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009509043949842305, + "loss": 0.9961, + "step": 3126 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009508667754455676, + "loss": 0.8711, + "step": 3127 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009508291422441606, + "loss": 0.8516, + "step": 3128 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009507914953811495, + "loss": 0.9727, + "step": 3129 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009507538348576754, + "loss": 0.9648, + "step": 3130 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009507161606748795, + "loss": 0.8828, + "step": 3131 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009506784728339033, + "loss": 0.8477, + "step": 3132 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009506407713358891, + "loss": 0.9297, + "step": 3133 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009506030561819792, + "loss": 0.9336, + "step": 3134 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009505653273733166, + "loss": 0.8477, + "step": 3135 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009505275849110446, + "loss": 0.8594, + "step": 3136 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009504898287963066, + "loss": 0.8906, + "step": 3137 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009504520590302473, + "loss": 0.8945, + "step": 3138 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009504142756140108, + "loss": 0.9766, + "step": 3139 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009503764785487425, + "loss": 0.8438, + "step": 3140 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009503386678355872, + "loss": 0.9062, + "step": 3141 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009503008434756912, + "loss": 0.957, + "step": 3142 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009502630054702003, + "loss": 0.8867, + "step": 3143 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009502251538202613, + "loss": 0.918, + "step": 3144 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009501872885270214, + "loss": 0.9258, + "step": 3145 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009501494095916277, + "loss": 0.9414, + "step": 3146 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009501115170152284, + "loss": 0.8125, + "step": 3147 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009500736107989716, + "loss": 0.9414, + "step": 3148 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009500356909440059, + "loss": 0.8633, + "step": 3149 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009499977574514804, + "loss": 0.918, + "step": 3150 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009499598103225449, + "loss": 0.8359, + "step": 3151 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009499218495583491, + "loss": 0.8438, + "step": 3152 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009498838751600432, + "loss": 0.9531, + "step": 3153 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009498458871287781, + "loss": 0.9805, + "step": 3154 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009498078854657051, + "loss": 0.8789, + "step": 3155 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009497698701719754, + "loss": 0.7773, + "step": 3156 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009497318412487416, + "loss": 0.8242, + "step": 3157 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009496937986971554, + "loss": 0.9219, + "step": 3158 + }, + { + "epoch": 0.17, + "learning_rate": 0.00094965574251837, + "loss": 0.9453, + "step": 3159 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009496176727135387, + "loss": 0.8594, + "step": 3160 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009495795892838151, + "loss": 0.8828, + "step": 3161 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009495414922303529, + "loss": 0.8672, + "step": 3162 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009495033815543068, + "loss": 1.0469, + "step": 3163 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009494652572568319, + "loss": 0.8945, + "step": 3164 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009494271193390833, + "loss": 0.9297, + "step": 3165 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009493889678022165, + "loss": 0.9531, + "step": 3166 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009493508026473881, + "loss": 0.8789, + "step": 3167 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009493126238757541, + "loss": 0.9883, + "step": 3168 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009492744314884719, + "loss": 0.8516, + "step": 3169 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009492362254866984, + "loss": 0.8633, + "step": 3170 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009491980058715918, + "loss": 0.9922, + "step": 3171 + }, + { + "epoch": 0.17, + "learning_rate": 0.00094915977264431, + "loss": 0.9141, + "step": 3172 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009491215258060117, + "loss": 0.918, + "step": 3173 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009490832653578559, + "loss": 0.8828, + "step": 3174 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009490449913010021, + "loss": 0.9258, + "step": 3175 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009490067036366099, + "loss": 0.9453, + "step": 3176 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009489684023658398, + "loss": 0.8516, + "step": 3177 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009489300874898522, + "loss": 0.9219, + "step": 3178 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009488917590098084, + "loss": 0.875, + "step": 3179 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009488534169268697, + "loss": 0.9219, + "step": 3180 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009488150612421981, + "loss": 0.9297, + "step": 3181 + }, + { + "epoch": 0.17, + "learning_rate": 0.000948776691956956, + "loss": 0.875, + "step": 3182 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009487383090723057, + "loss": 0.8359, + "step": 3183 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009486999125894109, + "loss": 0.9648, + "step": 3184 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009486615025094347, + "loss": 0.8555, + "step": 3185 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009486230788335413, + "loss": 0.9414, + "step": 3186 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009485846415628949, + "loss": 0.9062, + "step": 3187 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009485461906986603, + "loss": 0.9375, + "step": 3188 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009485077262420028, + "loss": 0.8984, + "step": 3189 + }, + { + "epoch": 0.17, + "learning_rate": 0.000948469248194088, + "loss": 0.918, + "step": 3190 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009484307565560817, + "loss": 0.8672, + "step": 3191 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009483922513291506, + "loss": 0.8984, + "step": 3192 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009483537325144613, + "loss": 0.9531, + "step": 3193 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009483152001131812, + "loss": 0.918, + "step": 3194 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009482766541264778, + "loss": 0.9297, + "step": 3195 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009482380945555193, + "loss": 1.0, + "step": 3196 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009481995214014743, + "loss": 0.8633, + "step": 3197 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009481609346655115, + "loss": 0.9805, + "step": 3198 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009481223343488002, + "loss": 0.9336, + "step": 3199 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009480837204525103, + "loss": 0.8594, + "step": 3200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009480450929778118, + "loss": 0.9297, + "step": 3201 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009480064519258751, + "loss": 0.918, + "step": 3202 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009479677972978713, + "loss": 0.8945, + "step": 3203 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009479291290949717, + "loss": 0.9609, + "step": 3204 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009478904473183483, + "loss": 0.9609, + "step": 3205 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009478517519691729, + "loss": 1.0078, + "step": 3206 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009478130430486185, + "loss": 0.8789, + "step": 3207 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009477743205578579, + "loss": 0.957, + "step": 3208 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009477355844980644, + "loss": 0.9023, + "step": 3209 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009476968348704121, + "loss": 0.8516, + "step": 3210 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009476580716760751, + "loss": 0.8086, + "step": 3211 + }, + { + "epoch": 0.17, + "learning_rate": 0.000947619294916228, + "loss": 0.8281, + "step": 3212 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009475805045920459, + "loss": 0.8047, + "step": 3213 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009475417007047043, + "loss": 0.8359, + "step": 3214 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009475028832553791, + "loss": 0.9102, + "step": 3215 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009474640522452467, + "loss": 0.9375, + "step": 3216 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009474252076754835, + "loss": 0.8242, + "step": 3217 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009473863495472669, + "loss": 0.9727, + "step": 3218 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009473474778617742, + "loss": 0.8359, + "step": 3219 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009473085926201836, + "loss": 0.8633, + "step": 3220 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009472696938236734, + "loss": 0.8906, + "step": 3221 + }, + { + "epoch": 0.17, + "learning_rate": 0.000947230781473422, + "loss": 0.8555, + "step": 3222 + }, + { + "epoch": 0.17, + "learning_rate": 0.000947191855570609, + "loss": 1.0078, + "step": 3223 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009471529161164139, + "loss": 0.7578, + "step": 3224 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009471139631120165, + "loss": 0.9141, + "step": 3225 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009470749965585975, + "loss": 0.957, + "step": 3226 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009470360164573375, + "loss": 0.9648, + "step": 3227 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009469970228094176, + "loss": 0.9062, + "step": 3228 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009469580156160198, + "loss": 0.8555, + "step": 3229 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009469189948783259, + "loss": 0.9648, + "step": 3230 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009468799605975185, + "loss": 0.8984, + "step": 3231 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009468409127747802, + "loss": 0.9297, + "step": 3232 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009468018514112946, + "loss": 0.918, + "step": 3233 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009467627765082452, + "loss": 0.9023, + "step": 3234 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009467236880668162, + "loss": 0.9531, + "step": 3235 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009466845860881918, + "loss": 1.0234, + "step": 3236 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009466454705735574, + "loss": 0.8945, + "step": 3237 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009466063415240981, + "loss": 0.9609, + "step": 3238 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009465671989409997, + "loss": 0.9102, + "step": 3239 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009465280428254481, + "loss": 0.9414, + "step": 3240 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009464888731786301, + "loss": 0.875, + "step": 3241 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009464496900017325, + "loss": 0.9453, + "step": 3242 + }, + { + "epoch": 0.17, + "learning_rate": 0.000946410493295943, + "loss": 0.9609, + "step": 3243 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009463712830624491, + "loss": 0.8789, + "step": 3244 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009463320593024391, + "loss": 0.8867, + "step": 3245 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009462928220171015, + "loss": 0.8984, + "step": 3246 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009462535712076255, + "loss": 0.9727, + "step": 3247 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009462143068752003, + "loss": 0.9453, + "step": 3248 + }, + { + "epoch": 0.17, + "learning_rate": 0.000946175029021016, + "loss": 0.8633, + "step": 3249 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009461357376462627, + "loss": 0.9023, + "step": 3250 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009460964327521311, + "loss": 0.9023, + "step": 3251 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009460571143398122, + "loss": 0.9414, + "step": 3252 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009460177824104976, + "loss": 0.8672, + "step": 3253 + }, + { + "epoch": 0.17, + "learning_rate": 0.000945978436965379, + "loss": 0.8672, + "step": 3254 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009459390780056488, + "loss": 0.9414, + "step": 3255 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009458997055324999, + "loss": 0.8906, + "step": 3256 + }, + { + "epoch": 0.18, + "learning_rate": 0.000945860319547125, + "loss": 0.8867, + "step": 3257 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009458209200507179, + "loss": 0.9023, + "step": 3258 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009457815070444725, + "loss": 0.9453, + "step": 3259 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009457420805295831, + "loss": 0.9023, + "step": 3260 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009457026405072447, + "loss": 0.8672, + "step": 3261 + }, + { + "epoch": 0.18, + "learning_rate": 0.000945663186978652, + "loss": 0.875, + "step": 3262 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009456237199450009, + "loss": 0.8555, + "step": 3263 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009455842394074872, + "loss": 0.9844, + "step": 3264 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009455447453673076, + "loss": 0.8398, + "step": 3265 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009455052378256586, + "loss": 0.8789, + "step": 3266 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009454657167837372, + "loss": 0.9141, + "step": 3267 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009454261822427417, + "loss": 0.957, + "step": 3268 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009453866342038696, + "loss": 0.8828, + "step": 3269 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009453470726683195, + "loss": 0.8633, + "step": 3270 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009453074976372901, + "loss": 0.8359, + "step": 3271 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009452679091119809, + "loss": 0.875, + "step": 3272 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009452283070935914, + "loss": 0.8984, + "step": 3273 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009451886915833219, + "loss": 0.8828, + "step": 3274 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009451490625823724, + "loss": 0.9375, + "step": 3275 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009451094200919442, + "loss": 0.8984, + "step": 3276 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009450697641132384, + "loss": 0.8984, + "step": 3277 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009450300946474568, + "loss": 0.9102, + "step": 3278 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009449904116958015, + "loss": 0.9297, + "step": 3279 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009449507152594751, + "loss": 0.8867, + "step": 3280 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009449110053396803, + "loss": 0.8867, + "step": 3281 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009448712819376207, + "loss": 0.9375, + "step": 3282 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009448315450544999, + "loss": 0.8672, + "step": 3283 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009447917946915221, + "loss": 0.8086, + "step": 3284 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009447520308498919, + "loss": 0.9258, + "step": 3285 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009447122535308142, + "loss": 0.9531, + "step": 3286 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009446724627354944, + "loss": 0.9531, + "step": 3287 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009446326584651384, + "loss": 0.8633, + "step": 3288 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009445928407209523, + "loss": 0.8867, + "step": 3289 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009445530095041426, + "loss": 0.8789, + "step": 3290 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009445131648159166, + "loss": 0.9648, + "step": 3291 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009444733066574815, + "loss": 0.875, + "step": 3292 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009444334350300452, + "loss": 0.8672, + "step": 3293 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009443935499348159, + "loss": 0.9336, + "step": 3294 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009443536513730022, + "loss": 0.9062, + "step": 3295 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009443137393458134, + "loss": 0.9023, + "step": 3296 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009442738138544588, + "loss": 0.8711, + "step": 3297 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009442338749001483, + "loss": 0.9492, + "step": 3298 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009441939224840921, + "loss": 0.8164, + "step": 3299 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009441539566075009, + "loss": 0.9219, + "step": 3300 + }, + { + "epoch": 0.18, + "learning_rate": 0.000944113977271586, + "loss": 0.957, + "step": 3301 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009440739844775586, + "loss": 0.9414, + "step": 3302 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009440339782266309, + "loss": 0.8398, + "step": 3303 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009439939585200152, + "loss": 0.8594, + "step": 3304 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009439539253589239, + "loss": 0.8633, + "step": 3305 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009439138787445705, + "loss": 0.9062, + "step": 3306 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009438738186781682, + "loss": 0.9688, + "step": 3307 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009438337451609314, + "loss": 0.9727, + "step": 3308 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009437936581940741, + "loss": 0.8594, + "step": 3309 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009437535577788112, + "loss": 0.8945, + "step": 3310 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009437134439163578, + "loss": 0.9766, + "step": 3311 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009436733166079296, + "loss": 0.8359, + "step": 3312 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009436331758547425, + "loss": 0.8906, + "step": 3313 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009435930216580129, + "loss": 0.8711, + "step": 3314 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009435528540189576, + "loss": 0.918, + "step": 3315 + }, + { + "epoch": 0.18, + "learning_rate": 0.000943512672938794, + "loss": 0.9453, + "step": 3316 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009434724784187394, + "loss": 0.9453, + "step": 3317 + }, + { + "epoch": 0.18, + "learning_rate": 0.000943432270460012, + "loss": 0.9336, + "step": 3318 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009433920490638302, + "loss": 0.8594, + "step": 3319 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009433518142314128, + "loss": 0.8398, + "step": 3320 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009433115659639793, + "loss": 0.8945, + "step": 3321 + }, + { + "epoch": 0.18, + "learning_rate": 0.000943271304262749, + "loss": 0.9062, + "step": 3322 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009432310291289421, + "loss": 0.9648, + "step": 3323 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009431907405637791, + "loss": 0.9297, + "step": 3324 + }, + { + "epoch": 0.18, + "learning_rate": 0.000943150438568481, + "loss": 0.9141, + "step": 3325 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009431101231442689, + "loss": 0.8711, + "step": 3326 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009430697942923645, + "loss": 0.8789, + "step": 3327 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009430294520139899, + "loss": 0.9609, + "step": 3328 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009429890963103676, + "loss": 0.8398, + "step": 3329 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009429487271827206, + "loss": 0.9297, + "step": 3330 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009429083446322723, + "loss": 0.918, + "step": 3331 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009428679486602461, + "loss": 0.8789, + "step": 3332 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009428275392678665, + "loss": 0.9258, + "step": 3333 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009427871164563577, + "loss": 0.9609, + "step": 3334 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009427466802269448, + "loss": 1.0625, + "step": 3335 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009427062305808531, + "loss": 0.8164, + "step": 3336 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009426657675193086, + "loss": 0.9961, + "step": 3337 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009426252910435371, + "loss": 0.9531, + "step": 3338 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009425848011547656, + "loss": 0.8555, + "step": 3339 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009425442978542205, + "loss": 0.957, + "step": 3340 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009425037811431296, + "loss": 0.9023, + "step": 3341 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009424632510227207, + "loss": 0.957, + "step": 3342 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009424227074942218, + "loss": 0.8984, + "step": 3343 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009423821505588616, + "loss": 0.875, + "step": 3344 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009423415802178689, + "loss": 0.8438, + "step": 3345 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009423009964724737, + "loss": 1.0, + "step": 3346 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009422603993239052, + "loss": 0.8984, + "step": 3347 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009422197887733938, + "loss": 0.8477, + "step": 3348 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009421791648221704, + "loss": 1.0625, + "step": 3349 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009421385274714657, + "loss": 0.8594, + "step": 3350 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009420978767225112, + "loss": 0.9336, + "step": 3351 + }, + { + "epoch": 0.18, + "learning_rate": 0.000942057212576539, + "loss": 0.8477, + "step": 3352 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009420165350347812, + "loss": 0.8945, + "step": 3353 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009419758440984702, + "loss": 0.8711, + "step": 3354 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009419351397688396, + "loss": 0.8711, + "step": 3355 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009418944220471224, + "loss": 0.9062, + "step": 3356 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009418536909345527, + "loss": 0.9609, + "step": 3357 + }, + { + "epoch": 0.18, + "learning_rate": 0.000941812946432365, + "loss": 0.8203, + "step": 3358 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009417721885417935, + "loss": 0.9023, + "step": 3359 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009417314172640737, + "loss": 0.8359, + "step": 3360 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009416906326004407, + "loss": 0.8984, + "step": 3361 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009416498345521309, + "loss": 0.8398, + "step": 3362 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009416090231203802, + "loss": 1.0938, + "step": 3363 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009415681983064257, + "loss": 0.832, + "step": 3364 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009415273601115041, + "loss": 0.8555, + "step": 3365 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009414865085368533, + "loss": 0.9141, + "step": 3366 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009414456435837111, + "loss": 0.8633, + "step": 3367 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009414047652533158, + "loss": 0.9453, + "step": 3368 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009413638735469061, + "loss": 0.8359, + "step": 3369 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009413229684657214, + "loss": 0.9453, + "step": 3370 + }, + { + "epoch": 0.18, + "learning_rate": 0.000941282050011001, + "loss": 0.8594, + "step": 3371 + }, + { + "epoch": 0.18, + "learning_rate": 0.000941241118183985, + "loss": 0.8516, + "step": 3372 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009412001729859137, + "loss": 0.918, + "step": 3373 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009411592144180278, + "loss": 0.957, + "step": 3374 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009411182424815687, + "loss": 0.8594, + "step": 3375 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009410772571777779, + "loss": 0.875, + "step": 3376 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009410362585078973, + "loss": 0.8555, + "step": 3377 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009409952464731693, + "loss": 0.9258, + "step": 3378 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009409542210748367, + "loss": 0.9453, + "step": 3379 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009409131823141429, + "loss": 0.8789, + "step": 3380 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009408721301923313, + "loss": 0.8828, + "step": 3381 + }, + { + "epoch": 0.18, + "learning_rate": 0.000940831064710646, + "loss": 0.8984, + "step": 3382 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009407899858703313, + "loss": 0.8906, + "step": 3383 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009407488936726322, + "loss": 0.8828, + "step": 3384 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009407077881187938, + "loss": 0.8711, + "step": 3385 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009406666692100619, + "loss": 0.7734, + "step": 3386 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009406255369476822, + "loss": 0.8945, + "step": 3387 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009405843913329015, + "loss": 0.9297, + "step": 3388 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009405432323669664, + "loss": 0.9141, + "step": 3389 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009405020600511245, + "loss": 0.8984, + "step": 3390 + }, + { + "epoch": 0.18, + "learning_rate": 0.000940460874386623, + "loss": 0.9492, + "step": 3391 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009404196753747103, + "loss": 0.9219, + "step": 3392 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009403784630166347, + "loss": 0.9062, + "step": 3393 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009403372373136449, + "loss": 0.9141, + "step": 3394 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009402959982669907, + "loss": 0.8867, + "step": 3395 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009402547458779213, + "loss": 0.9414, + "step": 3396 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009402134801476871, + "loss": 0.8945, + "step": 3397 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009401722010775382, + "loss": 0.8398, + "step": 3398 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009401309086687259, + "loss": 0.8594, + "step": 3399 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009400896029225013, + "loss": 0.8242, + "step": 3400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009400482838401161, + "loss": 0.9219, + "step": 3401 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009400069514228223, + "loss": 0.8906, + "step": 3402 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009399656056718726, + "loss": 0.9453, + "step": 3403 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009399242465885201, + "loss": 0.8086, + "step": 3404 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009398828741740175, + "loss": 0.8906, + "step": 3405 + }, + { + "epoch": 0.18, + "learning_rate": 0.000939841488429619, + "loss": 0.8906, + "step": 3406 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009398000893565786, + "loss": 0.8242, + "step": 3407 + }, + { + "epoch": 0.18, + "learning_rate": 0.000939758676956151, + "loss": 0.8672, + "step": 3408 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009397172512295908, + "loss": 0.9258, + "step": 3409 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009396758121781535, + "loss": 0.9922, + "step": 3410 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009396343598030948, + "loss": 0.9219, + "step": 3411 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009395928941056711, + "loss": 0.8984, + "step": 3412 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009395514150871384, + "loss": 0.832, + "step": 3413 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009395099227487542, + "loss": 0.8594, + "step": 3414 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009394684170917758, + "loss": 0.8711, + "step": 3415 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009394268981174605, + "loss": 0.9023, + "step": 3416 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009393853658270668, + "loss": 0.8672, + "step": 3417 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009393438202218533, + "loss": 0.9531, + "step": 3418 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009393022613030788, + "loss": 0.9531, + "step": 3419 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009392606890720028, + "loss": 0.9258, + "step": 3420 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009392191035298851, + "loss": 0.9297, + "step": 3421 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009391775046779858, + "loss": 0.9492, + "step": 3422 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009391358925175655, + "loss": 1.1719, + "step": 3423 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009390942670498849, + "loss": 0.9492, + "step": 3424 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009390526282762059, + "loss": 1.0078, + "step": 3425 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009390109761977902, + "loss": 0.918, + "step": 3426 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009389693108158996, + "loss": 1.0078, + "step": 3427 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009389276321317972, + "loss": 1.0234, + "step": 3428 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009388859401467456, + "loss": 0.8047, + "step": 3429 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009388442348620083, + "loss": 0.9844, + "step": 3430 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009388025162788493, + "loss": 0.9219, + "step": 3431 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009387607843985326, + "loss": 0.9453, + "step": 3432 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009387190392223228, + "loss": 1.0078, + "step": 3433 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009386772807514852, + "loss": 0.875, + "step": 3434 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009386355089872849, + "loss": 0.957, + "step": 3435 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009385937239309879, + "loss": 0.9688, + "step": 3436 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009385519255838602, + "loss": 1.0469, + "step": 3437 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009385101139471688, + "loss": 1.0312, + "step": 3438 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009384682890221805, + "loss": 0.8555, + "step": 3439 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009384264508101627, + "loss": 1.0156, + "step": 3440 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009383845993123831, + "loss": 0.9453, + "step": 3441 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009383427345301106, + "loss": 1.0078, + "step": 3442 + }, + { + "epoch": 0.19, + "learning_rate": 0.000938300856464613, + "loss": 1.0391, + "step": 3443 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009382589651171598, + "loss": 0.9492, + "step": 3444 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009382170604890203, + "loss": 0.9609, + "step": 3445 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009381751425814645, + "loss": 0.9648, + "step": 3446 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009381332113957626, + "loss": 0.9336, + "step": 3447 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009380912669331851, + "loss": 1.0078, + "step": 3448 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009380493091950033, + "loss": 1.1016, + "step": 3449 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009380073381824883, + "loss": 0.8633, + "step": 3450 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009379653538969124, + "loss": 0.9531, + "step": 3451 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009379233563395475, + "loss": 0.9258, + "step": 3452 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009378813455116666, + "loss": 0.918, + "step": 3453 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009378393214145424, + "loss": 1.0234, + "step": 3454 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009377972840494487, + "loss": 0.9922, + "step": 3455 + }, + { + "epoch": 0.19, + "learning_rate": 0.000937755233417659, + "loss": 0.9648, + "step": 3456 + }, + { + "epoch": 0.19, + "learning_rate": 0.000937713169520448, + "loss": 1.0156, + "step": 3457 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009376710923590898, + "loss": 0.9531, + "step": 3458 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009376290019348602, + "loss": 1.0156, + "step": 3459 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009375868982490342, + "loss": 1.0391, + "step": 3460 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009375447813028877, + "loss": 0.9844, + "step": 3461 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009375026510976972, + "loss": 1.0234, + "step": 3462 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009374605076347394, + "loss": 1.0547, + "step": 3463 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009374183509152909, + "loss": 0.9648, + "step": 3464 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009373761809406298, + "loss": 0.9297, + "step": 3465 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009373339977120337, + "loss": 0.8828, + "step": 3466 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009372918012307808, + "loss": 1.0, + "step": 3467 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009372495914981499, + "loss": 1.0078, + "step": 3468 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009372073685154201, + "loss": 0.9883, + "step": 3469 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009371651322838709, + "loss": 0.8633, + "step": 3470 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009371228828047822, + "loss": 0.9219, + "step": 3471 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009370806200794343, + "loss": 1.0469, + "step": 3472 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009370383441091079, + "loss": 0.9805, + "step": 3473 + }, + { + "epoch": 0.19, + "learning_rate": 0.000936996054895084, + "loss": 1.0156, + "step": 3474 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009369537524386442, + "loss": 1.0312, + "step": 3475 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009369114367410705, + "loss": 0.9961, + "step": 3476 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009368691078036451, + "loss": 0.9727, + "step": 3477 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009368267656276506, + "loss": 1.0, + "step": 3478 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009367844102143704, + "loss": 0.918, + "step": 3479 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009367420415650876, + "loss": 0.9922, + "step": 3480 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009366996596810863, + "loss": 0.8984, + "step": 3481 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009366572645636509, + "loss": 0.9766, + "step": 3482 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009366148562140661, + "loss": 1.0625, + "step": 3483 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009365724346336168, + "loss": 0.9961, + "step": 3484 + }, + { + "epoch": 0.19, + "learning_rate": 0.000936529999823589, + "loss": 0.9609, + "step": 3485 + }, + { + "epoch": 0.19, + "learning_rate": 0.000936487551785268, + "loss": 0.9336, + "step": 3486 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009364450905199406, + "loss": 0.9453, + "step": 3487 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009364026160288932, + "loss": 0.875, + "step": 3488 + }, + { + "epoch": 0.19, + "learning_rate": 0.000936360128313413, + "loss": 0.9727, + "step": 3489 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009363176273747876, + "loss": 0.9844, + "step": 3490 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009362751132143049, + "loss": 0.9766, + "step": 3491 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009362325858332532, + "loss": 0.9258, + "step": 3492 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009361900452329211, + "loss": 0.9844, + "step": 3493 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009361474914145979, + "loss": 0.8672, + "step": 3494 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009361049243795731, + "loss": 0.9531, + "step": 3495 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009360623441291365, + "loss": 0.9727, + "step": 3496 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009360197506645784, + "loss": 1.0156, + "step": 3497 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009359771439871898, + "loss": 0.9102, + "step": 3498 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009359345240982616, + "loss": 0.9883, + "step": 3499 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009358918909990854, + "loss": 0.9453, + "step": 3500 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009358492446909529, + "loss": 0.9453, + "step": 3501 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009358065851751567, + "loss": 0.8711, + "step": 3502 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009357639124529894, + "loss": 0.9219, + "step": 3503 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009357212265257442, + "loss": 0.9375, + "step": 3504 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009356785273947146, + "loss": 1.0312, + "step": 3505 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009356358150611943, + "loss": 0.9336, + "step": 3506 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009355930895264781, + "loss": 0.9102, + "step": 3507 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009355503507918603, + "loss": 0.9062, + "step": 3508 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009355075988586362, + "loss": 1.0078, + "step": 3509 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009354648337281013, + "loss": 0.9297, + "step": 3510 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009354220554015516, + "loss": 0.9336, + "step": 3511 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009353792638802831, + "loss": 1.0078, + "step": 3512 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009353364591655931, + "loss": 0.9883, + "step": 3513 + }, + { + "epoch": 0.19, + "learning_rate": 0.000935293641258778, + "loss": 0.9688, + "step": 3514 + }, + { + "epoch": 0.19, + "learning_rate": 0.000935250810161136, + "loss": 1.0312, + "step": 3515 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009352079658739648, + "loss": 0.9414, + "step": 3516 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009351651083985624, + "loss": 1.0312, + "step": 3517 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009351222377362279, + "loss": 1.0078, + "step": 3518 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009350793538882603, + "loss": 0.9531, + "step": 3519 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009350364568559591, + "loss": 0.9062, + "step": 3520 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009349935466406243, + "loss": 0.9414, + "step": 3521 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009349506232435561, + "loss": 0.9141, + "step": 3522 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009349076866660553, + "loss": 0.8828, + "step": 3523 + }, + { + "epoch": 0.19, + "learning_rate": 0.000934864736909423, + "loss": 0.9453, + "step": 3524 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009348217739749607, + "loss": 0.9531, + "step": 3525 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009347787978639705, + "loss": 0.9102, + "step": 3526 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009347358085777545, + "loss": 0.9883, + "step": 3527 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009346928061176155, + "loss": 0.9766, + "step": 3528 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009346497904848564, + "loss": 0.9258, + "step": 3529 + }, + { + "epoch": 0.19, + "learning_rate": 0.000934606761680781, + "loss": 0.8984, + "step": 3530 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009345637197066931, + "loss": 0.8789, + "step": 3531 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009345206645638971, + "loss": 0.9102, + "step": 3532 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009344775962536976, + "loss": 0.9727, + "step": 3533 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009344345147773998, + "loss": 0.957, + "step": 3534 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009343914201363092, + "loss": 1.0391, + "step": 3535 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009343483123317315, + "loss": 0.9766, + "step": 3536 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009343051913649734, + "loss": 0.9219, + "step": 3537 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009342620572373412, + "loss": 0.9688, + "step": 3538 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009342189099501424, + "loss": 0.9492, + "step": 3539 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009341757495046842, + "loss": 0.9062, + "step": 3540 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009341325759022746, + "loss": 0.9805, + "step": 3541 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009340893891442219, + "loss": 0.8945, + "step": 3542 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009340461892318349, + "loss": 0.9883, + "step": 3543 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009340029761664225, + "loss": 0.9883, + "step": 3544 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009339597499492943, + "loss": 0.9492, + "step": 3545 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009339165105817602, + "loss": 0.9141, + "step": 3546 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009338732580651306, + "loss": 0.7969, + "step": 3547 + }, + { + "epoch": 0.19, + "learning_rate": 0.000933829992400716, + "loss": 0.9375, + "step": 3548 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009337867135898277, + "loss": 0.8867, + "step": 3549 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009337434216337769, + "loss": 0.9219, + "step": 3550 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009337001165338756, + "loss": 0.9102, + "step": 3551 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009336567982914364, + "loss": 0.9453, + "step": 3552 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009336134669077716, + "loss": 0.9648, + "step": 3553 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009335701223841945, + "loss": 0.8945, + "step": 3554 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009335267647220184, + "loss": 0.8438, + "step": 3555 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009334833939225574, + "loss": 0.7852, + "step": 3556 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009334400099871254, + "loss": 0.9375, + "step": 3557 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009333966129170375, + "loss": 0.8906, + "step": 3558 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009333532027136085, + "loss": 1.0312, + "step": 3559 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009333097793781543, + "loss": 0.9336, + "step": 3560 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009332663429119901, + "loss": 0.8867, + "step": 3561 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009332228933164326, + "loss": 0.8125, + "step": 3562 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009331794305927983, + "loss": 0.9062, + "step": 3563 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009331359547424046, + "loss": 0.918, + "step": 3564 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009330924657665685, + "loss": 0.9219, + "step": 3565 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009330489636666081, + "loss": 0.8672, + "step": 3566 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009330054484438415, + "loss": 0.9297, + "step": 3567 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009329619200995876, + "loss": 0.9766, + "step": 3568 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009329183786351654, + "loss": 0.9609, + "step": 3569 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009328748240518942, + "loss": 0.9961, + "step": 3570 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009328312563510938, + "loss": 1.0, + "step": 3571 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009327876755340848, + "loss": 0.9961, + "step": 3572 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009327440816021875, + "loss": 0.9258, + "step": 3573 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009327004745567229, + "loss": 1.0547, + "step": 3574 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009326568543990128, + "loss": 0.9922, + "step": 3575 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009326132211303787, + "loss": 0.8867, + "step": 3576 + }, + { + "epoch": 0.19, + "learning_rate": 0.000932569574752143, + "loss": 0.8945, + "step": 3577 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009325259152656283, + "loss": 1.0312, + "step": 3578 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009324822426721574, + "loss": 0.9062, + "step": 3579 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009324385569730541, + "loss": 0.8672, + "step": 3580 + }, + { + "epoch": 0.19, + "learning_rate": 0.000932394858169642, + "loss": 0.9531, + "step": 3581 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009323511462632454, + "loss": 0.9219, + "step": 3582 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009323074212551889, + "loss": 0.832, + "step": 3583 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009322636831467974, + "loss": 0.9258, + "step": 3584 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009322199319393965, + "loss": 0.9102, + "step": 3585 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009321761676343118, + "loss": 0.75, + "step": 3586 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009321323902328697, + "loss": 0.9844, + "step": 3587 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009320885997363966, + "loss": 0.9336, + "step": 3588 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009320447961462197, + "loss": 0.8984, + "step": 3589 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009320009794636661, + "loss": 0.9609, + "step": 3590 + }, + { + "epoch": 0.19, + "learning_rate": 0.000931957149690064, + "loss": 0.8984, + "step": 3591 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009319133068267413, + "loss": 0.9922, + "step": 3592 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009318694508750268, + "loss": 0.8711, + "step": 3593 + }, + { + "epoch": 0.19, + "learning_rate": 0.000931825581836249, + "loss": 0.9023, + "step": 3594 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009317816997117379, + "loss": 0.7891, + "step": 3595 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009317378045028227, + "loss": 0.9375, + "step": 3596 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009316938962108341, + "loss": 0.9141, + "step": 3597 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009316499748371023, + "loss": 0.9102, + "step": 3598 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009316060403829583, + "loss": 0.9961, + "step": 3599 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009315620928497337, + "loss": 0.9492, + "step": 3600 + }, + { + "epoch": 0.19, + "learning_rate": 0.00093151813223876, + "loss": 1.0078, + "step": 3601 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009314741585513695, + "loss": 0.8398, + "step": 3602 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009314301717888947, + "loss": 0.9141, + "step": 3603 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009313861719526684, + "loss": 0.8828, + "step": 3604 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009313421590440241, + "loss": 0.8711, + "step": 3605 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009312981330642956, + "loss": 0.9883, + "step": 3606 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009312540940148167, + "loss": 1.0625, + "step": 3607 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009312100418969225, + "loss": 0.8555, + "step": 3608 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009311659767119474, + "loss": 0.9609, + "step": 3609 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009311218984612269, + "loss": 0.9648, + "step": 3610 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009310778071460968, + "loss": 0.9102, + "step": 3611 + }, + { + "epoch": 0.19, + "learning_rate": 0.000931033702767893, + "loss": 0.9414, + "step": 3612 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009309895853279522, + "loss": 0.9648, + "step": 3613 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009309454548276113, + "loss": 0.9258, + "step": 3614 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009309013112682075, + "loss": 0.9102, + "step": 3615 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009308571546510787, + "loss": 1.0156, + "step": 3616 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009308129849775627, + "loss": 0.8984, + "step": 3617 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009307688022489979, + "loss": 0.9531, + "step": 3618 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009307246064667236, + "loss": 0.9414, + "step": 3619 + }, + { + "epoch": 0.19, + "learning_rate": 0.000930680397632079, + "loss": 0.9258, + "step": 3620 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009306361757464034, + "loss": 0.8828, + "step": 3621 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009305919408110374, + "loss": 0.9688, + "step": 3622 + }, + { + "epoch": 0.19, + "learning_rate": 0.000930547692827321, + "loss": 0.9492, + "step": 3623 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009305034317965953, + "loss": 0.9688, + "step": 3624 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009304591577202015, + "loss": 0.9766, + "step": 3625 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009304148705994811, + "loss": 0.8789, + "step": 3626 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009303705704357764, + "loss": 0.957, + "step": 3627 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009303262572304298, + "loss": 1.0156, + "step": 3628 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009302819309847839, + "loss": 0.8789, + "step": 3629 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009302375917001822, + "loss": 0.8906, + "step": 3630 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009301932393779681, + "loss": 1.0156, + "step": 3631 + }, + { + "epoch": 0.2, + "learning_rate": 0.000930148874019486, + "loss": 0.9141, + "step": 3632 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009301044956260801, + "loss": 0.9336, + "step": 3633 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009300601041990948, + "loss": 0.9648, + "step": 3634 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009300156997398759, + "loss": 0.9453, + "step": 3635 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009299712822497688, + "loss": 0.8906, + "step": 3636 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009299268517301193, + "loss": 0.9062, + "step": 3637 + }, + { + "epoch": 0.2, + "learning_rate": 0.000929882408182274, + "loss": 0.9805, + "step": 3638 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009298379516075799, + "loss": 0.9102, + "step": 3639 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009297934820073837, + "loss": 0.9141, + "step": 3640 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009297489993830332, + "loss": 0.9219, + "step": 3641 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009297045037358765, + "loss": 0.9375, + "step": 3642 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009296599950672619, + "loss": 0.8984, + "step": 3643 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009296154733785379, + "loss": 0.875, + "step": 3644 + }, + { + "epoch": 0.2, + "learning_rate": 0.000929570938671054, + "loss": 0.875, + "step": 3645 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009295263909461595, + "loss": 1.0078, + "step": 3646 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009294818302052045, + "loss": 0.9102, + "step": 3647 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009294372564495392, + "loss": 0.9336, + "step": 3648 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009293926696805145, + "loss": 0.8984, + "step": 3649 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009293480698994814, + "loss": 1.0625, + "step": 3650 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009293034571077914, + "loss": 0.9805, + "step": 3651 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009292588313067966, + "loss": 0.8945, + "step": 3652 + }, + { + "epoch": 0.2, + "learning_rate": 0.000929214192497849, + "loss": 1.0078, + "step": 3653 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009291695406823016, + "loss": 0.9258, + "step": 3654 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009291248758615074, + "loss": 0.8594, + "step": 3655 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009290801980368199, + "loss": 0.9766, + "step": 3656 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009290355072095928, + "loss": 0.9023, + "step": 3657 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009289908033811807, + "loss": 0.8906, + "step": 3658 + }, + { + "epoch": 0.2, + "learning_rate": 0.000928946086552938, + "loss": 0.9141, + "step": 3659 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009289013567262198, + "loss": 0.8086, + "step": 3660 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009288566139023818, + "loss": 0.9414, + "step": 3661 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009288118580827796, + "loss": 0.918, + "step": 3662 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009287670892687695, + "loss": 0.8594, + "step": 3663 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009287223074617082, + "loss": 0.8828, + "step": 3664 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009286775126629527, + "loss": 0.8594, + "step": 3665 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009286327048738605, + "loss": 0.8789, + "step": 3666 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009285878840957893, + "loss": 0.9766, + "step": 3667 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009285430503300975, + "loss": 0.957, + "step": 3668 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009284982035781435, + "loss": 0.8867, + "step": 3669 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009284533438412864, + "loss": 0.9297, + "step": 3670 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009284084711208856, + "loss": 0.9258, + "step": 3671 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009283635854183008, + "loss": 0.9141, + "step": 3672 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009283186867348925, + "loss": 0.9297, + "step": 3673 + }, + { + "epoch": 0.2, + "learning_rate": 0.000928273775072021, + "loss": 0.8633, + "step": 3674 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009282288504310471, + "loss": 0.9844, + "step": 3675 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009281839128133325, + "loss": 1.0, + "step": 3676 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009281389622202387, + "loss": 0.9766, + "step": 3677 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009280939986531282, + "loss": 0.8477, + "step": 3678 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009280490221133632, + "loss": 1.0234, + "step": 3679 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009280040326023069, + "loss": 0.9258, + "step": 3680 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009279590301213225, + "loss": 0.9766, + "step": 3681 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009279140146717737, + "loss": 0.9219, + "step": 3682 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009278689862550245, + "loss": 0.9453, + "step": 3683 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009278239448724397, + "loss": 0.9531, + "step": 3684 + }, + { + "epoch": 0.2, + "learning_rate": 0.000927778890525384, + "loss": 0.8984, + "step": 3685 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009277338232152228, + "loss": 0.9141, + "step": 3686 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009276887429433216, + "loss": 0.8477, + "step": 3687 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009276436497110467, + "loss": 0.9336, + "step": 3688 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009275985435197644, + "loss": 0.8672, + "step": 3689 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009275534243708418, + "loss": 0.8633, + "step": 3690 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009275082922656459, + "loss": 0.918, + "step": 3691 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009274631472055446, + "loss": 0.9258, + "step": 3692 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009274179891919057, + "loss": 0.918, + "step": 3693 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009273728182260976, + "loss": 0.8828, + "step": 3694 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009273276343094894, + "loss": 0.875, + "step": 3695 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009272824374434504, + "loss": 0.9453, + "step": 3696 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009272372276293497, + "loss": 0.8516, + "step": 3697 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009271920048685577, + "loss": 0.9141, + "step": 3698 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009271467691624447, + "loss": 0.918, + "step": 3699 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009271015205123816, + "loss": 0.9375, + "step": 3700 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009270562589197394, + "loss": 0.9883, + "step": 3701 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009270109843858897, + "loss": 0.793, + "step": 3702 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009269656969122047, + "loss": 0.8594, + "step": 3703 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009269203965000563, + "loss": 0.9297, + "step": 3704 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009268750831508177, + "loss": 0.9414, + "step": 3705 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009268297568658619, + "loss": 0.8125, + "step": 3706 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009267844176465625, + "loss": 0.9414, + "step": 3707 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009267390654942931, + "loss": 1.0234, + "step": 3708 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009266937004104285, + "loss": 0.8555, + "step": 3709 + }, + { + "epoch": 0.2, + "learning_rate": 0.000926648322396343, + "loss": 0.8516, + "step": 3710 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009266029314534119, + "loss": 0.9219, + "step": 3711 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009265575275830108, + "loss": 0.9492, + "step": 3712 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009265121107865153, + "loss": 1.0156, + "step": 3713 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009264666810653018, + "loss": 0.9648, + "step": 3714 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009264212384207471, + "loss": 0.9727, + "step": 3715 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009263757828542282, + "loss": 1.0078, + "step": 3716 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009263303143671225, + "loss": 0.8672, + "step": 3717 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009262848329608078, + "loss": 0.8594, + "step": 3718 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009262393386366625, + "loss": 0.9023, + "step": 3719 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009261938313960649, + "loss": 0.8438, + "step": 3720 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009261483112403945, + "loss": 0.8516, + "step": 3721 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009261027781710303, + "loss": 0.9609, + "step": 3722 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009260572321893524, + "loss": 0.8477, + "step": 3723 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009260116732967406, + "loss": 0.9375, + "step": 3724 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009259661014945759, + "loss": 0.9297, + "step": 3725 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009259205167842391, + "loss": 0.8125, + "step": 3726 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009258749191671116, + "loss": 0.8477, + "step": 3727 + }, + { + "epoch": 0.2, + "learning_rate": 0.000925829308644575, + "loss": 0.9062, + "step": 3728 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009257836852180117, + "loss": 0.8477, + "step": 3729 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009257380488888041, + "loss": 0.9258, + "step": 3730 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009256923996583352, + "loss": 0.8633, + "step": 3731 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009256467375279882, + "loss": 0.9414, + "step": 3732 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009256010624991469, + "loss": 0.9648, + "step": 3733 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009255553745731953, + "loss": 0.957, + "step": 3734 + }, + { + "epoch": 0.2, + "learning_rate": 0.000925509673751518, + "loss": 0.8711, + "step": 3735 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009254639600355002, + "loss": 0.9062, + "step": 3736 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009254182334265264, + "loss": 0.9414, + "step": 3737 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009253724939259828, + "loss": 0.9258, + "step": 3738 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009253267415352556, + "loss": 0.9805, + "step": 3739 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009252809762557308, + "loss": 0.8516, + "step": 3740 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009252351980887955, + "loss": 0.8711, + "step": 3741 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009251894070358369, + "loss": 0.918, + "step": 3742 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009251436030982426, + "loss": 0.9727, + "step": 3743 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009250977862774006, + "loss": 0.9844, + "step": 3744 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009250519565746993, + "loss": 0.9102, + "step": 3745 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009250061139915276, + "loss": 0.875, + "step": 3746 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009249602585292744, + "loss": 0.9023, + "step": 3747 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009249143901893296, + "loss": 0.8867, + "step": 3748 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009248685089730831, + "loss": 0.9297, + "step": 3749 + }, + { + "epoch": 0.2, + "learning_rate": 0.000924822614881925, + "loss": 0.9062, + "step": 3750 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009247767079172462, + "loss": 0.7969, + "step": 3751 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009247307880804378, + "loss": 0.9297, + "step": 3752 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009246848553728915, + "loss": 0.9375, + "step": 3753 + }, + { + "epoch": 0.2, + "learning_rate": 0.000924638909795999, + "loss": 0.8867, + "step": 3754 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009245929513511528, + "loss": 0.8594, + "step": 3755 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009245469800397453, + "loss": 1.0391, + "step": 3756 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009245009958631698, + "loss": 1.0, + "step": 3757 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009244549988228197, + "loss": 0.9258, + "step": 3758 + }, + { + "epoch": 0.2, + "learning_rate": 0.000924408988920089, + "loss": 0.9453, + "step": 3759 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009243629661563717, + "loss": 0.9688, + "step": 3760 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009243169305330626, + "loss": 0.8633, + "step": 3761 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009242708820515567, + "loss": 0.9688, + "step": 3762 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009242248207132495, + "loss": 0.957, + "step": 3763 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009241787465195367, + "loss": 0.9062, + "step": 3764 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009241326594718144, + "loss": 0.9375, + "step": 3765 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009240865595714796, + "loss": 0.9492, + "step": 3766 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009240404468199288, + "loss": 0.9062, + "step": 3767 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009239943212185595, + "loss": 0.875, + "step": 3768 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009239481827687697, + "loss": 0.9375, + "step": 3769 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009239020314719573, + "loss": 0.9414, + "step": 3770 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009238558673295209, + "loss": 0.9375, + "step": 3771 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009238096903428595, + "loss": 0.8945, + "step": 3772 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009237635005133724, + "loss": 0.8281, + "step": 3773 + }, + { + "epoch": 0.2, + "learning_rate": 0.000923717297842459, + "loss": 0.8633, + "step": 3774 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009236710823315199, + "loss": 0.9453, + "step": 3775 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009236248539819552, + "loss": 0.8477, + "step": 3776 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009235786127951659, + "loss": 0.9531, + "step": 3777 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009235323587725532, + "loss": 0.8594, + "step": 3778 + }, + { + "epoch": 0.2, + "learning_rate": 0.000923486091915519, + "loss": 0.9609, + "step": 3779 + }, + { + "epoch": 0.2, + "learning_rate": 0.000923439812225465, + "loss": 0.8945, + "step": 3780 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009233935197037938, + "loss": 0.9609, + "step": 3781 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009233472143519081, + "loss": 0.8516, + "step": 3782 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009233008961712114, + "loss": 0.9688, + "step": 3783 + }, + { + "epoch": 0.2, + "learning_rate": 0.000923254565163107, + "loss": 0.793, + "step": 3784 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009232082213289989, + "loss": 0.9805, + "step": 3785 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009231618646702915, + "loss": 1.0312, + "step": 3786 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009231154951883897, + "loss": 0.9023, + "step": 3787 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009230691128846984, + "loss": 0.9375, + "step": 3788 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009230227177606234, + "loss": 0.8906, + "step": 3789 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009229763098175705, + "loss": 0.957, + "step": 3790 + }, + { + "epoch": 0.2, + "learning_rate": 0.000922929889056946, + "loss": 0.9648, + "step": 3791 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009228834554801566, + "loss": 0.9336, + "step": 3792 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009228370090886095, + "loss": 0.9141, + "step": 3793 + }, + { + "epoch": 0.2, + "learning_rate": 0.000922790549883712, + "loss": 0.9102, + "step": 3794 + }, + { + "epoch": 0.2, + "learning_rate": 0.000922744077866872, + "loss": 0.9219, + "step": 3795 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009226975930394978, + "loss": 0.9414, + "step": 3796 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009226510954029981, + "loss": 0.9258, + "step": 3797 + }, + { + "epoch": 0.2, + "learning_rate": 0.000922604584958782, + "loss": 0.9336, + "step": 3798 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009225580617082586, + "loss": 0.9453, + "step": 3799 + }, + { + "epoch": 0.2, + "learning_rate": 0.000922511525652838, + "loss": 0.8242, + "step": 3800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009224649767939304, + "loss": 0.832, + "step": 3801 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009224184151329462, + "loss": 1.0469, + "step": 3802 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009223718406712964, + "loss": 0.8984, + "step": 3803 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009223252534103925, + "loss": 1.0, + "step": 3804 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009222786533516461, + "loss": 0.8945, + "step": 3805 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009222320404964695, + "loss": 0.832, + "step": 3806 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009221854148462749, + "loss": 0.8672, + "step": 3807 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009221387764024756, + "loss": 0.9414, + "step": 3808 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009220921251664847, + "loss": 0.8789, + "step": 3809 + }, + { + "epoch": 0.2, + "learning_rate": 0.000922045461139716, + "loss": 1.0078, + "step": 3810 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009219987843235833, + "loss": 0.9453, + "step": 3811 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009219520947195015, + "loss": 0.8672, + "step": 3812 + }, + { + "epoch": 0.2, + "learning_rate": 0.000921905392328885, + "loss": 1.0391, + "step": 3813 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009218586771531493, + "loss": 0.8867, + "step": 3814 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009218119491937099, + "loss": 0.9375, + "step": 3815 + }, + { + "epoch": 0.21, + "learning_rate": 0.000921765208451983, + "loss": 0.918, + "step": 3816 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009217184549293847, + "loss": 0.9961, + "step": 3817 + }, + { + "epoch": 0.21, + "learning_rate": 0.000921671688627332, + "loss": 0.9453, + "step": 3818 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009216249095472421, + "loss": 0.9375, + "step": 3819 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009215781176905325, + "loss": 0.9219, + "step": 3820 + }, + { + "epoch": 0.21, + "learning_rate": 0.000921531313058621, + "loss": 0.9805, + "step": 3821 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009214844956529261, + "loss": 0.8789, + "step": 3822 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009214376654748665, + "loss": 0.9102, + "step": 3823 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009213908225258613, + "loss": 0.9023, + "step": 3824 + }, + { + "epoch": 0.21, + "learning_rate": 0.00092134396680733, + "loss": 0.9258, + "step": 3825 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009212970983206925, + "loss": 0.7891, + "step": 3826 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009212502170673692, + "loss": 0.957, + "step": 3827 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009212033230487802, + "loss": 0.8945, + "step": 3828 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009211564162663473, + "loss": 0.9336, + "step": 3829 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009211094967214912, + "loss": 0.9531, + "step": 3830 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009210625644156344, + "loss": 0.9453, + "step": 3831 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009210156193501986, + "loss": 0.9375, + "step": 3832 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009209686615266067, + "loss": 0.8281, + "step": 3833 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009209216909462816, + "loss": 0.9062, + "step": 3834 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009208747076106465, + "loss": 0.9336, + "step": 3835 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009208277115211254, + "loss": 0.8281, + "step": 3836 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009207807026791421, + "loss": 0.9609, + "step": 3837 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009207336810861215, + "loss": 0.918, + "step": 3838 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009206866467434882, + "loss": 0.9531, + "step": 3839 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009206395996526677, + "loss": 0.8906, + "step": 3840 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009205925398150857, + "loss": 0.8828, + "step": 3841 + }, + { + "epoch": 0.21, + "learning_rate": 0.000920545467232168, + "loss": 0.9492, + "step": 3842 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009204983819053413, + "loss": 0.8398, + "step": 3843 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009204512838360323, + "loss": 0.8711, + "step": 3844 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009204041730256684, + "loss": 0.8789, + "step": 3845 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009203570494756771, + "loss": 0.8516, + "step": 3846 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009203099131874864, + "loss": 0.8633, + "step": 3847 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009202627641625247, + "loss": 0.9141, + "step": 3848 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009202156024022208, + "loss": 0.8594, + "step": 3849 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009201684279080037, + "loss": 1.0, + "step": 3850 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009201212406813031, + "loss": 0.957, + "step": 3851 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009200740407235491, + "loss": 1.0, + "step": 3852 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009200268280361714, + "loss": 0.8789, + "step": 3853 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009199796026206014, + "loss": 0.9297, + "step": 3854 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009199323644782698, + "loss": 0.8906, + "step": 3855 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009198851136106081, + "loss": 0.9023, + "step": 3856 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009198378500190483, + "loss": 0.918, + "step": 3857 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009197905737050226, + "loss": 0.9062, + "step": 3858 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009197432846699636, + "loss": 0.9531, + "step": 3859 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009196959829153042, + "loss": 0.8477, + "step": 3860 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009196486684424778, + "loss": 0.8711, + "step": 3861 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009196013412529185, + "loss": 0.9609, + "step": 3862 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009195540013480601, + "loss": 0.9219, + "step": 3863 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009195066487293374, + "loss": 0.9492, + "step": 3864 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009194592833981852, + "loss": 0.8594, + "step": 3865 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009194119053560388, + "loss": 0.9414, + "step": 3866 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009193645146043342, + "loss": 0.9102, + "step": 3867 + }, + { + "epoch": 0.21, + "learning_rate": 0.000919317111144507, + "loss": 0.8008, + "step": 3868 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009192696949779941, + "loss": 0.9141, + "step": 3869 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009192222661062321, + "loss": 0.8945, + "step": 3870 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009191748245306586, + "loss": 0.918, + "step": 3871 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009191273702527109, + "loss": 0.9219, + "step": 3872 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009190799032738271, + "loss": 0.8828, + "step": 3873 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009190324235954456, + "loss": 0.8906, + "step": 3874 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009189849312190053, + "loss": 0.8984, + "step": 3875 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009189374261459452, + "loss": 0.8242, + "step": 3876 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009188899083777051, + "loss": 0.8555, + "step": 3877 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009188423779157247, + "loss": 0.9258, + "step": 3878 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009187948347614443, + "loss": 0.9844, + "step": 3879 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009187472789163049, + "loss": 0.9805, + "step": 3880 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009186997103817476, + "loss": 0.9453, + "step": 3881 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009186521291592135, + "loss": 0.9062, + "step": 3882 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009186045352501447, + "loss": 0.9531, + "step": 3883 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009185569286559837, + "loss": 0.8984, + "step": 3884 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009185093093781726, + "loss": 0.8906, + "step": 3885 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009184616774181546, + "loss": 0.9023, + "step": 3886 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009184140327773736, + "loss": 0.9883, + "step": 3887 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009183663754572728, + "loss": 0.9336, + "step": 3888 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009183187054592966, + "loss": 0.8359, + "step": 3889 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009182710227848893, + "loss": 0.8633, + "step": 3890 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009182233274354963, + "loss": 0.8477, + "step": 3891 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009181756194125628, + "loss": 0.9297, + "step": 3892 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009181278987175341, + "loss": 0.8789, + "step": 3893 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009180801653518568, + "loss": 0.957, + "step": 3894 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009180324193169771, + "loss": 0.9844, + "step": 3895 + }, + { + "epoch": 0.21, + "learning_rate": 0.000917984660614342, + "loss": 0.875, + "step": 3896 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009179368892453987, + "loss": 0.9805, + "step": 3897 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009178891052115947, + "loss": 0.8711, + "step": 3898 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009178413085143782, + "loss": 0.918, + "step": 3899 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009177934991551975, + "loss": 0.9766, + "step": 3900 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009177456771355015, + "loss": 0.8594, + "step": 3901 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009176978424567391, + "loss": 0.8828, + "step": 3902 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009176499951203602, + "loss": 0.8711, + "step": 3903 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009176021351278144, + "loss": 0.8906, + "step": 3904 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009175542624805522, + "loss": 0.9023, + "step": 3905 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009175063771800243, + "loss": 0.9414, + "step": 3906 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009174584792276816, + "loss": 1.0078, + "step": 3907 + }, + { + "epoch": 0.21, + "learning_rate": 0.000917410568624976, + "loss": 0.9805, + "step": 3908 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009173626453733588, + "loss": 0.8828, + "step": 3909 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009173147094742826, + "loss": 0.8633, + "step": 3910 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009172667609291998, + "loss": 0.8906, + "step": 3911 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009172187997395636, + "loss": 0.9375, + "step": 3912 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009171708259068273, + "loss": 1.0, + "step": 3913 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009171228394324447, + "loss": 0.8242, + "step": 3914 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009170748403178697, + "loss": 0.9531, + "step": 3915 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009170268285645571, + "loss": 0.9492, + "step": 3916 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009169788041739617, + "loss": 0.918, + "step": 3917 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009169307671475391, + "loss": 0.832, + "step": 3918 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009168827174867444, + "loss": 0.8594, + "step": 3919 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009168346551930342, + "loss": 0.9961, + "step": 3920 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009167865802678645, + "loss": 0.9062, + "step": 3921 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009167384927126925, + "loss": 0.9531, + "step": 3922 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009166903925289753, + "loss": 0.8516, + "step": 3923 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009166422797181703, + "loss": 1.0547, + "step": 3924 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009165941542817359, + "loss": 0.9141, + "step": 3925 + }, + { + "epoch": 0.21, + "learning_rate": 0.00091654601622113, + "loss": 0.9688, + "step": 3926 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009164978655378115, + "loss": 0.8281, + "step": 3927 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009164497022332396, + "loss": 0.8672, + "step": 3928 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009164015263088738, + "loss": 0.8828, + "step": 3929 + }, + { + "epoch": 0.21, + "learning_rate": 0.000916353337766174, + "loss": 0.8906, + "step": 3930 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009163051366066005, + "loss": 0.9141, + "step": 3931 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009162569228316136, + "loss": 0.9062, + "step": 3932 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009162086964426748, + "loss": 0.9883, + "step": 3933 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009161604574412452, + "loss": 0.8398, + "step": 3934 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009161122058287869, + "loss": 0.9414, + "step": 3935 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009160639416067618, + "loss": 0.9258, + "step": 3936 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009160156647766327, + "loss": 0.9023, + "step": 3937 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009159673753398623, + "loss": 0.8828, + "step": 3938 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009159190732979141, + "loss": 0.8945, + "step": 3939 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009158707586522516, + "loss": 0.875, + "step": 3940 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009158224314043393, + "loss": 0.9336, + "step": 3941 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009157740915556414, + "loss": 0.8828, + "step": 3942 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009157257391076227, + "loss": 1.0, + "step": 3943 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009156773740617485, + "loss": 0.8828, + "step": 3944 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009156289964194846, + "loss": 0.9141, + "step": 3945 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009155806061822967, + "loss": 0.8945, + "step": 3946 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009155322033516513, + "loss": 0.9336, + "step": 3947 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009154837879290154, + "loss": 0.8828, + "step": 3948 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009154353599158558, + "loss": 0.9141, + "step": 3949 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009153869193136402, + "loss": 0.8906, + "step": 3950 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009153384661238364, + "loss": 0.8555, + "step": 3951 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009152900003479128, + "loss": 0.8789, + "step": 3952 + }, + { + "epoch": 0.21, + "learning_rate": 0.000915241521987338, + "loss": 0.8242, + "step": 3953 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009151930310435812, + "loss": 0.9766, + "step": 3954 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009151445275181118, + "loss": 0.9453, + "step": 3955 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009150960114123994, + "loss": 0.8555, + "step": 3956 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009150474827279143, + "loss": 0.8281, + "step": 3957 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009149989414661273, + "loss": 0.9141, + "step": 3958 + }, + { + "epoch": 0.21, + "learning_rate": 0.000914950387628509, + "loss": 0.918, + "step": 3959 + }, + { + "epoch": 0.21, + "learning_rate": 0.000914901821216531, + "loss": 0.8594, + "step": 3960 + }, + { + "epoch": 0.21, + "learning_rate": 0.000914853242231665, + "loss": 0.9023, + "step": 3961 + }, + { + "epoch": 0.21, + "learning_rate": 0.000914804650675383, + "loss": 0.9961, + "step": 3962 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009147560465491575, + "loss": 0.8906, + "step": 3963 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009147074298544613, + "loss": 0.8828, + "step": 3964 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009146588005927679, + "loss": 0.9414, + "step": 3965 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009146101587655508, + "loss": 0.9023, + "step": 3966 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009145615043742839, + "loss": 0.8945, + "step": 3967 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009145128374204417, + "loss": 1.0469, + "step": 3968 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009144641579054988, + "loss": 0.8906, + "step": 3969 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009144154658309306, + "loss": 1.0156, + "step": 3970 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009143667611982125, + "loss": 0.9531, + "step": 3971 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009143180440088204, + "loss": 0.8906, + "step": 3972 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009142693142642306, + "loss": 0.8828, + "step": 3973 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009142205719659198, + "loss": 0.9102, + "step": 3974 + }, + { + "epoch": 0.21, + "learning_rate": 0.000914171817115365, + "loss": 0.875, + "step": 3975 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009141230497140437, + "loss": 0.9258, + "step": 3976 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009140742697634337, + "loss": 0.9883, + "step": 3977 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009140254772650132, + "loss": 0.9492, + "step": 3978 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009139766722202606, + "loss": 0.957, + "step": 3979 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009139278546306551, + "loss": 0.8047, + "step": 3980 + }, + { + "epoch": 0.21, + "learning_rate": 0.000913879024497676, + "loss": 0.9531, + "step": 3981 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009138301818228027, + "loss": 0.8438, + "step": 3982 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009137813266075157, + "loss": 0.8438, + "step": 3983 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009137324588532954, + "loss": 0.8281, + "step": 3984 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009136835785616224, + "loss": 0.8516, + "step": 3985 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009136346857339783, + "loss": 0.9219, + "step": 3986 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009135857803718443, + "loss": 0.8711, + "step": 3987 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009135368624767026, + "loss": 0.9102, + "step": 3988 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009134879320500358, + "loss": 0.8125, + "step": 3989 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009134389890933263, + "loss": 0.9766, + "step": 3990 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009133900336080572, + "loss": 0.8516, + "step": 3991 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009133410655957124, + "loss": 0.9414, + "step": 3992 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009132920850577755, + "loss": 0.9453, + "step": 3993 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009132430919957309, + "loss": 0.7852, + "step": 3994 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009131940864110631, + "loss": 0.9062, + "step": 3995 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009131450683052571, + "loss": 0.8789, + "step": 3996 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009130960376797987, + "loss": 0.9453, + "step": 3997 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009130469945361733, + "loss": 0.9609, + "step": 3998 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009129979388758672, + "loss": 0.8516, + "step": 3999 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009129488707003669, + "loss": 0.8633, + "step": 4000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009128997900111591, + "loss": 0.9453, + "step": 4001 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009128506968097316, + "loss": 0.9766, + "step": 4002 + }, + { + "epoch": 0.22, + "learning_rate": 0.000912801591097572, + "loss": 0.9102, + "step": 4003 + }, + { + "epoch": 0.22, + "learning_rate": 0.000912752472876168, + "loss": 0.9375, + "step": 4004 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009127033421470085, + "loss": 0.9062, + "step": 4005 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009126541989115818, + "loss": 0.9062, + "step": 4006 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009126050431713775, + "loss": 1.0, + "step": 4007 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009125558749278851, + "loss": 0.9883, + "step": 4008 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009125066941825945, + "loss": 0.9883, + "step": 4009 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009124575009369961, + "loss": 0.9375, + "step": 4010 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009124082951925805, + "loss": 0.9141, + "step": 4011 + }, + { + "epoch": 0.22, + "learning_rate": 0.000912359076950839, + "loss": 0.9531, + "step": 4012 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009123098462132628, + "loss": 0.9688, + "step": 4013 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009122606029813439, + "loss": 0.918, + "step": 4014 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009122113472565746, + "loss": 0.9336, + "step": 4015 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009121620790404475, + "loss": 0.9375, + "step": 4016 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009121127983344554, + "loss": 0.875, + "step": 4017 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009120635051400918, + "loss": 0.9297, + "step": 4018 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009120141994588505, + "loss": 0.9023, + "step": 4019 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009119648812922256, + "loss": 0.9805, + "step": 4020 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009119155506417116, + "loss": 1.0078, + "step": 4021 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009118662075088033, + "loss": 0.8477, + "step": 4022 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009118168518949959, + "loss": 0.8477, + "step": 4023 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009117674838017853, + "loss": 0.8398, + "step": 4024 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009117181032306672, + "loss": 0.8477, + "step": 4025 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009116687101831385, + "loss": 0.9219, + "step": 4026 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009116193046606953, + "loss": 0.9062, + "step": 4027 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009115698866648352, + "loss": 0.9453, + "step": 4028 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009115204561970556, + "loss": 0.9258, + "step": 4029 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009114710132588544, + "loss": 0.9336, + "step": 4030 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009114215578517298, + "loss": 0.9102, + "step": 4031 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009113720899771807, + "loss": 1.0391, + "step": 4032 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009113226096367059, + "loss": 0.9336, + "step": 4033 + }, + { + "epoch": 0.22, + "learning_rate": 0.000911273116831805, + "loss": 0.9102, + "step": 4034 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009112236115639776, + "loss": 0.8398, + "step": 4035 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009111740938347239, + "loss": 0.9062, + "step": 4036 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009111245636455448, + "loss": 0.8438, + "step": 4037 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009110750209979408, + "loss": 0.9297, + "step": 4038 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009110254658934134, + "loss": 0.9023, + "step": 4039 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009109758983334642, + "loss": 0.9258, + "step": 4040 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009109263183195952, + "loss": 1.0156, + "step": 4041 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009108767258533091, + "loss": 0.9336, + "step": 4042 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009108271209361086, + "loss": 0.9727, + "step": 4043 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009107775035694967, + "loss": 0.8945, + "step": 4044 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009107278737549771, + "loss": 0.9023, + "step": 4045 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009106782314940539, + "loss": 0.8477, + "step": 4046 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009106285767882313, + "loss": 0.8945, + "step": 4047 + }, + { + "epoch": 0.22, + "learning_rate": 0.000910578909639014, + "loss": 0.9336, + "step": 4048 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009105292300479069, + "loss": 1.0156, + "step": 4049 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009104795380164157, + "loss": 0.8438, + "step": 4050 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009104298335460461, + "loss": 0.9062, + "step": 4051 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009103801166383044, + "loss": 1.1016, + "step": 4052 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009103303872946971, + "loss": 0.8945, + "step": 4053 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009102806455167314, + "loss": 0.9062, + "step": 4054 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009102308913059143, + "loss": 0.8711, + "step": 4055 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009101811246637536, + "loss": 0.8672, + "step": 4056 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009101313455917575, + "loss": 0.8242, + "step": 4057 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009100815540914345, + "loss": 0.8398, + "step": 4058 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009100317501642933, + "loss": 0.8477, + "step": 4059 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009099819338118432, + "loss": 0.918, + "step": 4060 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009099321050355938, + "loss": 0.9688, + "step": 4061 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009098822638370552, + "loss": 0.9414, + "step": 4062 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009098324102177373, + "loss": 0.8789, + "step": 4063 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009097825441791514, + "loss": 0.9297, + "step": 4064 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009097326657228085, + "loss": 0.9297, + "step": 4065 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009096827748502197, + "loss": 0.9062, + "step": 4066 + }, + { + "epoch": 0.22, + "learning_rate": 0.000909632871562897, + "loss": 0.8711, + "step": 4067 + }, + { + "epoch": 0.22, + "learning_rate": 0.000909582955862353, + "loss": 0.8984, + "step": 4068 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009095330277500998, + "loss": 0.9023, + "step": 4069 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009094830872276508, + "loss": 0.8945, + "step": 4070 + }, + { + "epoch": 0.22, + "learning_rate": 0.000909433134296519, + "loss": 0.9258, + "step": 4071 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009093831689582183, + "loss": 0.9492, + "step": 4072 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009093331912142631, + "loss": 0.9414, + "step": 4073 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009092832010661674, + "loss": 0.8516, + "step": 4074 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009092331985154464, + "loss": 0.8906, + "step": 4075 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009091831835636152, + "loss": 0.9688, + "step": 4076 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009091331562121894, + "loss": 0.8984, + "step": 4077 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009090831164626852, + "loss": 0.9688, + "step": 4078 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009090330643166186, + "loss": 0.9531, + "step": 4079 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009089829997755065, + "loss": 0.8477, + "step": 4080 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009089329228408663, + "loss": 0.8281, + "step": 4081 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009088828335142152, + "loss": 0.8477, + "step": 4082 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009088327317970712, + "loss": 0.9297, + "step": 4083 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009087826176909524, + "loss": 0.9297, + "step": 4084 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009087324911973776, + "loss": 0.8711, + "step": 4085 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009086823523178657, + "loss": 0.9062, + "step": 4086 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009086322010539359, + "loss": 0.9453, + "step": 4087 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009085820374071083, + "loss": 0.8828, + "step": 4088 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009085318613789028, + "loss": 0.918, + "step": 4089 + }, + { + "epoch": 0.22, + "learning_rate": 0.00090848167297084, + "loss": 0.8906, + "step": 4090 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009084314721844407, + "loss": 1.125, + "step": 4091 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009083812590212261, + "loss": 0.918, + "step": 4092 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009083310334827181, + "loss": 0.9531, + "step": 4093 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009082807955704383, + "loss": 1.0, + "step": 4094 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009082305452859093, + "loss": 0.8047, + "step": 4095 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009081802826306538, + "loss": 0.9883, + "step": 4096 + }, + { + "epoch": 0.22, + "learning_rate": 0.000908130007606195, + "loss": 0.9062, + "step": 4097 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009080797202140564, + "loss": 1.0469, + "step": 4098 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009080294204557618, + "loss": 0.9453, + "step": 4099 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009079791083328353, + "loss": 0.9102, + "step": 4100 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009079287838468018, + "loss": 1.0234, + "step": 4101 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009078784469991861, + "loss": 0.9492, + "step": 4102 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009078280977915138, + "loss": 0.9961, + "step": 4103 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009077777362253103, + "loss": 0.9922, + "step": 4104 + }, + { + "epoch": 0.22, + "learning_rate": 0.000907727362302102, + "loss": 0.9492, + "step": 4105 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009076769760234155, + "loss": 1.0312, + "step": 4106 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009076265773907773, + "loss": 1.0781, + "step": 4107 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009075761664057149, + "loss": 1.0938, + "step": 4108 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009075257430697559, + "loss": 1.0, + "step": 4109 + }, + { + "epoch": 0.22, + "learning_rate": 0.000907475307384428, + "loss": 1.0078, + "step": 4110 + }, + { + "epoch": 0.22, + "learning_rate": 0.00090742485935126, + "loss": 0.9648, + "step": 4111 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009073743989717804, + "loss": 1.1094, + "step": 4112 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009073239262475184, + "loss": 0.9141, + "step": 4113 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009072734411800034, + "loss": 1.0156, + "step": 4114 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009072229437707653, + "loss": 1.0312, + "step": 4115 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009071724340213344, + "loss": 0.9531, + "step": 4116 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009071219119332414, + "loss": 0.9727, + "step": 4117 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009070713775080169, + "loss": 0.9336, + "step": 4118 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009070208307471926, + "loss": 0.9375, + "step": 4119 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009069702716523003, + "loss": 0.8672, + "step": 4120 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009069197002248716, + "loss": 1.0078, + "step": 4121 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009068691164664396, + "loss": 0.9805, + "step": 4122 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009068185203785368, + "loss": 0.9219, + "step": 4123 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009067679119626966, + "loss": 0.9336, + "step": 4124 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009067172912204524, + "loss": 0.9297, + "step": 4125 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009066666581533385, + "loss": 0.9766, + "step": 4126 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009066160127628889, + "loss": 0.957, + "step": 4127 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009065653550506383, + "loss": 0.9453, + "step": 4128 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009065146850181221, + "loss": 0.9922, + "step": 4129 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009064640026668756, + "loss": 0.9961, + "step": 4130 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009064133079984347, + "loss": 0.9805, + "step": 4131 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009063626010143355, + "loss": 0.9922, + "step": 4132 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009063118817161146, + "loss": 0.8906, + "step": 4133 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009062611501053093, + "loss": 0.9102, + "step": 4134 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009062104061834564, + "loss": 1.0234, + "step": 4135 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009061596499520941, + "loss": 0.957, + "step": 4136 + }, + { + "epoch": 0.22, + "learning_rate": 0.00090610888141276, + "loss": 0.9688, + "step": 4137 + }, + { + "epoch": 0.22, + "learning_rate": 0.000906058100566993, + "loss": 0.9609, + "step": 4138 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009060073074163317, + "loss": 0.9297, + "step": 4139 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009059565019623153, + "loss": 0.8945, + "step": 4140 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009059056842064833, + "loss": 1.0781, + "step": 4141 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009058548541503759, + "loss": 0.9453, + "step": 4142 + }, + { + "epoch": 0.22, + "learning_rate": 0.000905804011795533, + "loss": 1.0703, + "step": 4143 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009057531571434958, + "loss": 0.9609, + "step": 4144 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009057022901958051, + "loss": 1.0078, + "step": 4145 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009056514109540024, + "loss": 0.9531, + "step": 4146 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009056005194196293, + "loss": 0.9766, + "step": 4147 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009055496155942282, + "loss": 1.0312, + "step": 4148 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009054986994793416, + "loss": 0.9766, + "step": 4149 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009054477710765123, + "loss": 0.8867, + "step": 4150 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009053968303872839, + "loss": 1.0469, + "step": 4151 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009053458774131998, + "loss": 0.9688, + "step": 4152 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009052949121558042, + "loss": 0.9492, + "step": 4153 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009052439346166415, + "loss": 0.8633, + "step": 4154 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009051929447972562, + "loss": 0.8906, + "step": 4155 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009051419426991938, + "loss": 0.9531, + "step": 4156 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009050909283239999, + "loss": 0.9609, + "step": 4157 + }, + { + "epoch": 0.22, + "learning_rate": 0.00090503990167322, + "loss": 0.9492, + "step": 4158 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009049888627484006, + "loss": 0.8789, + "step": 4159 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009049378115510885, + "loss": 0.9453, + "step": 4160 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009048867480828304, + "loss": 1.0469, + "step": 4161 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009048356723451741, + "loss": 0.8594, + "step": 4162 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009047845843396669, + "loss": 0.9766, + "step": 4163 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009047334840678572, + "loss": 0.9023, + "step": 4164 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009046823715312935, + "loss": 0.9727, + "step": 4165 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009046312467315247, + "loss": 0.9766, + "step": 4166 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009045801096700998, + "loss": 0.8242, + "step": 4167 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009045289603485687, + "loss": 0.9297, + "step": 4168 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009044777987684814, + "loss": 0.9766, + "step": 4169 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009044266249313881, + "loss": 0.9375, + "step": 4170 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009043754388388394, + "loss": 1.0078, + "step": 4171 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009043242404923869, + "loss": 0.8984, + "step": 4172 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009042730298935818, + "loss": 0.8828, + "step": 4173 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009042218070439757, + "loss": 0.9219, + "step": 4174 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009041705719451212, + "loss": 0.8945, + "step": 4175 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009041193245985706, + "loss": 0.8789, + "step": 4176 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009040680650058773, + "loss": 1.0234, + "step": 4177 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009040167931685941, + "loss": 0.9531, + "step": 4178 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009039655090882751, + "loss": 0.9453, + "step": 4179 + }, + { + "epoch": 0.22, + "learning_rate": 0.000903914212766474, + "loss": 0.875, + "step": 4180 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009038629042047455, + "loss": 0.8594, + "step": 4181 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009038115834046445, + "loss": 0.8633, + "step": 4182 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009037602503677262, + "loss": 0.9141, + "step": 4183 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009037089050955458, + "loss": 0.9492, + "step": 4184 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009036575475896596, + "loss": 1.0078, + "step": 4185 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009036061778516238, + "loss": 0.9414, + "step": 4186 + }, + { + "epoch": 0.23, + "learning_rate": 0.000903554795882995, + "loss": 0.9375, + "step": 4187 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009035034016853303, + "loss": 0.8477, + "step": 4188 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009034519952601871, + "loss": 0.9023, + "step": 4189 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009034005766091231, + "loss": 0.9297, + "step": 4190 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009033491457336966, + "loss": 0.9297, + "step": 4191 + }, + { + "epoch": 0.23, + "learning_rate": 0.000903297702635466, + "loss": 0.9961, + "step": 4192 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009032462473159904, + "loss": 0.9414, + "step": 4193 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009031947797768288, + "loss": 0.8906, + "step": 4194 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009031433000195409, + "loss": 1.0625, + "step": 4195 + }, + { + "epoch": 0.23, + "learning_rate": 0.000903091808045687, + "loss": 0.9414, + "step": 4196 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009030403038568269, + "loss": 0.9805, + "step": 4197 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009029887874545219, + "loss": 0.9141, + "step": 4198 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009029372588403329, + "loss": 0.9062, + "step": 4199 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009028857180158214, + "loss": 0.9883, + "step": 4200 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009028341649825491, + "loss": 1.0156, + "step": 4201 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009027825997420785, + "loss": 0.957, + "step": 4202 + }, + { + "epoch": 0.23, + "learning_rate": 0.000902731022295972, + "loss": 0.918, + "step": 4203 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009026794326457925, + "loss": 0.8633, + "step": 4204 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009026278307931037, + "loss": 0.9414, + "step": 4205 + }, + { + "epoch": 0.23, + "learning_rate": 0.000902576216739469, + "loss": 0.9062, + "step": 4206 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009025245904864525, + "loss": 0.8516, + "step": 4207 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009024729520356187, + "loss": 0.9961, + "step": 4208 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009024213013885324, + "loss": 0.9297, + "step": 4209 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009023696385467589, + "loss": 0.9336, + "step": 4210 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009023179635118634, + "loss": 0.9023, + "step": 4211 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009022662762854124, + "loss": 0.9375, + "step": 4212 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009022145768689716, + "loss": 0.9414, + "step": 4213 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009021628652641081, + "loss": 0.8984, + "step": 4214 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009021111414723886, + "loss": 0.9805, + "step": 4215 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009020594054953807, + "loss": 0.8281, + "step": 4216 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009020076573346522, + "loss": 0.9023, + "step": 4217 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009019558969917711, + "loss": 0.9922, + "step": 4218 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009019041244683059, + "loss": 1.0156, + "step": 4219 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009018523397658257, + "loss": 0.9336, + "step": 4220 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009018005428858996, + "loss": 0.9727, + "step": 4221 + }, + { + "epoch": 0.23, + "learning_rate": 0.000901748733830097, + "loss": 0.8789, + "step": 4222 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009016969125999883, + "loss": 1.0156, + "step": 4223 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009016450791971436, + "loss": 0.8789, + "step": 4224 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009015932336231337, + "loss": 0.9805, + "step": 4225 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009015413758795295, + "loss": 0.9062, + "step": 4226 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009014895059679028, + "loss": 0.9258, + "step": 4227 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009014376238898252, + "loss": 0.9414, + "step": 4228 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009013857296468688, + "loss": 0.8594, + "step": 4229 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009013338232406066, + "loss": 1.0078, + "step": 4230 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009012819046726111, + "loss": 0.8984, + "step": 4231 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009012299739444558, + "loss": 0.875, + "step": 4232 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009011780310577142, + "loss": 0.9297, + "step": 4233 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009011260760139606, + "loss": 0.918, + "step": 4234 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009010741088147692, + "loss": 0.9375, + "step": 4235 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009010221294617149, + "loss": 0.8867, + "step": 4236 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009009701379563728, + "loss": 0.9961, + "step": 4237 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009009181343003184, + "loss": 0.8594, + "step": 4238 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009008661184951277, + "loss": 0.918, + "step": 4239 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009008140905423767, + "loss": 0.957, + "step": 4240 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009007620504436421, + "loss": 0.9844, + "step": 4241 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009007099982005011, + "loss": 1.0234, + "step": 4242 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009006579338145307, + "loss": 0.9922, + "step": 4243 + }, + { + "epoch": 0.23, + "learning_rate": 0.000900605857287309, + "loss": 0.9375, + "step": 4244 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009005537686204139, + "loss": 0.9062, + "step": 4245 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009005016678154239, + "loss": 0.8867, + "step": 4246 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009004495548739175, + "loss": 1.0234, + "step": 4247 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009003974297974745, + "loss": 0.8438, + "step": 4248 + }, + { + "epoch": 0.23, + "learning_rate": 0.000900345292587674, + "loss": 1.0078, + "step": 4249 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009002931432460961, + "loss": 0.832, + "step": 4250 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009002409817743211, + "loss": 0.9883, + "step": 4251 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009001888081739297, + "loss": 1.0312, + "step": 4252 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009001366224465028, + "loss": 0.9023, + "step": 4253 + }, + { + "epoch": 0.23, + "learning_rate": 0.000900084424593622, + "loss": 0.9688, + "step": 4254 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009000322146168687, + "loss": 0.9922, + "step": 4255 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008999799925178254, + "loss": 0.9258, + "step": 4256 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008999277582980744, + "loss": 0.9062, + "step": 4257 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008998755119591987, + "loss": 1.0, + "step": 4258 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008998232535027815, + "loss": 0.8945, + "step": 4259 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008997709829304062, + "loss": 0.8711, + "step": 4260 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008997187002436571, + "loss": 0.8906, + "step": 4261 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008996664054441184, + "loss": 0.9219, + "step": 4262 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008996140985333747, + "loss": 0.9023, + "step": 4263 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008995617795130112, + "loss": 1.0078, + "step": 4264 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008995094483846133, + "loss": 0.9336, + "step": 4265 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008994571051497668, + "loss": 0.9102, + "step": 4266 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008994047498100579, + "loss": 0.8945, + "step": 4267 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008993523823670731, + "loss": 0.9297, + "step": 4268 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008993000028223992, + "loss": 0.918, + "step": 4269 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008992476111776237, + "loss": 0.8984, + "step": 4270 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008991952074343341, + "loss": 0.8906, + "step": 4271 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008991427915941186, + "loss": 0.8633, + "step": 4272 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008990903636585652, + "loss": 0.8984, + "step": 4273 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008990379236292629, + "loss": 0.8711, + "step": 4274 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008989854715078007, + "loss": 0.8242, + "step": 4275 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008989330072957682, + "loss": 0.957, + "step": 4276 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008988805309947552, + "loss": 0.9219, + "step": 4277 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008988280426063518, + "loss": 0.8945, + "step": 4278 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008987755421321486, + "loss": 0.9414, + "step": 4279 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008987230295737366, + "loss": 0.9844, + "step": 4280 + }, + { + "epoch": 0.23, + "learning_rate": 0.000898670504932707, + "loss": 0.9883, + "step": 4281 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008986179682106516, + "loss": 0.8789, + "step": 4282 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008985654194091626, + "loss": 0.9102, + "step": 4283 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008985128585298319, + "loss": 0.9414, + "step": 4284 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008984602855742527, + "loss": 0.9375, + "step": 4285 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008984077005440179, + "loss": 1.0, + "step": 4286 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008983551034407211, + "loss": 0.9453, + "step": 4287 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008983024942659561, + "loss": 0.9336, + "step": 4288 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008982498730213172, + "loss": 0.9102, + "step": 4289 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008981972397083991, + "loss": 0.9648, + "step": 4290 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008981445943287965, + "loss": 0.957, + "step": 4291 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008980919368841049, + "loss": 0.9414, + "step": 4292 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008980392673759199, + "loss": 0.875, + "step": 4293 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008979865858058376, + "loss": 0.9258, + "step": 4294 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008979338921754546, + "loss": 0.957, + "step": 4295 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008978811864863674, + "loss": 0.957, + "step": 4296 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008978284687401733, + "loss": 0.9258, + "step": 4297 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008977757389384697, + "loss": 0.8984, + "step": 4298 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008977229970828549, + "loss": 0.9297, + "step": 4299 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008976702431749266, + "loss": 0.9297, + "step": 4300 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008976174772162837, + "loss": 0.8711, + "step": 4301 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008975646992085252, + "loss": 1.0078, + "step": 4302 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008975119091532503, + "loss": 0.9297, + "step": 4303 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008974591070520588, + "loss": 0.9375, + "step": 4304 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008974062929065509, + "loss": 0.9023, + "step": 4305 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008973534667183269, + "loss": 0.9375, + "step": 4306 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008973006284889875, + "loss": 1.0156, + "step": 4307 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008972477782201341, + "loss": 0.9258, + "step": 4308 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008971949159133681, + "loss": 0.9219, + "step": 4309 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008971420415702914, + "loss": 0.8672, + "step": 4310 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008970891551925064, + "loss": 0.8828, + "step": 4311 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008970362567816158, + "loss": 0.9297, + "step": 4312 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008969833463392221, + "loss": 0.8789, + "step": 4313 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008969304238669292, + "loss": 0.9492, + "step": 4314 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008968774893663404, + "loss": 0.8867, + "step": 4315 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008968245428390601, + "loss": 0.9688, + "step": 4316 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008967715842866928, + "loss": 0.9062, + "step": 4317 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008967186137108429, + "loss": 0.875, + "step": 4318 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008966656311131161, + "loss": 0.8438, + "step": 4319 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008966126364951177, + "loss": 0.9727, + "step": 4320 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008965596298584535, + "loss": 0.9297, + "step": 4321 + }, + { + "epoch": 0.23, + "learning_rate": 0.00089650661120473, + "loss": 0.9453, + "step": 4322 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008964535805355536, + "loss": 0.9023, + "step": 4323 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008964005378525315, + "loss": 0.8945, + "step": 4324 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008963474831572711, + "loss": 0.918, + "step": 4325 + }, + { + "epoch": 0.23, + "learning_rate": 0.00089629441645138, + "loss": 0.8789, + "step": 4326 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008962413377364662, + "loss": 0.9766, + "step": 4327 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008961882470141384, + "loss": 0.9219, + "step": 4328 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008961351442860053, + "loss": 0.8984, + "step": 4329 + }, + { + "epoch": 0.23, + "learning_rate": 0.000896082029553676, + "loss": 0.957, + "step": 4330 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008960289028187603, + "loss": 0.9531, + "step": 4331 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008959757640828679, + "loss": 0.8828, + "step": 4332 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008959226133476092, + "loss": 1.0234, + "step": 4333 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008958694506145947, + "loss": 0.8711, + "step": 4334 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008958162758854356, + "loss": 0.9414, + "step": 4335 + }, + { + "epoch": 0.23, + "learning_rate": 0.000895763089161743, + "loss": 0.957, + "step": 4336 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008957098904451288, + "loss": 0.9648, + "step": 4337 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008956566797372052, + "loss": 0.8672, + "step": 4338 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008956034570395846, + "loss": 0.8906, + "step": 4339 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008955502223538796, + "loss": 0.8828, + "step": 4340 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008954969756817035, + "loss": 0.9219, + "step": 4341 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008954437170246701, + "loss": 0.8281, + "step": 4342 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008953904463843931, + "loss": 0.9531, + "step": 4343 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008953371637624867, + "loss": 1.0312, + "step": 4344 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008952838691605658, + "loss": 0.9102, + "step": 4345 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008952305625802451, + "loss": 0.8359, + "step": 4346 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008951772440231401, + "loss": 0.9766, + "step": 4347 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008951239134908664, + "loss": 0.8711, + "step": 4348 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008950705709850404, + "loss": 0.8672, + "step": 4349 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008950172165072783, + "loss": 0.918, + "step": 4350 + }, + { + "epoch": 0.23, + "learning_rate": 0.000894963850059197, + "loss": 0.918, + "step": 4351 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008949104716424137, + "loss": 0.875, + "step": 4352 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008948570812585458, + "loss": 0.9492, + "step": 4353 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008948036789092113, + "loss": 0.8242, + "step": 4354 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008947502645960286, + "loss": 0.9453, + "step": 4355 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008946968383206162, + "loss": 0.9141, + "step": 4356 + }, + { + "epoch": 0.23, + "learning_rate": 0.000894643400084593, + "loss": 0.9844, + "step": 4357 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008945899498895784, + "loss": 0.9414, + "step": 4358 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008945364877371925, + "loss": 0.957, + "step": 4359 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008944830136290548, + "loss": 0.9258, + "step": 4360 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008944295275667858, + "loss": 0.9609, + "step": 4361 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008943760295520067, + "loss": 0.8477, + "step": 4362 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008943225195863385, + "loss": 0.9062, + "step": 4363 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008942689976714025, + "loss": 0.9102, + "step": 4364 + }, + { + "epoch": 0.23, + "learning_rate": 0.000894215463808821, + "loss": 1.0234, + "step": 4365 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008941619180002159, + "loss": 0.8828, + "step": 4366 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008941083602472099, + "loss": 0.8984, + "step": 4367 + }, + { + "epoch": 0.23, + "learning_rate": 0.000894054790551426, + "loss": 0.9805, + "step": 4368 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008940012089144876, + "loss": 0.918, + "step": 4369 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008939476153380184, + "loss": 1.0234, + "step": 4370 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008938940098236423, + "loss": 0.8672, + "step": 4371 + }, + { + "epoch": 0.23, + "learning_rate": 0.000893840392372984, + "loss": 0.918, + "step": 4372 + }, + { + "epoch": 0.24, + "learning_rate": 0.000893786762987668, + "loss": 0.8633, + "step": 4373 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008937331216693197, + "loss": 0.8867, + "step": 4374 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008936794684195642, + "loss": 0.8359, + "step": 4375 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008936258032400279, + "loss": 0.9492, + "step": 4376 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008935721261323366, + "loss": 0.9258, + "step": 4377 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008935184370981172, + "loss": 0.9492, + "step": 4378 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008934647361389965, + "loss": 0.9961, + "step": 4379 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008934110232566017, + "loss": 0.9297, + "step": 4380 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008933572984525608, + "loss": 1.0078, + "step": 4381 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008933035617285017, + "loss": 0.9375, + "step": 4382 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008932498130860526, + "loss": 0.8945, + "step": 4383 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008931960525268423, + "loss": 0.9531, + "step": 4384 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008931422800525003, + "loss": 0.8672, + "step": 4385 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008930884956646557, + "loss": 0.9375, + "step": 4386 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008930346993649385, + "loss": 0.957, + "step": 4387 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008929808911549788, + "loss": 0.8477, + "step": 4388 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008929270710364073, + "loss": 0.9414, + "step": 4389 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008928732390108547, + "loss": 0.957, + "step": 4390 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008928193950799526, + "loss": 0.8789, + "step": 4391 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008927655392453324, + "loss": 0.9258, + "step": 4392 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008927116715086263, + "loss": 0.8906, + "step": 4393 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008926577918714666, + "loss": 0.9219, + "step": 4394 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008926039003354859, + "loss": 0.9062, + "step": 4395 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008925499969023174, + "loss": 0.9453, + "step": 4396 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008924960815735946, + "loss": 0.918, + "step": 4397 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008924421543509511, + "loss": 0.8516, + "step": 4398 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008923882152360214, + "loss": 0.8438, + "step": 4399 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008923342642304398, + "loss": 1.0078, + "step": 4400 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008922803013358412, + "loss": 0.9688, + "step": 4401 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008922263265538611, + "loss": 0.9023, + "step": 4402 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008921723398861346, + "loss": 0.9141, + "step": 4403 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008921183413342982, + "loss": 0.8477, + "step": 4404 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008920643308999881, + "loss": 0.9805, + "step": 4405 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008920103085848407, + "loss": 0.9688, + "step": 4406 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008919562743904934, + "loss": 0.9414, + "step": 4407 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008919022283185834, + "loss": 0.8555, + "step": 4408 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008918481703707486, + "loss": 0.9219, + "step": 4409 + }, + { + "epoch": 0.24, + "learning_rate": 0.000891794100548627, + "loss": 0.9258, + "step": 4410 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008917400188538574, + "loss": 0.8906, + "step": 4411 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008916859252880783, + "loss": 0.9062, + "step": 4412 + }, + { + "epoch": 0.24, + "learning_rate": 0.000891631819852929, + "loss": 0.8281, + "step": 4413 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008915777025500492, + "loss": 0.8906, + "step": 4414 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008915235733810788, + "loss": 0.9414, + "step": 4415 + }, + { + "epoch": 0.24, + "learning_rate": 0.000891469432347658, + "loss": 0.9609, + "step": 4416 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008914152794514274, + "loss": 0.875, + "step": 4417 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008913611146940283, + "loss": 0.9414, + "step": 4418 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008913069380771017, + "loss": 1.0469, + "step": 4419 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008912527496022897, + "loss": 0.9336, + "step": 4420 + }, + { + "epoch": 0.24, + "learning_rate": 0.000891198549271234, + "loss": 0.9219, + "step": 4421 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008911443370855773, + "loss": 0.8203, + "step": 4422 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008910901130469624, + "loss": 1.0156, + "step": 4423 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008910358771570324, + "loss": 0.9492, + "step": 4424 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008909816294174309, + "loss": 0.9688, + "step": 4425 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008909273698298016, + "loss": 0.875, + "step": 4426 + }, + { + "epoch": 0.24, + "learning_rate": 0.000890873098395789, + "loss": 0.9844, + "step": 4427 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008908188151170375, + "loss": 0.9883, + "step": 4428 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008907645199951921, + "loss": 0.9492, + "step": 4429 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008907102130318983, + "loss": 0.8008, + "step": 4430 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008906558942288014, + "loss": 0.9727, + "step": 4431 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008906015635875478, + "loss": 0.8672, + "step": 4432 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008905472211097838, + "loss": 0.9102, + "step": 4433 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008904928667971562, + "loss": 0.8242, + "step": 4434 + }, + { + "epoch": 0.24, + "learning_rate": 0.000890438500651312, + "loss": 0.9609, + "step": 4435 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008903841226738986, + "loss": 0.9102, + "step": 4436 + }, + { + "epoch": 0.24, + "learning_rate": 0.000890329732866564, + "loss": 0.918, + "step": 4437 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008902753312309563, + "loss": 0.8906, + "step": 4438 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008902209177687241, + "loss": 0.9062, + "step": 4439 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008901664924815165, + "loss": 0.9648, + "step": 4440 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008901120553709823, + "loss": 0.8516, + "step": 4441 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008900576064387716, + "loss": 0.9141, + "step": 4442 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008900031456865338, + "loss": 0.8984, + "step": 4443 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008899486731159199, + "loss": 0.9219, + "step": 4444 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008898941887285803, + "loss": 0.875, + "step": 4445 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008898396925261659, + "loss": 0.9727, + "step": 4446 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008897851845103284, + "loss": 0.918, + "step": 4447 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008897306646827193, + "loss": 0.9023, + "step": 4448 + }, + { + "epoch": 0.24, + "learning_rate": 0.000889676133044991, + "loss": 0.8398, + "step": 4449 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008896215895987958, + "loss": 1.0, + "step": 4450 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008895670343457866, + "loss": 0.9922, + "step": 4451 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008895124672876166, + "loss": 0.9102, + "step": 4452 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008894578884259393, + "loss": 0.8281, + "step": 4453 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008894032977624088, + "loss": 0.9141, + "step": 4454 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008893486952986792, + "loss": 0.9297, + "step": 4455 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008892940810364052, + "loss": 0.9688, + "step": 4456 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008892394549772419, + "loss": 0.8125, + "step": 4457 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008891848171228445, + "loss": 0.9141, + "step": 4458 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008891301674748686, + "loss": 0.9219, + "step": 4459 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008890755060349705, + "loss": 0.918, + "step": 4460 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008890208328048066, + "loss": 0.9141, + "step": 4461 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008889661477860335, + "loss": 0.9258, + "step": 4462 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008889114509803085, + "loss": 0.9727, + "step": 4463 + }, + { + "epoch": 0.24, + "learning_rate": 0.000888856742389289, + "loss": 0.9258, + "step": 4464 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008888020220146329, + "loss": 0.9727, + "step": 4465 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008887472898579983, + "loss": 0.8477, + "step": 4466 + }, + { + "epoch": 0.24, + "learning_rate": 0.000888692545921044, + "loss": 0.8438, + "step": 4467 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008886377902054287, + "loss": 0.9141, + "step": 4468 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008885830227128117, + "loss": 0.8945, + "step": 4469 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008885282434448528, + "loss": 0.9453, + "step": 4470 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008884734524032119, + "loss": 0.8984, + "step": 4471 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008884186495895492, + "loss": 0.9492, + "step": 4472 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008883638350055256, + "loss": 0.8906, + "step": 4473 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008883090086528022, + "loss": 0.9531, + "step": 4474 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008882541705330403, + "loss": 0.8711, + "step": 4475 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008881993206479016, + "loss": 0.8242, + "step": 4476 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008881444589990484, + "loss": 0.8984, + "step": 4477 + }, + { + "epoch": 0.24, + "learning_rate": 0.000888089585588143, + "loss": 1.0, + "step": 4478 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008880347004168483, + "loss": 0.9258, + "step": 4479 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008879798034868277, + "loss": 0.9336, + "step": 4480 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008879248947997447, + "loss": 0.8398, + "step": 4481 + }, + { + "epoch": 0.24, + "learning_rate": 0.000887869974357263, + "loss": 0.9219, + "step": 4482 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008878150421610469, + "loss": 0.875, + "step": 4483 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008877600982127613, + "loss": 0.8945, + "step": 4484 + }, + { + "epoch": 0.24, + "learning_rate": 0.000887705142514071, + "loss": 0.8945, + "step": 4485 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008876501750666412, + "loss": 0.9062, + "step": 4486 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008875951958721378, + "loss": 0.9336, + "step": 4487 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008875402049322267, + "loss": 0.9492, + "step": 4488 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008874852022485747, + "loss": 1.0, + "step": 4489 + }, + { + "epoch": 0.24, + "learning_rate": 0.000887430187822848, + "loss": 0.832, + "step": 4490 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008873751616567141, + "loss": 0.9961, + "step": 4491 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008873201237518402, + "loss": 1.0, + "step": 4492 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008872650741098945, + "loss": 0.8516, + "step": 4493 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008872100127325447, + "loss": 0.8867, + "step": 4494 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008871549396214598, + "loss": 0.9062, + "step": 4495 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008870998547783085, + "loss": 0.8789, + "step": 4496 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008870447582047599, + "loss": 0.9844, + "step": 4497 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008869896499024839, + "loss": 0.9844, + "step": 4498 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008869345298731502, + "loss": 0.9531, + "step": 4499 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008868793981184293, + "loss": 0.9258, + "step": 4500 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008868242546399919, + "loss": 0.8906, + "step": 4501 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008867690994395087, + "loss": 0.9766, + "step": 4502 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008867139325186515, + "loss": 0.9062, + "step": 4503 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008866587538790918, + "loss": 0.9844, + "step": 4504 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008866035635225018, + "loss": 0.8711, + "step": 4505 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008865483614505539, + "loss": 0.8867, + "step": 4506 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008864931476649209, + "loss": 0.9648, + "step": 4507 + }, + { + "epoch": 0.24, + "learning_rate": 0.000886437922167276, + "loss": 0.9883, + "step": 4508 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008863826849592926, + "loss": 1.0078, + "step": 4509 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008863274360426447, + "loss": 0.9219, + "step": 4510 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008862721754190066, + "loss": 0.8359, + "step": 4511 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008862169030900526, + "loss": 0.9375, + "step": 4512 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008861616190574579, + "loss": 0.8164, + "step": 4513 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008861063233228976, + "loss": 0.9766, + "step": 4514 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008860510158880474, + "loss": 0.9219, + "step": 4515 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008859956967545834, + "loss": 0.9648, + "step": 4516 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008859403659241818, + "loss": 0.8164, + "step": 4517 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008858850233985194, + "loss": 0.918, + "step": 4518 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008858296691792731, + "loss": 0.8672, + "step": 4519 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008857743032681206, + "loss": 0.9883, + "step": 4520 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008857189256667395, + "loss": 0.9219, + "step": 4521 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008856635363768078, + "loss": 0.8906, + "step": 4522 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008856081354000043, + "loss": 0.9336, + "step": 4523 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008855527227380075, + "loss": 0.9883, + "step": 4524 + }, + { + "epoch": 0.24, + "learning_rate": 0.000885497298392497, + "loss": 0.9258, + "step": 4525 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008854418623651517, + "loss": 0.9922, + "step": 4526 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008853864146576521, + "loss": 0.8711, + "step": 4527 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008853309552716782, + "loss": 0.9453, + "step": 4528 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008852754842089105, + "loss": 0.9844, + "step": 4529 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008852200014710303, + "loss": 0.8633, + "step": 4530 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008851645070597186, + "loss": 0.9062, + "step": 4531 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008851090009766569, + "loss": 0.9453, + "step": 4532 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008850534832235277, + "loss": 0.9844, + "step": 4533 + }, + { + "epoch": 0.24, + "learning_rate": 0.000884997953802013, + "loss": 0.9531, + "step": 4534 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008849424127137958, + "loss": 0.8984, + "step": 4535 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008848868599605588, + "loss": 0.7852, + "step": 4536 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008848312955439859, + "loss": 0.9062, + "step": 4537 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008847757194657606, + "loss": 0.8516, + "step": 4538 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008847201317275671, + "loss": 0.9766, + "step": 4539 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008846645323310898, + "loss": 0.9883, + "step": 4540 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008846089212780137, + "loss": 0.875, + "step": 4541 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008845532985700238, + "loss": 0.9766, + "step": 4542 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008844976642088058, + "loss": 0.9258, + "step": 4543 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008844420181960456, + "loss": 0.9219, + "step": 4544 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008843863605334294, + "loss": 0.8906, + "step": 4545 + }, + { + "epoch": 0.24, + "learning_rate": 0.000884330691222644, + "loss": 0.8477, + "step": 4546 + }, + { + "epoch": 0.24, + "learning_rate": 0.000884275010265376, + "loss": 0.9102, + "step": 4547 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008842193176633131, + "loss": 0.9531, + "step": 4548 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008841636134181427, + "loss": 0.8281, + "step": 4549 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008841078975315528, + "loss": 0.9297, + "step": 4550 + }, + { + "epoch": 0.24, + "learning_rate": 0.000884052170005232, + "loss": 0.8477, + "step": 4551 + }, + { + "epoch": 0.24, + "learning_rate": 0.000883996430840869, + "loss": 0.8594, + "step": 4552 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008839406800401526, + "loss": 0.8789, + "step": 4553 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008838849176047725, + "loss": 0.9375, + "step": 4554 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008838291435364186, + "loss": 0.9766, + "step": 4555 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008837733578367806, + "loss": 0.8828, + "step": 4556 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008837175605075491, + "loss": 0.9453, + "step": 4557 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008836617515504153, + "loss": 0.8906, + "step": 4558 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008836059309670702, + "loss": 0.9414, + "step": 4559 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008835500987592052, + "loss": 0.9453, + "step": 4560 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008834942549285124, + "loss": 0.918, + "step": 4561 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008834383994766839, + "loss": 0.8359, + "step": 4562 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008833825324054126, + "loss": 0.9414, + "step": 4563 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008833266537163908, + "loss": 0.8672, + "step": 4564 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008832707634113126, + "loss": 0.9648, + "step": 4565 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008832148614918712, + "loss": 0.8594, + "step": 4566 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008831589479597608, + "loss": 0.9102, + "step": 4567 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008831030228166754, + "loss": 0.9258, + "step": 4568 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008830470860643103, + "loss": 0.9492, + "step": 4569 + }, + { + "epoch": 0.25, + "learning_rate": 0.00088299113770436, + "loss": 0.8789, + "step": 4570 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008829351777385204, + "loss": 1.0, + "step": 4571 + }, + { + "epoch": 0.25, + "learning_rate": 0.000882879206168487, + "loss": 0.8477, + "step": 4572 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008828232229959558, + "loss": 0.8398, + "step": 4573 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008827672282226235, + "loss": 1.0391, + "step": 4574 + }, + { + "epoch": 0.25, + "learning_rate": 0.000882711221850187, + "loss": 0.8008, + "step": 4575 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008826552038803433, + "loss": 0.9141, + "step": 4576 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008825991743147899, + "loss": 0.9219, + "step": 4577 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008825431331552248, + "loss": 0.8906, + "step": 4578 + }, + { + "epoch": 0.25, + "learning_rate": 0.000882487080403346, + "loss": 0.8906, + "step": 4579 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008824310160608526, + "loss": 0.9297, + "step": 4580 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008823749401294429, + "loss": 0.957, + "step": 4581 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008823188526108167, + "loss": 0.8984, + "step": 4582 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008822627535066734, + "loss": 0.9219, + "step": 4583 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008822066428187128, + "loss": 0.8594, + "step": 4584 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008821505205486356, + "loss": 1.0, + "step": 4585 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008820943866981423, + "loss": 0.9922, + "step": 4586 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008820382412689341, + "loss": 0.8945, + "step": 4587 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008819820842627122, + "loss": 0.8164, + "step": 4588 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008819259156811783, + "loss": 0.9531, + "step": 4589 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008818697355260348, + "loss": 0.9141, + "step": 4590 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008818135437989838, + "loss": 0.8359, + "step": 4591 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008817573405017283, + "loss": 0.8672, + "step": 4592 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008817011256359713, + "loss": 0.875, + "step": 4593 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008816448992034164, + "loss": 0.8867, + "step": 4594 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008815886612057674, + "loss": 0.8359, + "step": 4595 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008815324116447285, + "loss": 0.8672, + "step": 4596 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008814761505220043, + "loss": 0.918, + "step": 4597 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008814198778392998, + "loss": 0.9414, + "step": 4598 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008813635935983199, + "loss": 0.9258, + "step": 4599 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008813072978007706, + "loss": 0.9258, + "step": 4600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008812509904483575, + "loss": 0.9297, + "step": 4601 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008811946715427872, + "loss": 0.9375, + "step": 4602 + }, + { + "epoch": 0.25, + "learning_rate": 0.000881138341085766, + "loss": 0.8633, + "step": 4603 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008810819990790013, + "loss": 0.8203, + "step": 4604 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008810256455242003, + "loss": 0.9062, + "step": 4605 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008809692804230704, + "loss": 0.8516, + "step": 4606 + }, + { + "epoch": 0.25, + "learning_rate": 0.00088091290377732, + "loss": 0.875, + "step": 4607 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008808565155886574, + "loss": 0.8945, + "step": 4608 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008808001158587913, + "loss": 0.8906, + "step": 4609 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008807437045894309, + "loss": 0.8828, + "step": 4610 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008806872817822856, + "loss": 0.7812, + "step": 4611 + }, + { + "epoch": 0.25, + "learning_rate": 0.000880630847439065, + "loss": 0.875, + "step": 4612 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008805744015614796, + "loss": 0.9102, + "step": 4613 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008805179441512397, + "loss": 0.9922, + "step": 4614 + }, + { + "epoch": 0.25, + "learning_rate": 0.000880461475210056, + "loss": 0.875, + "step": 4615 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008804049947396399, + "loss": 0.8555, + "step": 4616 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008803485027417031, + "loss": 0.957, + "step": 4617 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008802919992179572, + "loss": 0.9023, + "step": 4618 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008802354841701145, + "loss": 0.8945, + "step": 4619 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008801789575998876, + "loss": 0.8672, + "step": 4620 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008801224195089895, + "loss": 0.9297, + "step": 4621 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008800658698991335, + "loss": 0.9531, + "step": 4622 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008800093087720332, + "loss": 0.8359, + "step": 4623 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008799527361294025, + "loss": 0.9023, + "step": 4624 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008798961519729559, + "loss": 0.8945, + "step": 4625 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008798395563044079, + "loss": 0.7773, + "step": 4626 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008797829491254738, + "loss": 0.9062, + "step": 4627 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008797263304378689, + "loss": 1.0234, + "step": 4628 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008796697002433085, + "loss": 0.8477, + "step": 4629 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008796130585435094, + "loss": 0.8984, + "step": 4630 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008795564053401874, + "loss": 0.8477, + "step": 4631 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008794997406350597, + "loss": 0.9805, + "step": 4632 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008794430644298432, + "loss": 0.8438, + "step": 4633 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008793863767262556, + "loss": 0.9023, + "step": 4634 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008793296775260144, + "loss": 0.8398, + "step": 4635 + }, + { + "epoch": 0.25, + "learning_rate": 0.000879272966830838, + "loss": 0.918, + "step": 4636 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008792162446424449, + "loss": 0.832, + "step": 4637 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008791595109625539, + "loss": 0.8906, + "step": 4638 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008791027657928843, + "loss": 1.0156, + "step": 4639 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008790460091351555, + "loss": 1.0156, + "step": 4640 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008789892409910877, + "loss": 0.8867, + "step": 4641 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008789324613624008, + "loss": 0.8555, + "step": 4642 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008788756702508157, + "loss": 0.918, + "step": 4643 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008788188676580533, + "loss": 0.875, + "step": 4644 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008787620535858348, + "loss": 0.9336, + "step": 4645 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008787052280358821, + "loss": 0.9336, + "step": 4646 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008786483910099168, + "loss": 0.9531, + "step": 4647 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008785915425096613, + "loss": 1.0312, + "step": 4648 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008785346825368387, + "loss": 0.8945, + "step": 4649 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008784778110931719, + "loss": 0.8711, + "step": 4650 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008784209281803841, + "loss": 0.8984, + "step": 4651 + }, + { + "epoch": 0.25, + "learning_rate": 0.000878364033800199, + "loss": 0.957, + "step": 4652 + }, + { + "epoch": 0.25, + "learning_rate": 0.000878307127954341, + "loss": 1.0234, + "step": 4653 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008782502106445343, + "loss": 0.8711, + "step": 4654 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008781932818725037, + "loss": 0.9414, + "step": 4655 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008781363416399744, + "loss": 0.9961, + "step": 4656 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008780793899486719, + "loss": 0.8867, + "step": 4657 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008780224268003219, + "loss": 0.8477, + "step": 4658 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008779654521966505, + "loss": 0.9102, + "step": 4659 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008779084661393845, + "loss": 0.875, + "step": 4660 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008778514686302506, + "loss": 0.9492, + "step": 4661 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008777944596709762, + "loss": 0.8438, + "step": 4662 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008777374392632885, + "loss": 0.957, + "step": 4663 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008776804074089156, + "loss": 0.8789, + "step": 4664 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008776233641095857, + "loss": 0.8477, + "step": 4665 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008775663093670275, + "loss": 0.8945, + "step": 4666 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008775092431829699, + "loss": 0.8945, + "step": 4667 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008774521655591423, + "loss": 0.8633, + "step": 4668 + }, + { + "epoch": 0.25, + "learning_rate": 0.000877395076497274, + "loss": 0.8867, + "step": 4669 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008773379759990954, + "loss": 0.8789, + "step": 4670 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008772808640663365, + "loss": 0.9141, + "step": 4671 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008772237407007282, + "loss": 0.9102, + "step": 4672 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008771666059040015, + "loss": 0.9766, + "step": 4673 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008771094596778877, + "loss": 0.9961, + "step": 4674 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008770523020241184, + "loss": 0.8945, + "step": 4675 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008769951329444259, + "loss": 0.8945, + "step": 4676 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008769379524405425, + "loss": 0.9023, + "step": 4677 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008768807605142011, + "loss": 0.9023, + "step": 4678 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008768235571671346, + "loss": 0.9414, + "step": 4679 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008767663424010765, + "loss": 0.8594, + "step": 4680 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008767091162177605, + "loss": 0.8594, + "step": 4681 + }, + { + "epoch": 0.25, + "learning_rate": 0.000876651878618921, + "loss": 0.8672, + "step": 4682 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008765946296062923, + "loss": 0.9023, + "step": 4683 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008765373691816094, + "loss": 0.8828, + "step": 4684 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008764800973466072, + "loss": 0.9062, + "step": 4685 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008764228141030215, + "loss": 0.8672, + "step": 4686 + }, + { + "epoch": 0.25, + "learning_rate": 0.000876365519452588, + "loss": 0.8945, + "step": 4687 + }, + { + "epoch": 0.25, + "learning_rate": 0.000876308213397043, + "loss": 0.9297, + "step": 4688 + }, + { + "epoch": 0.25, + "learning_rate": 0.000876250895938123, + "loss": 0.9258, + "step": 4689 + }, + { + "epoch": 0.25, + "learning_rate": 0.000876193567077565, + "loss": 0.8359, + "step": 4690 + }, + { + "epoch": 0.25, + "learning_rate": 0.000876136226817106, + "loss": 0.9336, + "step": 4691 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008760788751584841, + "loss": 0.8711, + "step": 4692 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008760215121034367, + "loss": 0.9883, + "step": 4693 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008759641376537025, + "loss": 0.8555, + "step": 4694 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008759067518110198, + "loss": 0.9648, + "step": 4695 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008758493545771278, + "loss": 0.8711, + "step": 4696 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008757919459537657, + "loss": 0.9102, + "step": 4697 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008757345259426732, + "loss": 0.8828, + "step": 4698 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008756770945455905, + "loss": 0.9102, + "step": 4699 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008756196517642577, + "loss": 0.9141, + "step": 4700 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008755621976004158, + "loss": 0.9102, + "step": 4701 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008755047320558053, + "loss": 0.8828, + "step": 4702 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008754472551321684, + "loss": 0.8008, + "step": 4703 + }, + { + "epoch": 0.25, + "learning_rate": 0.000875389766831246, + "loss": 0.9297, + "step": 4704 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008753322671547808, + "loss": 0.8984, + "step": 4705 + }, + { + "epoch": 0.25, + "learning_rate": 0.000875274756104515, + "loss": 0.9062, + "step": 4706 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008752172336821913, + "loss": 0.9844, + "step": 4707 + }, + { + "epoch": 0.25, + "learning_rate": 0.000875159699889553, + "loss": 0.8281, + "step": 4708 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008751021547283433, + "loss": 0.8672, + "step": 4709 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008750445982003063, + "loss": 0.8906, + "step": 4710 + }, + { + "epoch": 0.25, + "learning_rate": 0.000874987030307186, + "loss": 0.9102, + "step": 4711 + }, + { + "epoch": 0.25, + "learning_rate": 0.000874929451050727, + "loss": 0.8086, + "step": 4712 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008748718604326739, + "loss": 0.8906, + "step": 4713 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008748142584547722, + "loss": 0.9414, + "step": 4714 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008747566451187672, + "loss": 0.8555, + "step": 4715 + }, + { + "epoch": 0.25, + "learning_rate": 0.000874699020426405, + "loss": 0.832, + "step": 4716 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008746413843794314, + "loss": 0.9922, + "step": 4717 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008745837369795935, + "loss": 0.8672, + "step": 4718 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008745260782286377, + "loss": 0.9062, + "step": 4719 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008744684081283115, + "loss": 0.9336, + "step": 4720 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008744107266803626, + "loss": 0.8906, + "step": 4721 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008743530338865388, + "loss": 0.957, + "step": 4722 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008742953297485883, + "loss": 0.8867, + "step": 4723 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008742376142682599, + "loss": 0.9062, + "step": 4724 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008741798874473025, + "loss": 0.9102, + "step": 4725 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008741221492874654, + "loss": 0.9023, + "step": 4726 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008740643997904983, + "loss": 0.8359, + "step": 4727 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008740066389581509, + "loss": 0.8711, + "step": 4728 + }, + { + "epoch": 0.25, + "learning_rate": 0.000873948866792174, + "loss": 0.9609, + "step": 4729 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008738910832943181, + "loss": 0.8516, + "step": 4730 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008738332884663341, + "loss": 0.8555, + "step": 4731 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008737754823099736, + "loss": 0.9102, + "step": 4732 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008737176648269883, + "loss": 0.8945, + "step": 4733 + }, + { + "epoch": 0.25, + "learning_rate": 0.00087365983601913, + "loss": 0.957, + "step": 4734 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008736019958881512, + "loss": 0.9414, + "step": 4735 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008735441444358049, + "loss": 0.875, + "step": 4736 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008734862816638438, + "loss": 0.8281, + "step": 4737 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008734284075740217, + "loss": 0.8906, + "step": 4738 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008733705221680922, + "loss": 0.8984, + "step": 4739 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008733126254478094, + "loss": 0.9453, + "step": 4740 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008732547174149278, + "loss": 0.875, + "step": 4741 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008731967980712021, + "loss": 0.9414, + "step": 4742 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008731388674183875, + "loss": 0.8984, + "step": 4743 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008730809254582397, + "loss": 0.832, + "step": 4744 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008730229721925144, + "loss": 0.8945, + "step": 4745 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008729650076229676, + "loss": 0.8828, + "step": 4746 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008729070317513559, + "loss": 0.9023, + "step": 4747 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008728490445794364, + "loss": 0.918, + "step": 4748 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008727910461089659, + "loss": 0.9844, + "step": 4749 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008727330363417023, + "loss": 0.8047, + "step": 4750 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008726750152794032, + "loss": 0.9102, + "step": 4751 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008726169829238271, + "loss": 1.0156, + "step": 4752 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008725589392767324, + "loss": 0.9375, + "step": 4753 + }, + { + "epoch": 0.26, + "learning_rate": 0.000872500884339878, + "loss": 0.8633, + "step": 4754 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008724428181150234, + "loss": 0.8945, + "step": 4755 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008723847406039278, + "loss": 1.0078, + "step": 4756 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008723266518083514, + "loss": 0.9609, + "step": 4757 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008722685517300545, + "loss": 0.9062, + "step": 4758 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008722104403707976, + "loss": 0.9141, + "step": 4759 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008721523177323417, + "loss": 0.8945, + "step": 4760 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008720941838164482, + "loss": 0.9062, + "step": 4761 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008720360386248785, + "loss": 1.0156, + "step": 4762 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008719778821593949, + "loss": 0.8672, + "step": 4763 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008719197144217597, + "loss": 0.9102, + "step": 4764 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008718615354137354, + "loss": 0.9375, + "step": 4765 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008718033451370848, + "loss": 0.8477, + "step": 4766 + }, + { + "epoch": 0.26, + "learning_rate": 0.000871745143593572, + "loss": 0.9023, + "step": 4767 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008716869307849598, + "loss": 0.9297, + "step": 4768 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008716287067130128, + "loss": 0.8281, + "step": 4769 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008715704713794954, + "loss": 0.8984, + "step": 4770 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008715122247861722, + "loss": 0.9062, + "step": 4771 + }, + { + "epoch": 0.26, + "learning_rate": 0.000871453966934808, + "loss": 0.9805, + "step": 4772 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008713956978271686, + "loss": 0.8945, + "step": 4773 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008713374174650197, + "loss": 0.8633, + "step": 4774 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008712791258501272, + "loss": 0.875, + "step": 4775 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008712208229842575, + "loss": 1.0078, + "step": 4776 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008711625088691776, + "loss": 0.8945, + "step": 4777 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008711041835066545, + "loss": 0.8242, + "step": 4778 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008710458468984554, + "loss": 0.8867, + "step": 4779 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008709874990463486, + "loss": 0.9766, + "step": 4780 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008709291399521018, + "loss": 0.9609, + "step": 4781 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008708707696174837, + "loss": 0.8789, + "step": 4782 + }, + { + "epoch": 0.26, + "learning_rate": 0.000870812388044263, + "loss": 0.8594, + "step": 4783 + }, + { + "epoch": 0.26, + "learning_rate": 0.000870753995234209, + "loss": 0.8555, + "step": 4784 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008706955911890908, + "loss": 0.9023, + "step": 4785 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008706371759106788, + "loss": 1.0156, + "step": 4786 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008705787494007428, + "loss": 0.8945, + "step": 4787 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008705203116610534, + "loss": 0.8789, + "step": 4788 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008704618626933816, + "loss": 0.9414, + "step": 4789 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008704034024994982, + "loss": 0.8359, + "step": 4790 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008703449310811753, + "loss": 0.9492, + "step": 4791 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008702864484401842, + "loss": 0.9531, + "step": 4792 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008702279545782976, + "loss": 0.8906, + "step": 4793 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008701694494972879, + "loss": 0.9336, + "step": 4794 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008701109331989276, + "loss": 0.8359, + "step": 4795 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008700524056849905, + "loss": 0.8359, + "step": 4796 + }, + { + "epoch": 0.26, + "learning_rate": 0.00086999386695725, + "loss": 0.8594, + "step": 4797 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008699353170174799, + "loss": 0.8711, + "step": 4798 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008698767558674547, + "loss": 0.8945, + "step": 4799 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008698181835089486, + "loss": 0.918, + "step": 4800 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008697595999437368, + "loss": 0.9102, + "step": 4801 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008697010051735946, + "loss": 0.8477, + "step": 4802 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008696423992002975, + "loss": 0.9062, + "step": 4803 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008695837820256215, + "loss": 0.832, + "step": 4804 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008695251536513428, + "loss": 0.9414, + "step": 4805 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008694665140792384, + "loss": 0.8828, + "step": 4806 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008694078633110848, + "loss": 0.8398, + "step": 4807 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008693492013486595, + "loss": 1.0156, + "step": 4808 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008692905281937403, + "loss": 0.9141, + "step": 4809 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008692318438481048, + "loss": 0.9023, + "step": 4810 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008691731483135317, + "loss": 0.8555, + "step": 4811 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008691144415917995, + "loss": 0.918, + "step": 4812 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008690557236846875, + "loss": 0.8672, + "step": 4813 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008689969945939744, + "loss": 0.9609, + "step": 4814 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008689382543214406, + "loss": 0.8789, + "step": 4815 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008688795028688656, + "loss": 0.9414, + "step": 4816 + }, + { + "epoch": 0.26, + "learning_rate": 0.00086882074023803, + "loss": 0.9219, + "step": 4817 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008687619664307145, + "loss": 0.9727, + "step": 4818 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008687031814487002, + "loss": 0.8047, + "step": 4819 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008686443852937683, + "loss": 0.8516, + "step": 4820 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008685855779677006, + "loss": 0.9023, + "step": 4821 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008685267594722791, + "loss": 0.8789, + "step": 4822 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008684679298092863, + "loss": 0.9141, + "step": 4823 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008684090889805048, + "loss": 0.9414, + "step": 4824 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008683502369877179, + "loss": 0.9688, + "step": 4825 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008682913738327087, + "loss": 0.8984, + "step": 4826 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008682324995172612, + "loss": 0.8164, + "step": 4827 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008681736140431594, + "loss": 0.9844, + "step": 4828 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008681147174121877, + "loss": 0.8594, + "step": 4829 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008680558096261308, + "loss": 0.9336, + "step": 4830 + }, + { + "epoch": 0.26, + "learning_rate": 0.000867996890686774, + "loss": 1.0391, + "step": 4831 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008679379605959026, + "loss": 0.8789, + "step": 4832 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008678790193553023, + "loss": 0.9219, + "step": 4833 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008678200669667593, + "loss": 0.8945, + "step": 4834 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008677611034320602, + "loss": 0.9609, + "step": 4835 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008677021287529916, + "loss": 0.8672, + "step": 4836 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008676431429313405, + "loss": 0.9141, + "step": 4837 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008675841459688947, + "loss": 0.9688, + "step": 4838 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008675251378674417, + "loss": 0.8945, + "step": 4839 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008674661186287698, + "loss": 0.8164, + "step": 4840 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008674070882546675, + "loss": 0.9531, + "step": 4841 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008673480467469236, + "loss": 0.8438, + "step": 4842 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008672889941073272, + "loss": 0.875, + "step": 4843 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008672299303376678, + "loss": 0.9062, + "step": 4844 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008671708554397352, + "loss": 0.9102, + "step": 4845 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008671117694153196, + "loss": 0.9492, + "step": 4846 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008670526722662114, + "loss": 0.8594, + "step": 4847 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008669935639942017, + "loss": 0.9492, + "step": 4848 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008669344446010814, + "loss": 0.8945, + "step": 4849 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008668753140886421, + "loss": 0.918, + "step": 4850 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008668161724586759, + "loss": 0.8359, + "step": 4851 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008667570197129744, + "loss": 0.9688, + "step": 4852 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008666978558533307, + "loss": 0.9102, + "step": 4853 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008666386808815374, + "loss": 0.8672, + "step": 4854 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008665794947993877, + "loss": 0.9453, + "step": 4855 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008665202976086751, + "loss": 0.832, + "step": 4856 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008664610893111937, + "loss": 0.9453, + "step": 4857 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008664018699087374, + "loss": 0.9922, + "step": 4858 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008663426394031009, + "loss": 0.918, + "step": 4859 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008662833977960791, + "loss": 0.9805, + "step": 4860 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008662241450894672, + "loss": 0.8828, + "step": 4861 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008661648812850607, + "loss": 0.8008, + "step": 4862 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008661056063846555, + "loss": 0.957, + "step": 4863 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008660463203900478, + "loss": 0.8906, + "step": 4864 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008659870233030342, + "loss": 0.8867, + "step": 4865 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008659277151254116, + "loss": 0.9648, + "step": 4866 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008658683958589773, + "loss": 0.8906, + "step": 4867 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008658090655055289, + "loss": 0.8945, + "step": 4868 + }, + { + "epoch": 0.26, + "learning_rate": 0.000865749724066864, + "loss": 0.8906, + "step": 4869 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008656903715447812, + "loss": 0.9023, + "step": 4870 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008656310079410787, + "loss": 0.9023, + "step": 4871 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008655716332575559, + "loss": 0.9336, + "step": 4872 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008655122474960116, + "loss": 0.8945, + "step": 4873 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008654528506582455, + "loss": 0.8906, + "step": 4874 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008653934427460577, + "loss": 0.8945, + "step": 4875 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008653340237612483, + "loss": 0.918, + "step": 4876 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008652745937056182, + "loss": 0.9609, + "step": 4877 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008652151525809677, + "loss": 0.8789, + "step": 4878 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008651557003890984, + "loss": 0.8945, + "step": 4879 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008650962371318121, + "loss": 0.875, + "step": 4880 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008650367628109105, + "loss": 0.9609, + "step": 4881 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008649772774281958, + "loss": 0.8672, + "step": 4882 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008649177809854709, + "loss": 0.8945, + "step": 4883 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008648582734845385, + "loss": 0.7617, + "step": 4884 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008647987549272019, + "loss": 0.8672, + "step": 4885 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008647392253152646, + "loss": 0.9844, + "step": 4886 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008646796846505309, + "loss": 0.9414, + "step": 4887 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008646201329348045, + "loss": 0.9023, + "step": 4888 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008645605701698907, + "loss": 0.9023, + "step": 4889 + }, + { + "epoch": 0.26, + "learning_rate": 0.000864500996357594, + "loss": 0.8789, + "step": 4890 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008644414114997199, + "loss": 0.875, + "step": 4891 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008643818155980737, + "loss": 0.8945, + "step": 4892 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008643222086544618, + "loss": 0.9258, + "step": 4893 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008642625906706901, + "loss": 0.9062, + "step": 4894 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008642029616485653, + "loss": 0.8594, + "step": 4895 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008641433215898945, + "loss": 0.7969, + "step": 4896 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008640836704964849, + "loss": 0.9141, + "step": 4897 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008640240083701441, + "loss": 0.8867, + "step": 4898 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008639643352126803, + "loss": 0.8945, + "step": 4899 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008639046510259015, + "loss": 0.8203, + "step": 4900 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008638449558116164, + "loss": 0.8438, + "step": 4901 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008637852495716338, + "loss": 0.9414, + "step": 4902 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008637255323077633, + "loss": 0.918, + "step": 4903 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008636658040218145, + "loss": 0.9062, + "step": 4904 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008636060647155972, + "loss": 0.8984, + "step": 4905 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008635463143909217, + "loss": 0.8672, + "step": 4906 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008634865530495987, + "loss": 0.9219, + "step": 4907 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008634267806934393, + "loss": 0.8789, + "step": 4908 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008633669973242546, + "loss": 1.0, + "step": 4909 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008633072029438563, + "loss": 1.0312, + "step": 4910 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008632473975540562, + "loss": 0.9023, + "step": 4911 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008631875811566668, + "loss": 0.9453, + "step": 4912 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008631277537535007, + "loss": 0.9062, + "step": 4913 + }, + { + "epoch": 0.26, + "learning_rate": 0.000863067915346371, + "loss": 0.9414, + "step": 4914 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008630080659370908, + "loss": 0.8438, + "step": 4915 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008629482055274737, + "loss": 0.832, + "step": 4916 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008628883341193339, + "loss": 0.7695, + "step": 4917 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008628284517144854, + "loss": 0.9492, + "step": 4918 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008627685583147432, + "loss": 0.9492, + "step": 4919 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008627086539219217, + "loss": 0.9414, + "step": 4920 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008626487385378367, + "loss": 0.9062, + "step": 4921 + }, + { + "epoch": 0.26, + "learning_rate": 0.000862588812164304, + "loss": 0.8828, + "step": 4922 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008625288748031389, + "loss": 0.9648, + "step": 4923 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008624689264561582, + "loss": 0.9297, + "step": 4924 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008624089671251784, + "loss": 0.8711, + "step": 4925 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008623489968120164, + "loss": 0.9609, + "step": 4926 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008622890155184895, + "loss": 0.9102, + "step": 4927 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008622290232464155, + "loss": 0.7852, + "step": 4928 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008621690199976123, + "loss": 0.9102, + "step": 4929 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008621090057738979, + "loss": 0.8438, + "step": 4930 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008620489805770912, + "loss": 0.8398, + "step": 4931 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008619889444090112, + "loss": 0.8633, + "step": 4932 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008619288972714771, + "loss": 1.0547, + "step": 4933 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008618688391663086, + "loss": 0.8242, + "step": 4934 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008618087700953257, + "loss": 1.0156, + "step": 4935 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008617486900603483, + "loss": 0.9141, + "step": 4936 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008616885990631975, + "loss": 0.9102, + "step": 4937 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008616284971056942, + "loss": 0.9805, + "step": 4938 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008615683841896593, + "loss": 0.8945, + "step": 4939 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008615082603169149, + "loss": 0.957, + "step": 4940 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008614481254892825, + "loss": 0.8594, + "step": 4941 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008613879797085849, + "loss": 0.9336, + "step": 4942 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008613278229766441, + "loss": 0.8945, + "step": 4943 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008612676552952836, + "loss": 0.9258, + "step": 4944 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008612074766663263, + "loss": 0.9141, + "step": 4945 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008611472870915961, + "loss": 0.9102, + "step": 4946 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008610870865729168, + "loss": 0.9219, + "step": 4947 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008610268751121125, + "loss": 0.9219, + "step": 4948 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008609666527110082, + "loss": 0.8945, + "step": 4949 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008609064193714285, + "loss": 0.9844, + "step": 4950 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008608461750951987, + "loss": 0.9141, + "step": 4951 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008607859198841445, + "loss": 0.9062, + "step": 4952 + }, + { + "epoch": 0.27, + "learning_rate": 0.000860725653740092, + "loss": 0.8945, + "step": 4953 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008606653766648671, + "loss": 0.8516, + "step": 4954 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008606050886602966, + "loss": 0.9648, + "step": 4955 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008605447897282075, + "loss": 0.9336, + "step": 4956 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008604844798704269, + "loss": 0.8867, + "step": 4957 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008604241590887824, + "loss": 0.9102, + "step": 4958 + }, + { + "epoch": 0.27, + "learning_rate": 0.000860363827385102, + "loss": 0.8555, + "step": 4959 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008603034847612138, + "loss": 0.9297, + "step": 4960 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008602431312189466, + "loss": 0.8711, + "step": 4961 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008601827667601292, + "loss": 0.9453, + "step": 4962 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008601223913865907, + "loss": 0.9141, + "step": 4963 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008600620051001608, + "loss": 0.8828, + "step": 4964 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008600016079026696, + "loss": 0.8828, + "step": 4965 + }, + { + "epoch": 0.27, + "learning_rate": 0.000859941199795947, + "loss": 0.8164, + "step": 4966 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008598807807818239, + "loss": 0.918, + "step": 4967 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008598203508621308, + "loss": 0.8906, + "step": 4968 + }, + { + "epoch": 0.27, + "learning_rate": 0.000859759910038699, + "loss": 0.9336, + "step": 4969 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008596994583133605, + "loss": 0.9492, + "step": 4970 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008596389956879467, + "loss": 1.0156, + "step": 4971 + }, + { + "epoch": 0.27, + "learning_rate": 0.00085957852216429, + "loss": 0.9336, + "step": 4972 + }, + { + "epoch": 0.27, + "learning_rate": 0.000859518037744223, + "loss": 0.918, + "step": 4973 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008594575424295785, + "loss": 0.8086, + "step": 4974 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008593970362221897, + "loss": 0.918, + "step": 4975 + }, + { + "epoch": 0.27, + "learning_rate": 0.00085933651912389, + "loss": 0.8438, + "step": 4976 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008592759911365135, + "loss": 0.8789, + "step": 4977 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008592154522618945, + "loss": 0.8828, + "step": 4978 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008591549025018672, + "loss": 0.9922, + "step": 4979 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008590943418582667, + "loss": 0.8516, + "step": 4980 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008590337703329278, + "loss": 0.9375, + "step": 4981 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008589731879276865, + "loss": 0.9023, + "step": 4982 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008589125946443784, + "loss": 0.9453, + "step": 4983 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008588519904848398, + "loss": 0.8516, + "step": 4984 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008587913754509072, + "loss": 0.832, + "step": 4985 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008587307495444172, + "loss": 0.8633, + "step": 4986 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008586701127672072, + "loss": 0.8633, + "step": 4987 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008586094651211145, + "loss": 0.9531, + "step": 4988 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008585488066079772, + "loss": 0.8828, + "step": 4989 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008584881372296332, + "loss": 0.832, + "step": 4990 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008584274569879212, + "loss": 0.875, + "step": 4991 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008583667658846796, + "loss": 0.9453, + "step": 4992 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008583060639217482, + "loss": 0.8867, + "step": 4993 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008582453511009658, + "loss": 0.8516, + "step": 4994 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008581846274241727, + "loss": 0.8672, + "step": 4995 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008581238928932087, + "loss": 0.8281, + "step": 4996 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008580631475099144, + "loss": 0.8828, + "step": 4997 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008580023912761306, + "loss": 0.8945, + "step": 4998 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008579416241936983, + "loss": 0.9141, + "step": 4999 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008578808462644592, + "loss": 0.8516, + "step": 5000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008578200574902546, + "loss": 0.9414, + "step": 5001 + }, + { + "epoch": 0.27, + "learning_rate": 0.000857759257872927, + "loss": 0.8711, + "step": 5002 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008576984474143188, + "loss": 0.8867, + "step": 5003 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008576376261162726, + "loss": 1.0234, + "step": 5004 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008575767939806317, + "loss": 0.8984, + "step": 5005 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008575159510092392, + "loss": 0.9648, + "step": 5006 + }, + { + "epoch": 0.27, + "learning_rate": 0.000857455097203939, + "loss": 0.9023, + "step": 5007 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008573942325665753, + "loss": 0.8438, + "step": 5008 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008573333570989922, + "loss": 0.8008, + "step": 5009 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008572724708030349, + "loss": 0.9023, + "step": 5010 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008572115736805479, + "loss": 0.918, + "step": 5011 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008571506657333769, + "loss": 0.875, + "step": 5012 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008570897469633677, + "loss": 0.8867, + "step": 5013 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008570288173723661, + "loss": 0.9258, + "step": 5014 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008569678769622187, + "loss": 0.8828, + "step": 5015 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008569069257347718, + "loss": 0.8984, + "step": 5016 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008568459636918729, + "loss": 0.957, + "step": 5017 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008567849908353689, + "loss": 0.8906, + "step": 5018 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008567240071671078, + "loss": 0.8086, + "step": 5019 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008566630126889376, + "loss": 0.8945, + "step": 5020 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008566020074027065, + "loss": 1.0156, + "step": 5021 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008565409913102632, + "loss": 0.9531, + "step": 5022 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008564799644134567, + "loss": 0.9102, + "step": 5023 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008564189267141363, + "loss": 0.9219, + "step": 5024 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008563578782141517, + "loss": 0.8672, + "step": 5025 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008562968189153526, + "loss": 0.9453, + "step": 5026 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008562357488195898, + "loss": 0.9258, + "step": 5027 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008561746679287134, + "loss": 0.9336, + "step": 5028 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008561135762445749, + "loss": 0.8047, + "step": 5029 + }, + { + "epoch": 0.27, + "learning_rate": 0.000856052473769025, + "loss": 0.8711, + "step": 5030 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008559913605039158, + "loss": 0.9414, + "step": 5031 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008559302364510988, + "loss": 0.9141, + "step": 5032 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008558691016124267, + "loss": 0.8281, + "step": 5033 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008558079559897515, + "loss": 0.8438, + "step": 5034 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008557467995849268, + "loss": 0.8945, + "step": 5035 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008556856323998055, + "loss": 0.8594, + "step": 5036 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008556244544362411, + "loss": 0.9219, + "step": 5037 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008555632656960876, + "loss": 0.9414, + "step": 5038 + }, + { + "epoch": 0.27, + "learning_rate": 0.000855502066181199, + "loss": 0.9102, + "step": 5039 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008554408558934303, + "loss": 0.8672, + "step": 5040 + }, + { + "epoch": 0.27, + "learning_rate": 0.000855379634834636, + "loss": 0.9766, + "step": 5041 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008553184030066714, + "loss": 0.8984, + "step": 5042 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008552571604113921, + "loss": 0.9297, + "step": 5043 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008551959070506539, + "loss": 0.9453, + "step": 5044 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008551346429263127, + "loss": 0.8906, + "step": 5045 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008550733680402256, + "loss": 0.9023, + "step": 5046 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008550120823942489, + "loss": 0.8789, + "step": 5047 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008549507859902402, + "loss": 0.9297, + "step": 5048 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008548894788300565, + "loss": 1.0, + "step": 5049 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008548281609155559, + "loss": 0.8672, + "step": 5050 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008547668322485964, + "loss": 0.9492, + "step": 5051 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008547054928310367, + "loss": 0.9492, + "step": 5052 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008546441426647354, + "loss": 0.9023, + "step": 5053 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008545827817515516, + "loss": 0.9375, + "step": 5054 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008545214100933447, + "loss": 0.8789, + "step": 5055 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008544600276919747, + "loss": 1.0156, + "step": 5056 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008543986345493015, + "loss": 0.9492, + "step": 5057 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008543372306671855, + "loss": 0.8906, + "step": 5058 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008542758160474874, + "loss": 0.9297, + "step": 5059 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008542143906920684, + "loss": 0.918, + "step": 5060 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008541529546027899, + "loss": 0.8594, + "step": 5061 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008540915077815134, + "loss": 0.9297, + "step": 5062 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008540300502301013, + "loss": 0.8438, + "step": 5063 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008539685819504156, + "loss": 0.9102, + "step": 5064 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008539071029443193, + "loss": 0.8828, + "step": 5065 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008538456132136751, + "loss": 0.9844, + "step": 5066 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008537841127603465, + "loss": 0.8555, + "step": 5067 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008537226015861972, + "loss": 0.8125, + "step": 5068 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008536610796930912, + "loss": 0.8789, + "step": 5069 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008535995470828928, + "loss": 0.9375, + "step": 5070 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008535380037574666, + "loss": 0.8047, + "step": 5071 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008534764497186776, + "loss": 0.8945, + "step": 5072 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008534148849683909, + "loss": 0.8242, + "step": 5073 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008533533095084726, + "loss": 1.0078, + "step": 5074 + }, + { + "epoch": 0.27, + "learning_rate": 0.000853291723340788, + "loss": 0.875, + "step": 5075 + }, + { + "epoch": 0.27, + "learning_rate": 0.000853230126467204, + "loss": 0.8438, + "step": 5076 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008531685188895865, + "loss": 0.8984, + "step": 5077 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008531069006098032, + "loss": 0.8555, + "step": 5078 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008530452716297205, + "loss": 0.9375, + "step": 5079 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008529836319512067, + "loss": 0.8984, + "step": 5080 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008529219815761293, + "loss": 0.875, + "step": 5081 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008528603205063566, + "loss": 1.0, + "step": 5082 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008527986487437571, + "loss": 0.9648, + "step": 5083 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008527369662901995, + "loss": 0.9531, + "step": 5084 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008526752731475533, + "loss": 0.9336, + "step": 5085 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008526135693176877, + "loss": 0.9141, + "step": 5086 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008525518548024728, + "loss": 0.8867, + "step": 5087 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008524901296037788, + "loss": 0.9727, + "step": 5088 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008524283937234758, + "loss": 0.9453, + "step": 5089 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008523666471634348, + "loss": 0.8867, + "step": 5090 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008523048899255271, + "loss": 0.8594, + "step": 5091 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008522431220116238, + "loss": 0.875, + "step": 5092 + }, + { + "epoch": 0.27, + "learning_rate": 0.000852181343423597, + "loss": 0.8867, + "step": 5093 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008521195541633186, + "loss": 0.8945, + "step": 5094 + }, + { + "epoch": 0.27, + "learning_rate": 0.000852057754232661, + "loss": 0.8828, + "step": 5095 + }, + { + "epoch": 0.27, + "learning_rate": 0.000851995943633497, + "loss": 0.9336, + "step": 5096 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008519341223676998, + "loss": 0.8984, + "step": 5097 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008518722904371426, + "loss": 0.8477, + "step": 5098 + }, + { + "epoch": 0.27, + "learning_rate": 0.000851810447843699, + "loss": 0.8789, + "step": 5099 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008517485945892432, + "loss": 0.8984, + "step": 5100 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008516867306756497, + "loss": 0.8711, + "step": 5101 + }, + { + "epoch": 0.27, + "learning_rate": 0.000851624856104793, + "loss": 0.9375, + "step": 5102 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008515629708785482, + "loss": 0.8594, + "step": 5103 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008515010749987905, + "loss": 0.9023, + "step": 5104 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008514391684673957, + "loss": 0.8945, + "step": 5105 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008513772512862396, + "loss": 0.9453, + "step": 5106 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008513153234571987, + "loss": 0.9219, + "step": 5107 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008512533849821493, + "loss": 0.8242, + "step": 5108 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008511914358629686, + "loss": 0.8594, + "step": 5109 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008511294761015338, + "loss": 0.9492, + "step": 5110 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008510675056997226, + "loss": 0.9219, + "step": 5111 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008510055246594127, + "loss": 0.8984, + "step": 5112 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008509435329824824, + "loss": 0.8945, + "step": 5113 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008508815306708102, + "loss": 0.9102, + "step": 5114 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008508195177262752, + "loss": 0.8672, + "step": 5115 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008507574941507563, + "loss": 0.9492, + "step": 5116 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008506954599461332, + "loss": 0.8711, + "step": 5117 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008506334151142857, + "loss": 0.8555, + "step": 5118 + }, + { + "epoch": 0.28, + "learning_rate": 0.000850571359657094, + "loss": 0.9453, + "step": 5119 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008505092935764385, + "loss": 0.8398, + "step": 5120 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008504472168742001, + "loss": 0.8477, + "step": 5121 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008503851295522598, + "loss": 0.8867, + "step": 5122 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008503230316124992, + "loss": 0.9609, + "step": 5123 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008502609230567999, + "loss": 0.9258, + "step": 5124 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008501988038870442, + "loss": 0.9102, + "step": 5125 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008501366741051143, + "loss": 0.9805, + "step": 5126 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008500745337128932, + "loss": 0.8672, + "step": 5127 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008500123827122637, + "loss": 0.9492, + "step": 5128 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008499502211051092, + "loss": 0.8438, + "step": 5129 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008498880488933135, + "loss": 1.0156, + "step": 5130 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008498258660787608, + "loss": 0.9219, + "step": 5131 + }, + { + "epoch": 0.28, + "learning_rate": 0.000849763672663335, + "loss": 0.8555, + "step": 5132 + }, + { + "epoch": 0.28, + "learning_rate": 0.000849701468648921, + "loss": 0.8477, + "step": 5133 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008496392540374041, + "loss": 0.8984, + "step": 5134 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008495770288306691, + "loss": 0.8047, + "step": 5135 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008495147930306018, + "loss": 0.8047, + "step": 5136 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008494525466390883, + "loss": 0.918, + "step": 5137 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008493902896580146, + "loss": 0.8906, + "step": 5138 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008493280220892675, + "loss": 0.8945, + "step": 5139 + }, + { + "epoch": 0.28, + "learning_rate": 0.000849265743934734, + "loss": 0.8281, + "step": 5140 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008492034551963011, + "loss": 0.9219, + "step": 5141 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008491411558758564, + "loss": 0.9023, + "step": 5142 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008490788459752877, + "loss": 0.9375, + "step": 5143 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008490165254964834, + "loss": 0.8789, + "step": 5144 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008489541944413319, + "loss": 0.9297, + "step": 5145 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008488918528117222, + "loss": 0.8477, + "step": 5146 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008488295006095432, + "loss": 0.9648, + "step": 5147 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008487671378366846, + "loss": 0.9492, + "step": 5148 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008487047644950361, + "loss": 0.8789, + "step": 5149 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008486423805864877, + "loss": 0.8008, + "step": 5150 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008485799861129299, + "loss": 0.875, + "step": 5151 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008485175810762535, + "loss": 0.9688, + "step": 5152 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008484551654783497, + "loss": 0.8438, + "step": 5153 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008483927393211098, + "loss": 0.9688, + "step": 5154 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008483303026064254, + "loss": 0.9727, + "step": 5155 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008482678553361887, + "loss": 0.8945, + "step": 5156 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008482053975122918, + "loss": 0.8398, + "step": 5157 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008481429291366279, + "loss": 0.9219, + "step": 5158 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008480804502110894, + "loss": 0.9453, + "step": 5159 + }, + { + "epoch": 0.28, + "learning_rate": 0.00084801796073757, + "loss": 0.957, + "step": 5160 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008479554607179632, + "loss": 0.8984, + "step": 5161 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008478929501541629, + "loss": 0.875, + "step": 5162 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008478304290480634, + "loss": 0.9609, + "step": 5163 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008477678974015595, + "loss": 0.9062, + "step": 5164 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008477053552165459, + "loss": 0.9023, + "step": 5165 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008476428024949179, + "loss": 0.9062, + "step": 5166 + }, + { + "epoch": 0.28, + "learning_rate": 0.000847580239238571, + "loss": 0.8516, + "step": 5167 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008475176654494012, + "loss": 0.9961, + "step": 5168 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008474550811293044, + "loss": 0.8398, + "step": 5169 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008473924862801777, + "loss": 0.8906, + "step": 5170 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008473298809039172, + "loss": 0.8672, + "step": 5171 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008472672650024206, + "loss": 0.9727, + "step": 5172 + }, + { + "epoch": 0.28, + "learning_rate": 0.000847204638577585, + "loss": 0.9531, + "step": 5173 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008471420016313084, + "loss": 0.8203, + "step": 5174 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008470793541654889, + "loss": 0.918, + "step": 5175 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008470166961820249, + "loss": 0.9023, + "step": 5176 + }, + { + "epoch": 0.28, + "learning_rate": 0.000846954027682815, + "loss": 0.9297, + "step": 5177 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008468913486697584, + "loss": 0.918, + "step": 5178 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008468286591447547, + "loss": 0.9141, + "step": 5179 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008467659591097032, + "loss": 0.8984, + "step": 5180 + }, + { + "epoch": 0.28, + "learning_rate": 0.000846703248566504, + "loss": 0.8789, + "step": 5181 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008466405275170576, + "loss": 0.8516, + "step": 5182 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008465777959632645, + "loss": 0.9453, + "step": 5183 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008465150539070259, + "loss": 0.9297, + "step": 5184 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008464523013502428, + "loss": 0.918, + "step": 5185 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008463895382948169, + "loss": 0.918, + "step": 5186 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008463267647426503, + "loss": 0.9336, + "step": 5187 + }, + { + "epoch": 0.28, + "learning_rate": 0.000846263980695645, + "loss": 0.8867, + "step": 5188 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008462011861557038, + "loss": 0.875, + "step": 5189 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008461383811247293, + "loss": 0.832, + "step": 5190 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008460755656046249, + "loss": 0.8398, + "step": 5191 + }, + { + "epoch": 0.28, + "learning_rate": 0.000846012739597294, + "loss": 0.8594, + "step": 5192 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008459499031046407, + "loss": 0.9766, + "step": 5193 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008458870561285688, + "loss": 0.9023, + "step": 5194 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008458241986709831, + "loss": 0.875, + "step": 5195 + }, + { + "epoch": 0.28, + "learning_rate": 0.000845761330733788, + "loss": 0.8906, + "step": 5196 + }, + { + "epoch": 0.28, + "learning_rate": 0.000845698452318889, + "loss": 0.8359, + "step": 5197 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008456355634281911, + "loss": 0.9648, + "step": 5198 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008455726640636006, + "loss": 0.9062, + "step": 5199 + }, + { + "epoch": 0.28, + "learning_rate": 0.000845509754227023, + "loss": 0.9297, + "step": 5200 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008454468339203651, + "loss": 0.8281, + "step": 5201 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008453839031455333, + "loss": 0.875, + "step": 5202 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008453209619044347, + "loss": 0.918, + "step": 5203 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008452580101989766, + "loss": 0.8672, + "step": 5204 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008451950480310667, + "loss": 0.8828, + "step": 5205 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008451320754026129, + "loss": 0.9844, + "step": 5206 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008450690923155238, + "loss": 0.9219, + "step": 5207 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008450060987717074, + "loss": 0.8594, + "step": 5208 + }, + { + "epoch": 0.28, + "learning_rate": 0.000844943094773073, + "loss": 0.9219, + "step": 5209 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008448800803215297, + "loss": 0.8086, + "step": 5210 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008448170554189873, + "loss": 0.8984, + "step": 5211 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008447540200673554, + "loss": 0.875, + "step": 5212 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008446909742685441, + "loss": 0.9688, + "step": 5213 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008446279180244643, + "loss": 0.9648, + "step": 5214 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008445648513370263, + "loss": 0.9844, + "step": 5215 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008445017742081416, + "loss": 0.8594, + "step": 5216 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008444386866397213, + "loss": 0.8828, + "step": 5217 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008443755886336777, + "loss": 0.8906, + "step": 5218 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008443124801919224, + "loss": 0.9336, + "step": 5219 + }, + { + "epoch": 0.28, + "learning_rate": 0.000844249361316368, + "loss": 0.9062, + "step": 5220 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008441862320089271, + "loss": 1.0156, + "step": 5221 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008441230922715128, + "loss": 0.8281, + "step": 5222 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008440599421060384, + "loss": 0.9336, + "step": 5223 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008439967815144176, + "loss": 0.832, + "step": 5224 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008439336104985644, + "loss": 0.8672, + "step": 5225 + }, + { + "epoch": 0.28, + "learning_rate": 0.000843870429060393, + "loss": 0.9062, + "step": 5226 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008438072372018181, + "loss": 0.8867, + "step": 5227 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008437440349247545, + "loss": 0.9141, + "step": 5228 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008436808222311175, + "loss": 0.8242, + "step": 5229 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008436175991228228, + "loss": 0.8789, + "step": 5230 + }, + { + "epoch": 0.28, + "learning_rate": 0.000843554365601786, + "loss": 0.8828, + "step": 5231 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008434911216699235, + "loss": 0.9414, + "step": 5232 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008434278673291515, + "loss": 0.8438, + "step": 5233 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008433646025813872, + "loss": 0.8359, + "step": 5234 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008433013274285476, + "loss": 0.8789, + "step": 5235 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008432380418725501, + "loss": 0.9023, + "step": 5236 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008431747459153123, + "loss": 0.9102, + "step": 5237 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008431114395587526, + "loss": 0.8672, + "step": 5238 + }, + { + "epoch": 0.28, + "learning_rate": 0.000843048122804789, + "loss": 0.8594, + "step": 5239 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008429847956553407, + "loss": 0.9141, + "step": 5240 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008429214581123263, + "loss": 0.8398, + "step": 5241 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008428581101776653, + "loss": 1.0, + "step": 5242 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008427947518532775, + "loss": 0.9141, + "step": 5243 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008427313831410825, + "loss": 0.9648, + "step": 5244 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008426680040430008, + "loss": 0.9141, + "step": 5245 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008426046145609531, + "loss": 0.918, + "step": 5246 + }, + { + "epoch": 0.28, + "learning_rate": 0.00084254121469686, + "loss": 0.8711, + "step": 5247 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008424778044526431, + "loss": 0.9258, + "step": 5248 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008424143838302237, + "loss": 0.8008, + "step": 5249 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008423509528315236, + "loss": 0.918, + "step": 5250 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008422875114584651, + "loss": 0.832, + "step": 5251 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008422240597129709, + "loss": 0.9023, + "step": 5252 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008421605975969632, + "loss": 0.875, + "step": 5253 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008420971251123657, + "loss": 0.8945, + "step": 5254 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008420336422611015, + "loss": 0.9023, + "step": 5255 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008419701490450943, + "loss": 0.8594, + "step": 5256 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008419066454662684, + "loss": 0.8984, + "step": 5257 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008418431315265481, + "loss": 0.918, + "step": 5258 + }, + { + "epoch": 0.28, + "learning_rate": 0.000841779607227858, + "loss": 0.8125, + "step": 5259 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008417160725721232, + "loss": 0.9102, + "step": 5260 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008416525275612688, + "loss": 0.8398, + "step": 5261 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008415889721972205, + "loss": 0.9141, + "step": 5262 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008415254064819044, + "loss": 0.8164, + "step": 5263 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008414618304172465, + "loss": 0.9258, + "step": 5264 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008413982440051737, + "loss": 0.9219, + "step": 5265 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008413346472476125, + "loss": 0.9297, + "step": 5266 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008412710401464905, + "loss": 0.9023, + "step": 5267 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008412074227037347, + "loss": 0.8242, + "step": 5268 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008411437949212735, + "loss": 1.0, + "step": 5269 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008410801568010344, + "loss": 0.8789, + "step": 5270 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008410165083449463, + "loss": 0.9844, + "step": 5271 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008409528495549377, + "loss": 0.8047, + "step": 5272 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008408891804329381, + "loss": 0.9688, + "step": 5273 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008408255009808763, + "loss": 0.9219, + "step": 5274 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008407618112006825, + "loss": 0.8906, + "step": 5275 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008406981110942862, + "loss": 0.8398, + "step": 5276 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008406344006636182, + "loss": 0.8008, + "step": 5277 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008405706799106088, + "loss": 0.9023, + "step": 5278 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008405069488371892, + "loss": 0.8828, + "step": 5279 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008404432074452906, + "loss": 0.8867, + "step": 5280 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008403794557368445, + "loss": 0.8672, + "step": 5281 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008403156937137827, + "loss": 0.8516, + "step": 5282 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008402519213780376, + "loss": 0.8828, + "step": 5283 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008401881387315415, + "loss": 0.957, + "step": 5284 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008401243457762276, + "loss": 0.918, + "step": 5285 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008400605425140285, + "loss": 0.8711, + "step": 5286 + }, + { + "epoch": 0.28, + "learning_rate": 0.000839996728946878, + "loss": 0.875, + "step": 5287 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008399329050767099, + "loss": 0.8789, + "step": 5288 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008398690709054581, + "loss": 0.8359, + "step": 5289 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008398052264350571, + "loss": 0.8945, + "step": 5290 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008397413716674414, + "loss": 0.8281, + "step": 5291 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008396775066045462, + "loss": 0.8828, + "step": 5292 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008396136312483068, + "loss": 0.9414, + "step": 5293 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008395497456006589, + "loss": 0.8789, + "step": 5294 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008394858496635383, + "loss": 0.8789, + "step": 5295 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008394219434388812, + "loss": 0.832, + "step": 5296 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008393580269286243, + "loss": 0.8906, + "step": 5297 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008392941001347045, + "loss": 0.8867, + "step": 5298 + }, + { + "epoch": 0.28, + "learning_rate": 0.000839230163059059, + "loss": 0.8359, + "step": 5299 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008391662157036252, + "loss": 0.8789, + "step": 5300 + }, + { + "epoch": 0.28, + "learning_rate": 0.000839102258070341, + "loss": 0.9414, + "step": 5301 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008390382901611445, + "loss": 1.0078, + "step": 5302 + }, + { + "epoch": 0.29, + "learning_rate": 0.000838974311977974, + "loss": 0.918, + "step": 5303 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008389103235227687, + "loss": 0.9961, + "step": 5304 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008388463247974669, + "loss": 0.8086, + "step": 5305 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008387823158040089, + "loss": 0.9648, + "step": 5306 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008387182965443335, + "loss": 0.8398, + "step": 5307 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008386542670203812, + "loss": 0.8828, + "step": 5308 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008385902272340922, + "loss": 0.8203, + "step": 5309 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008385261771874072, + "loss": 0.7812, + "step": 5310 + }, + { + "epoch": 0.29, + "learning_rate": 0.000838462116882267, + "loss": 0.875, + "step": 5311 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008383980463206128, + "loss": 0.9922, + "step": 5312 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008383339655043862, + "loss": 0.832, + "step": 5313 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008382698744355293, + "loss": 0.9102, + "step": 5314 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008382057731159837, + "loss": 0.9492, + "step": 5315 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008381416615476926, + "loss": 0.9062, + "step": 5316 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008380775397325982, + "loss": 0.7891, + "step": 5317 + }, + { + "epoch": 0.29, + "learning_rate": 0.000838013407672644, + "loss": 0.8594, + "step": 5318 + }, + { + "epoch": 0.29, + "learning_rate": 0.000837949265369773, + "loss": 0.8672, + "step": 5319 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008378851128259294, + "loss": 0.8867, + "step": 5320 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008378209500430568, + "loss": 0.9805, + "step": 5321 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008377567770230998, + "loss": 0.9648, + "step": 5322 + }, + { + "epoch": 0.29, + "learning_rate": 0.000837692593768003, + "loss": 1.0, + "step": 5323 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008376284002797113, + "loss": 0.8477, + "step": 5324 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008375641965601701, + "loss": 0.9336, + "step": 5325 + }, + { + "epoch": 0.29, + "learning_rate": 0.000837499982611325, + "loss": 0.9102, + "step": 5326 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008374357584351217, + "loss": 0.9336, + "step": 5327 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008373715240335064, + "loss": 0.8398, + "step": 5328 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008373072794084259, + "loss": 0.8672, + "step": 5329 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008372430245618267, + "loss": 0.9297, + "step": 5330 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008371787594956561, + "loss": 0.9102, + "step": 5331 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008371144842118616, + "loss": 0.8789, + "step": 5332 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008370501987123908, + "loss": 0.8672, + "step": 5333 + }, + { + "epoch": 0.29, + "learning_rate": 0.000836985902999192, + "loss": 0.8789, + "step": 5334 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008369215970742132, + "loss": 0.9023, + "step": 5335 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008368572809394033, + "loss": 0.8945, + "step": 5336 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008367929545967114, + "loss": 0.9102, + "step": 5337 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008367286180480867, + "loss": 0.9258, + "step": 5338 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008366642712954786, + "loss": 0.8789, + "step": 5339 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008365999143408374, + "loss": 0.8984, + "step": 5340 + }, + { + "epoch": 0.29, + "learning_rate": 0.000836535547186113, + "loss": 0.8438, + "step": 5341 + }, + { + "epoch": 0.29, + "learning_rate": 0.000836471169833256, + "loss": 0.8789, + "step": 5342 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008364067822842174, + "loss": 0.8828, + "step": 5343 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008363423845409484, + "loss": 0.8398, + "step": 5344 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008362779766054, + "loss": 0.875, + "step": 5345 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008362135584795245, + "loss": 0.7891, + "step": 5346 + }, + { + "epoch": 0.29, + "learning_rate": 0.000836149130165274, + "loss": 0.957, + "step": 5347 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008360846916646003, + "loss": 0.832, + "step": 5348 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008360202429794566, + "loss": 0.8789, + "step": 5349 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008359557841117958, + "loss": 0.8867, + "step": 5350 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008358913150635712, + "loss": 0.9336, + "step": 5351 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008358268358367363, + "loss": 0.7656, + "step": 5352 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008357623464332454, + "loss": 0.8789, + "step": 5353 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008356978468550523, + "loss": 0.9062, + "step": 5354 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008356333371041117, + "loss": 0.9414, + "step": 5355 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008355688171823786, + "loss": 0.9688, + "step": 5356 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008355042870918079, + "loss": 0.9492, + "step": 5357 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008354397468343554, + "loss": 0.9766, + "step": 5358 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008353751964119767, + "loss": 0.8125, + "step": 5359 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008353106358266278, + "loss": 0.9062, + "step": 5360 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008352460650802653, + "loss": 0.9375, + "step": 5361 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008351814841748458, + "loss": 0.9062, + "step": 5362 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008351168931123263, + "loss": 0.7852, + "step": 5363 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008350522918946641, + "loss": 0.8242, + "step": 5364 + }, + { + "epoch": 0.29, + "learning_rate": 0.000834987680523817, + "loss": 0.9219, + "step": 5365 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008349230590017427, + "loss": 0.7734, + "step": 5366 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008348584273303995, + "loss": 0.9023, + "step": 5367 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008347937855117463, + "loss": 0.9219, + "step": 5368 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008347291335477413, + "loss": 0.9531, + "step": 5369 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008346644714403442, + "loss": 0.9062, + "step": 5370 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008345997991915142, + "loss": 0.8633, + "step": 5371 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008345351168032114, + "loss": 0.8906, + "step": 5372 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008344704242773955, + "loss": 0.9336, + "step": 5373 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008344057216160271, + "loss": 0.9844, + "step": 5374 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008343410088210669, + "loss": 0.8164, + "step": 5375 + }, + { + "epoch": 0.29, + "learning_rate": 0.000834276285894476, + "loss": 0.9258, + "step": 5376 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008342115528382155, + "loss": 0.9336, + "step": 5377 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008341468096542471, + "loss": 0.8477, + "step": 5378 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008340820563445328, + "loss": 0.8164, + "step": 5379 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008340172929110348, + "loss": 0.8906, + "step": 5380 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008339525193557156, + "loss": 0.7812, + "step": 5381 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008338877356805381, + "loss": 0.9375, + "step": 5382 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008338229418874654, + "loss": 0.9219, + "step": 5383 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008337581379784611, + "loss": 0.9414, + "step": 5384 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008336933239554888, + "loss": 0.9453, + "step": 5385 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008336284998205128, + "loss": 0.832, + "step": 5386 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008335636655754972, + "loss": 0.9375, + "step": 5387 + }, + { + "epoch": 0.29, + "learning_rate": 0.000833498821222407, + "loss": 0.8516, + "step": 5388 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008334339667632069, + "loss": 0.7617, + "step": 5389 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008333691021998624, + "loss": 0.9023, + "step": 5390 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008333042275343391, + "loss": 0.8711, + "step": 5391 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008332393427686028, + "loss": 0.9414, + "step": 5392 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008331744479046198, + "loss": 0.8672, + "step": 5393 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008331095429443567, + "loss": 0.9609, + "step": 5394 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008330446278897801, + "loss": 0.875, + "step": 5395 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008329797027428574, + "loss": 0.8789, + "step": 5396 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008329147675055558, + "loss": 0.8359, + "step": 5397 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008328498221798433, + "loss": 0.9414, + "step": 5398 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008327848667676877, + "loss": 0.9297, + "step": 5399 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008327199012710576, + "loss": 0.8281, + "step": 5400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008326549256919216, + "loss": 0.9258, + "step": 5401 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008325899400322487, + "loss": 0.9375, + "step": 5402 + }, + { + "epoch": 0.29, + "learning_rate": 0.000832524944294008, + "loss": 0.9102, + "step": 5403 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008324599384791691, + "loss": 0.8906, + "step": 5404 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008323949225897023, + "loss": 0.918, + "step": 5405 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008323298966275773, + "loss": 0.9102, + "step": 5406 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008322648605947648, + "loss": 0.9844, + "step": 5407 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008321998144932357, + "loss": 0.9297, + "step": 5408 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008321347583249609, + "loss": 0.8438, + "step": 5409 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008320696920919119, + "loss": 0.875, + "step": 5410 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008320046157960605, + "loss": 0.9883, + "step": 5411 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008319395294393787, + "loss": 0.8633, + "step": 5412 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008318744330238388, + "loss": 0.8594, + "step": 5413 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008318093265514133, + "loss": 0.8867, + "step": 5414 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008317442100240755, + "loss": 0.8672, + "step": 5415 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008316790834437982, + "loss": 0.8906, + "step": 5416 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008316139468125553, + "loss": 0.875, + "step": 5417 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008315488001323205, + "loss": 0.8203, + "step": 5418 + }, + { + "epoch": 0.29, + "learning_rate": 0.000831483643405068, + "loss": 0.9062, + "step": 5419 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008314184766327721, + "loss": 0.9102, + "step": 5420 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008313532998174079, + "loss": 0.8398, + "step": 5421 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008312881129609502, + "loss": 0.8438, + "step": 5422 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008312229160653744, + "loss": 0.9102, + "step": 5423 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008311577091326564, + "loss": 0.8906, + "step": 5424 + }, + { + "epoch": 0.29, + "learning_rate": 0.000831092492164772, + "loss": 0.9453, + "step": 5425 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008310272651636973, + "loss": 0.8438, + "step": 5426 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008309620281314093, + "loss": 1.0391, + "step": 5427 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008308967810698847, + "loss": 0.957, + "step": 5428 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008308315239811007, + "loss": 0.8555, + "step": 5429 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008307662568670346, + "loss": 0.957, + "step": 5430 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008307009797296647, + "loss": 0.9062, + "step": 5431 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008306356925709686, + "loss": 0.8242, + "step": 5432 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008305703953929251, + "loss": 1.0469, + "step": 5433 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008305050881975126, + "loss": 0.9336, + "step": 5434 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008304397709867104, + "loss": 0.8984, + "step": 5435 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008303744437624976, + "loss": 0.8867, + "step": 5436 + }, + { + "epoch": 0.29, + "learning_rate": 0.000830309106526854, + "loss": 0.9297, + "step": 5437 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008302437592817596, + "loss": 0.9219, + "step": 5438 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008301784020291942, + "loss": 0.8828, + "step": 5439 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008301130347711389, + "loss": 0.8359, + "step": 5440 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008300476575095742, + "loss": 0.8555, + "step": 5441 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008299822702464814, + "loss": 0.8477, + "step": 5442 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008299168729838418, + "loss": 0.9102, + "step": 5443 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008298514657236373, + "loss": 0.9609, + "step": 5444 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008297860484678499, + "loss": 0.9688, + "step": 5445 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008297206212184619, + "loss": 0.8984, + "step": 5446 + }, + { + "epoch": 0.29, + "learning_rate": 0.000829655183977456, + "loss": 0.8828, + "step": 5447 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008295897367468153, + "loss": 0.9297, + "step": 5448 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008295242795285227, + "loss": 0.8945, + "step": 5449 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008294588123245621, + "loss": 0.8945, + "step": 5450 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008293933351369173, + "loss": 0.9062, + "step": 5451 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008293278479675725, + "loss": 0.918, + "step": 5452 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008292623508185121, + "loss": 0.9375, + "step": 5453 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008291968436917209, + "loss": 0.9609, + "step": 5454 + }, + { + "epoch": 0.29, + "learning_rate": 0.000829131326589184, + "loss": 0.8789, + "step": 5455 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008290657995128867, + "loss": 0.7695, + "step": 5456 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008290002624648147, + "loss": 0.9062, + "step": 5457 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008289347154469543, + "loss": 0.9336, + "step": 5458 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008288691584612913, + "loss": 0.8711, + "step": 5459 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008288035915098126, + "loss": 0.8945, + "step": 5460 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008287380145945052, + "loss": 0.9258, + "step": 5461 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008286724277173558, + "loss": 0.9375, + "step": 5462 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008286068308803525, + "loss": 0.9336, + "step": 5463 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008285412240854826, + "loss": 0.9336, + "step": 5464 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008284756073347343, + "loss": 0.8984, + "step": 5465 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008284099806300964, + "loss": 0.9766, + "step": 5466 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008283443439735573, + "loss": 1.0547, + "step": 5467 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008282786973671059, + "loss": 0.9688, + "step": 5468 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008282130408127317, + "loss": 0.9258, + "step": 5469 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008281473743124244, + "loss": 0.918, + "step": 5470 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008280816978681736, + "loss": 0.9023, + "step": 5471 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008280160114819695, + "loss": 0.8555, + "step": 5472 + }, + { + "epoch": 0.29, + "learning_rate": 0.000827950315155803, + "loss": 0.8594, + "step": 5473 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008278846088916646, + "loss": 0.8867, + "step": 5474 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008278188926915455, + "loss": 0.8555, + "step": 5475 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008277531665574373, + "loss": 0.9062, + "step": 5476 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008276874304913314, + "loss": 0.875, + "step": 5477 + }, + { + "epoch": 0.29, + "learning_rate": 0.00082762168449522, + "loss": 0.9219, + "step": 5478 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008275559285710955, + "loss": 0.9375, + "step": 5479 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008274901627209502, + "loss": 0.8633, + "step": 5480 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008274243869467772, + "loss": 1.0078, + "step": 5481 + }, + { + "epoch": 0.29, + "learning_rate": 0.00082735860125057, + "loss": 0.9414, + "step": 5482 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008272928056343216, + "loss": 0.9258, + "step": 5483 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008272270001000263, + "loss": 0.9727, + "step": 5484 + }, + { + "epoch": 0.29, + "learning_rate": 0.000827161184649678, + "loss": 0.9414, + "step": 5485 + }, + { + "epoch": 0.29, + "learning_rate": 0.000827095359285271, + "loss": 0.8164, + "step": 5486 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008270295240088004, + "loss": 0.875, + "step": 5487 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008269636788222608, + "loss": 0.9102, + "step": 5488 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008268978237276477, + "loss": 0.8945, + "step": 5489 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008268319587269569, + "loss": 0.9531, + "step": 5490 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008267660838221839, + "loss": 0.9336, + "step": 5491 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008267001990153256, + "loss": 0.9062, + "step": 5492 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008266343043083778, + "loss": 0.9766, + "step": 5493 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008265683997033378, + "loss": 0.9336, + "step": 5494 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008265024852022025, + "loss": 0.8086, + "step": 5495 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008264365608069694, + "loss": 0.875, + "step": 5496 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008263706265196362, + "loss": 0.957, + "step": 5497 + }, + { + "epoch": 0.3, + "learning_rate": 0.000826304682342201, + "loss": 0.9492, + "step": 5498 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008262387282766621, + "loss": 0.8438, + "step": 5499 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008261727643250181, + "loss": 0.9023, + "step": 5500 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008261067904892679, + "loss": 0.8672, + "step": 5501 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008260408067714107, + "loss": 0.9023, + "step": 5502 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008259748131734462, + "loss": 0.9336, + "step": 5503 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008259088096973739, + "loss": 0.8711, + "step": 5504 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008258427963451945, + "loss": 0.9258, + "step": 5505 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008257767731189076, + "loss": 0.8945, + "step": 5506 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008257107400205146, + "loss": 0.8828, + "step": 5507 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008256446970520162, + "loss": 0.8438, + "step": 5508 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008255786442154138, + "loss": 0.8672, + "step": 5509 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008255125815127089, + "loss": 0.8398, + "step": 5510 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008254465089459038, + "loss": 0.9297, + "step": 5511 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008253804265170002, + "loss": 1.0, + "step": 5512 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008253143342280009, + "loss": 0.9219, + "step": 5513 + }, + { + "epoch": 0.3, + "learning_rate": 0.000825248232080909, + "loss": 0.8984, + "step": 5514 + }, + { + "epoch": 0.3, + "learning_rate": 0.000825182120077727, + "loss": 0.9062, + "step": 5515 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008251159982204586, + "loss": 0.9062, + "step": 5516 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008250498665111074, + "loss": 0.9453, + "step": 5517 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008249837249516778, + "loss": 0.8828, + "step": 5518 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008249175735441736, + "loss": 0.8516, + "step": 5519 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008248514122905998, + "loss": 0.957, + "step": 5520 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008247852411929611, + "loss": 0.918, + "step": 5521 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008247190602532626, + "loss": 1.0, + "step": 5522 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008246528694735101, + "loss": 0.9414, + "step": 5523 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008245866688557092, + "loss": 0.8984, + "step": 5524 + }, + { + "epoch": 0.3, + "learning_rate": 0.000824520458401866, + "loss": 0.9336, + "step": 5525 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008244542381139868, + "loss": 0.918, + "step": 5526 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008243880079940785, + "loss": 0.9297, + "step": 5527 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008243217680441482, + "loss": 0.8945, + "step": 5528 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008242555182662027, + "loss": 0.9258, + "step": 5529 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008241892586622501, + "loss": 1.0781, + "step": 5530 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008241229892342978, + "loss": 1.0391, + "step": 5531 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008240567099843546, + "loss": 0.9453, + "step": 5532 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008239904209144283, + "loss": 0.8672, + "step": 5533 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008239241220265281, + "loss": 0.9766, + "step": 5534 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008238578133226631, + "loss": 0.9219, + "step": 5535 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008237914948048424, + "loss": 0.9219, + "step": 5536 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008237251664750758, + "loss": 0.9492, + "step": 5537 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008236588283353734, + "loss": 0.9102, + "step": 5538 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008235924803877453, + "loss": 0.8008, + "step": 5539 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008235261226342022, + "loss": 0.9023, + "step": 5540 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008234597550767546, + "loss": 0.8359, + "step": 5541 + }, + { + "epoch": 0.3, + "learning_rate": 0.000823393377717414, + "loss": 0.8359, + "step": 5542 + }, + { + "epoch": 0.3, + "learning_rate": 0.000823326990558192, + "loss": 0.9375, + "step": 5543 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008232605936010999, + "loss": 0.8633, + "step": 5544 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008231941868481501, + "loss": 1.0, + "step": 5545 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008231277703013547, + "loss": 0.9688, + "step": 5546 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008230613439627264, + "loss": 0.9062, + "step": 5547 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008229949078342782, + "loss": 0.9258, + "step": 5548 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008229284619180235, + "loss": 0.8828, + "step": 5549 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008228620062159754, + "loss": 0.9688, + "step": 5550 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008227955407301481, + "loss": 0.8711, + "step": 5551 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008227290654625556, + "loss": 0.9102, + "step": 5552 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008226625804152124, + "loss": 0.9609, + "step": 5553 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008225960855901328, + "loss": 0.9023, + "step": 5554 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008225295809893325, + "loss": 0.8594, + "step": 5555 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008224630666148262, + "loss": 0.9102, + "step": 5556 + }, + { + "epoch": 0.3, + "learning_rate": 0.00082239654246863, + "loss": 0.8672, + "step": 5557 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008223300085527594, + "loss": 0.9766, + "step": 5558 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008222634648692308, + "loss": 0.8477, + "step": 5559 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008221969114200606, + "loss": 0.8555, + "step": 5560 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008221303482072658, + "loss": 0.8945, + "step": 5561 + }, + { + "epoch": 0.3, + "learning_rate": 0.000822063775232863, + "loss": 1.0078, + "step": 5562 + }, + { + "epoch": 0.3, + "learning_rate": 0.00082199719249887, + "loss": 0.8828, + "step": 5563 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008219306000073044, + "loss": 0.8867, + "step": 5564 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008218639977601843, + "loss": 0.875, + "step": 5565 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008217973857595276, + "loss": 1.0078, + "step": 5566 + }, + { + "epoch": 0.3, + "learning_rate": 0.000821730764007353, + "loss": 0.9062, + "step": 5567 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008216641325056796, + "loss": 0.9414, + "step": 5568 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008215974912565263, + "loss": 0.9336, + "step": 5569 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008215308402619126, + "loss": 0.9297, + "step": 5570 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008214641795238584, + "loss": 0.918, + "step": 5571 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008213975090443835, + "loss": 0.8945, + "step": 5572 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008213308288255084, + "loss": 0.8555, + "step": 5573 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008212641388692537, + "loss": 0.8867, + "step": 5574 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008211974391776402, + "loss": 0.918, + "step": 5575 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008211307297526894, + "loss": 0.9805, + "step": 5576 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008210640105964224, + "loss": 0.8906, + "step": 5577 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008209972817108614, + "loss": 0.8867, + "step": 5578 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008209305430980283, + "loss": 0.8906, + "step": 5579 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008208637947599456, + "loss": 0.8594, + "step": 5580 + }, + { + "epoch": 0.3, + "learning_rate": 0.000820797036698636, + "loss": 0.9062, + "step": 5581 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008207302689161222, + "loss": 0.8945, + "step": 5582 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008206634914144278, + "loss": 0.8594, + "step": 5583 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008205967041955765, + "loss": 0.8555, + "step": 5584 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008205299072615917, + "loss": 0.9727, + "step": 5585 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008204631006144981, + "loss": 0.9219, + "step": 5586 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008203962842563195, + "loss": 0.8633, + "step": 5587 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008203294581890815, + "loss": 0.9102, + "step": 5588 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008202626224148083, + "loss": 0.9609, + "step": 5589 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008201957769355259, + "loss": 0.8828, + "step": 5590 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008201289217532595, + "loss": 0.8398, + "step": 5591 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008200620568700354, + "loss": 0.8906, + "step": 5592 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008199951822878794, + "loss": 0.8867, + "step": 5593 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008199282980088184, + "loss": 0.8828, + "step": 5594 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008198614040348791, + "loss": 0.918, + "step": 5595 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008197945003680884, + "loss": 0.9375, + "step": 5596 + }, + { + "epoch": 0.3, + "learning_rate": 0.000819727587010474, + "loss": 0.9805, + "step": 5597 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008196606639640636, + "loss": 0.9023, + "step": 5598 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008195937312308847, + "loss": 0.9375, + "step": 5599 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008195267888129662, + "loss": 0.957, + "step": 5600 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008194598367123365, + "loss": 0.9023, + "step": 5601 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008193928749310242, + "loss": 0.875, + "step": 5602 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008193259034710587, + "loss": 0.8789, + "step": 5603 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008192589223344696, + "loss": 0.8828, + "step": 5604 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008191919315232861, + "loss": 0.918, + "step": 5605 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008191249310395388, + "loss": 0.8906, + "step": 5606 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008190579208852577, + "loss": 0.8594, + "step": 5607 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008189909010624737, + "loss": 0.8242, + "step": 5608 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008189238715732175, + "loss": 0.9531, + "step": 5609 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008188568324195202, + "loss": 0.8828, + "step": 5610 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008187897836034137, + "loss": 0.8359, + "step": 5611 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008187227251269296, + "loss": 0.9336, + "step": 5612 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008186556569921, + "loss": 0.8516, + "step": 5613 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008185885792009572, + "loss": 0.8984, + "step": 5614 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008185214917555339, + "loss": 0.8359, + "step": 5615 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008184543946578633, + "loss": 0.875, + "step": 5616 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008183872879099783, + "loss": 0.8594, + "step": 5617 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008183201715139128, + "loss": 0.8594, + "step": 5618 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008182530454717003, + "loss": 0.957, + "step": 5619 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008181859097853752, + "loss": 0.8906, + "step": 5620 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008181187644569718, + "loss": 0.9219, + "step": 5621 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008180516094885248, + "loss": 0.9922, + "step": 5622 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008179844448820697, + "loss": 0.9141, + "step": 5623 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008179172706396409, + "loss": 0.9883, + "step": 5624 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008178500867632749, + "loss": 0.8477, + "step": 5625 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008177828932550068, + "loss": 0.8477, + "step": 5626 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008177156901168736, + "loss": 0.8984, + "step": 5627 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008176484773509112, + "loss": 0.8945, + "step": 5628 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008175812549591564, + "loss": 0.875, + "step": 5629 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008175140229436464, + "loss": 0.9023, + "step": 5630 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008174467813064186, + "loss": 0.8438, + "step": 5631 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008173795300495106, + "loss": 0.8516, + "step": 5632 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008173122691749604, + "loss": 0.8086, + "step": 5633 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008172449986848058, + "loss": 0.9531, + "step": 5634 + }, + { + "epoch": 0.3, + "learning_rate": 0.000817177718581086, + "loss": 0.8438, + "step": 5635 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008171104288658393, + "loss": 0.9023, + "step": 5636 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008170431295411049, + "loss": 0.8477, + "step": 5637 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008169758206089223, + "loss": 0.8086, + "step": 5638 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008169085020713313, + "loss": 0.9805, + "step": 5639 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008168411739303715, + "loss": 0.8906, + "step": 5640 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008167738361880834, + "loss": 0.8984, + "step": 5641 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008167064888465076, + "loss": 0.9922, + "step": 5642 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008166391319076848, + "loss": 0.9062, + "step": 5643 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008165717653736561, + "loss": 0.9102, + "step": 5644 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008165043892464632, + "loss": 0.7812, + "step": 5645 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008164370035281477, + "loss": 0.875, + "step": 5646 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008163696082207515, + "loss": 0.8828, + "step": 5647 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008163022033263168, + "loss": 0.8516, + "step": 5648 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008162347888468864, + "loss": 0.8828, + "step": 5649 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008161673647845032, + "loss": 0.8945, + "step": 5650 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008160999311412103, + "loss": 0.8164, + "step": 5651 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008160324879190511, + "loss": 0.9219, + "step": 5652 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008159650351200696, + "loss": 0.9883, + "step": 5653 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008158975727463094, + "loss": 0.9141, + "step": 5654 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008158301007998152, + "loss": 0.8203, + "step": 5655 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008157626192826315, + "loss": 0.9961, + "step": 5656 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008156951281968032, + "loss": 0.8555, + "step": 5657 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008156276275443755, + "loss": 0.8711, + "step": 5658 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008155601173273939, + "loss": 0.9336, + "step": 5659 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008154925975479043, + "loss": 0.918, + "step": 5660 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008154250682079525, + "loss": 0.6992, + "step": 5661 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008153575293095852, + "loss": 0.8945, + "step": 5662 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008152899808548486, + "loss": 0.8555, + "step": 5663 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008152224228457902, + "loss": 0.793, + "step": 5664 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008151548552844568, + "loss": 0.8867, + "step": 5665 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008150872781728962, + "loss": 0.8594, + "step": 5666 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008150196915131558, + "loss": 0.8906, + "step": 5667 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008149520953072842, + "loss": 0.8984, + "step": 5668 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008148844895573296, + "loss": 0.9219, + "step": 5669 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008148168742653404, + "loss": 0.8281, + "step": 5670 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008147492494333659, + "loss": 0.9531, + "step": 5671 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008146816150634553, + "loss": 0.9062, + "step": 5672 + }, + { + "epoch": 0.3, + "learning_rate": 0.000814613971157658, + "loss": 0.9336, + "step": 5673 + }, + { + "epoch": 0.3, + "learning_rate": 0.000814546317718024, + "loss": 0.8906, + "step": 5674 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008144786547466033, + "loss": 0.8359, + "step": 5675 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008144109822454464, + "loss": 0.8789, + "step": 5676 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008143433002166039, + "loss": 0.9609, + "step": 5677 + }, + { + "epoch": 0.31, + "learning_rate": 0.000814275608662127, + "loss": 0.8711, + "step": 5678 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008142079075840667, + "loss": 0.9531, + "step": 5679 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008141401969844746, + "loss": 0.8008, + "step": 5680 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008140724768654027, + "loss": 1.0234, + "step": 5681 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008140047472289033, + "loss": 0.9258, + "step": 5682 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008139370080770283, + "loss": 0.8672, + "step": 5683 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008138692594118309, + "loss": 0.8945, + "step": 5684 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008138015012353639, + "loss": 0.8867, + "step": 5685 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008137337335496807, + "loss": 0.875, + "step": 5686 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008136659563568347, + "loss": 0.957, + "step": 5687 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008135981696588802, + "loss": 0.918, + "step": 5688 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008135303734578707, + "loss": 0.9062, + "step": 5689 + }, + { + "epoch": 0.31, + "learning_rate": 0.000813462567755861, + "loss": 0.8594, + "step": 5690 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008133947525549059, + "loss": 0.8594, + "step": 5691 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008133269278570605, + "loss": 0.9297, + "step": 5692 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008132590936643797, + "loss": 0.9023, + "step": 5693 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008131912499789196, + "loss": 0.8633, + "step": 5694 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008131233968027358, + "loss": 0.9141, + "step": 5695 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008130555341378843, + "loss": 0.9844, + "step": 5696 + }, + { + "epoch": 0.31, + "learning_rate": 0.000812987661986422, + "loss": 0.9961, + "step": 5697 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008129197803504054, + "loss": 0.8945, + "step": 5698 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008128518892318916, + "loss": 0.9219, + "step": 5699 + }, + { + "epoch": 0.31, + "learning_rate": 0.000812783988632938, + "loss": 0.9062, + "step": 5700 + }, + { + "epoch": 0.31, + "learning_rate": 0.000812716078555602, + "loss": 0.8945, + "step": 5701 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008126481590019415, + "loss": 0.8438, + "step": 5702 + }, + { + "epoch": 0.31, + "learning_rate": 0.000812580229974015, + "loss": 0.9648, + "step": 5703 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008125122914738808, + "loss": 0.9102, + "step": 5704 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008124443435035973, + "loss": 0.9258, + "step": 5705 + }, + { + "epoch": 0.31, + "learning_rate": 0.000812376386065224, + "loss": 0.8398, + "step": 5706 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008123084191608203, + "loss": 0.9141, + "step": 5707 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008122404427924455, + "loss": 0.9453, + "step": 5708 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008121724569621596, + "loss": 0.8477, + "step": 5709 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008121044616720228, + "loss": 0.8633, + "step": 5710 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008120364569240957, + "loss": 0.875, + "step": 5711 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008119684427204389, + "loss": 0.8086, + "step": 5712 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008119004190631136, + "loss": 0.8516, + "step": 5713 + }, + { + "epoch": 0.31, + "learning_rate": 0.000811832385954181, + "loss": 0.8242, + "step": 5714 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008117643433957029, + "loss": 0.8789, + "step": 5715 + }, + { + "epoch": 0.31, + "learning_rate": 0.000811696291389741, + "loss": 0.8477, + "step": 5716 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008116282299383577, + "loss": 0.9531, + "step": 5717 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008115601590436152, + "loss": 0.9141, + "step": 5718 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008114920787075767, + "loss": 0.875, + "step": 5719 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008114239889323049, + "loss": 0.9102, + "step": 5720 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008113558897198633, + "loss": 0.832, + "step": 5721 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008112877810723154, + "loss": 0.9023, + "step": 5722 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008112196629917255, + "loss": 0.8555, + "step": 5723 + }, + { + "epoch": 0.31, + "learning_rate": 0.000811151535480157, + "loss": 0.8984, + "step": 5724 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008110833985396755, + "loss": 0.9844, + "step": 5725 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008110152521723449, + "loss": 0.8672, + "step": 5726 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008109470963802303, + "loss": 0.8945, + "step": 5727 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008108789311653976, + "loss": 0.9258, + "step": 5728 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008108107565299121, + "loss": 0.9258, + "step": 5729 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008107425724758394, + "loss": 0.8711, + "step": 5730 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008106743790052464, + "loss": 0.8438, + "step": 5731 + }, + { + "epoch": 0.31, + "learning_rate": 0.000810606176120199, + "loss": 0.8477, + "step": 5732 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008105379638227644, + "loss": 0.8867, + "step": 5733 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008104697421150092, + "loss": 0.9336, + "step": 5734 + }, + { + "epoch": 0.31, + "learning_rate": 0.000810401510999001, + "loss": 0.9062, + "step": 5735 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008103332704768076, + "loss": 0.8555, + "step": 5736 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008102650205504964, + "loss": 0.875, + "step": 5737 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008101967612221361, + "loss": 0.8906, + "step": 5738 + }, + { + "epoch": 0.31, + "learning_rate": 0.000810128492493795, + "loss": 0.8633, + "step": 5739 + }, + { + "epoch": 0.31, + "learning_rate": 0.000810060214367542, + "loss": 0.9492, + "step": 5740 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008099919268454459, + "loss": 0.8867, + "step": 5741 + }, + { + "epoch": 0.31, + "learning_rate": 0.000809923629929576, + "loss": 0.9219, + "step": 5742 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008098553236220023, + "loss": 0.8477, + "step": 5743 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008097870079247945, + "loss": 0.8984, + "step": 5744 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008097186828400224, + "loss": 0.793, + "step": 5745 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008096503483697573, + "loss": 0.8711, + "step": 5746 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008095820045160694, + "loss": 0.9375, + "step": 5747 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008095136512810296, + "loss": 0.8945, + "step": 5748 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008094452886667097, + "loss": 0.9062, + "step": 5749 + }, + { + "epoch": 0.31, + "learning_rate": 0.000809376916675181, + "loss": 0.9375, + "step": 5750 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008093085353085154, + "loss": 0.9102, + "step": 5751 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008092401445687852, + "loss": 0.832, + "step": 5752 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008091717444580628, + "loss": 0.8867, + "step": 5753 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008091033349784211, + "loss": 0.957, + "step": 5754 + }, + { + "epoch": 0.31, + "learning_rate": 0.000809034916131933, + "loss": 0.8594, + "step": 5755 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008089664879206717, + "loss": 0.8984, + "step": 5756 + }, + { + "epoch": 0.31, + "learning_rate": 0.000808898050346711, + "loss": 0.9141, + "step": 5757 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008088296034121248, + "loss": 0.9375, + "step": 5758 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008087611471189871, + "loss": 0.8477, + "step": 5759 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008086926814693722, + "loss": 0.9492, + "step": 5760 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008086242064653553, + "loss": 0.7656, + "step": 5761 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008085557221090112, + "loss": 0.9531, + "step": 5762 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008084872284024149, + "loss": 1.0, + "step": 5763 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008084187253476426, + "loss": 0.8359, + "step": 5764 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008083502129467697, + "loss": 0.8906, + "step": 5765 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008082816912018724, + "loss": 0.875, + "step": 5766 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008082131601150273, + "loss": 0.8477, + "step": 5767 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008081446196883109, + "loss": 0.9492, + "step": 5768 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008080760699238004, + "loss": 0.875, + "step": 5769 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008080075108235729, + "loss": 0.9492, + "step": 5770 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008079389423897063, + "loss": 0.9688, + "step": 5771 + }, + { + "epoch": 0.31, + "learning_rate": 0.000807870364624278, + "loss": 0.875, + "step": 5772 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008078017775293665, + "loss": 0.9727, + "step": 5773 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008077331811070501, + "loss": 0.8594, + "step": 5774 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008076645753594074, + "loss": 0.9688, + "step": 5775 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008075959602885175, + "loss": 0.8164, + "step": 5776 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008075273358964596, + "loss": 0.8438, + "step": 5777 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008074587021853131, + "loss": 0.9805, + "step": 5778 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008073900591571582, + "loss": 0.9648, + "step": 5779 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008073214068140747, + "loss": 0.8438, + "step": 5780 + }, + { + "epoch": 0.31, + "learning_rate": 0.000807252745158143, + "loss": 0.8438, + "step": 5781 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008071840741914439, + "loss": 0.8555, + "step": 5782 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008071153939160585, + "loss": 0.9453, + "step": 5783 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008070467043340676, + "loss": 0.9336, + "step": 5784 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008069780054475531, + "loss": 0.8438, + "step": 5785 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008069092972585965, + "loss": 1.0391, + "step": 5786 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008068405797692803, + "loss": 0.9023, + "step": 5787 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008067718529816864, + "loss": 0.9062, + "step": 5788 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008067031168978978, + "loss": 0.9453, + "step": 5789 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008066343715199971, + "loss": 0.8203, + "step": 5790 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008065656168500681, + "loss": 0.8711, + "step": 5791 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008064968528901936, + "loss": 0.9336, + "step": 5792 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008064280796424578, + "loss": 0.9648, + "step": 5793 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008063592971089445, + "loss": 0.8438, + "step": 5794 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008062905052917382, + "loss": 0.9297, + "step": 5795 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008062217041929236, + "loss": 0.8203, + "step": 5796 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008061528938145855, + "loss": 0.9453, + "step": 5797 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008060840741588089, + "loss": 0.8008, + "step": 5798 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008060152452276794, + "loss": 0.8555, + "step": 5799 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008059464070232829, + "loss": 0.9062, + "step": 5800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008058775595477052, + "loss": 0.9219, + "step": 5801 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008058087028030328, + "loss": 0.9844, + "step": 5802 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008057398367913522, + "loss": 0.9492, + "step": 5803 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008056709615147501, + "loss": 0.9062, + "step": 5804 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008056020769753138, + "loss": 0.8438, + "step": 5805 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008055331831751309, + "loss": 0.9492, + "step": 5806 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008054642801162888, + "loss": 0.9141, + "step": 5807 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008053953678008757, + "loss": 0.8125, + "step": 5808 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008053264462309797, + "loss": 0.9648, + "step": 5809 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008052575154086894, + "loss": 0.9492, + "step": 5810 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008051885753360938, + "loss": 0.9492, + "step": 5811 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008051196260152819, + "loss": 0.8633, + "step": 5812 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008050506674483429, + "loss": 0.8711, + "step": 5813 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008049816996373668, + "loss": 0.9336, + "step": 5814 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008049127225844433, + "loss": 0.9844, + "step": 5815 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008048437362916627, + "loss": 0.8203, + "step": 5816 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008047747407611157, + "loss": 0.8945, + "step": 5817 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008047057359948928, + "loss": 0.8789, + "step": 5818 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008046367219950852, + "loss": 0.9883, + "step": 5819 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008045676987637843, + "loss": 0.9336, + "step": 5820 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008044986663030817, + "loss": 0.9258, + "step": 5821 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008044296246150693, + "loss": 0.8828, + "step": 5822 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008043605737018391, + "loss": 0.9023, + "step": 5823 + }, + { + "epoch": 0.31, + "learning_rate": 0.000804291513565484, + "loss": 0.8633, + "step": 5824 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008042224442080964, + "loss": 0.8359, + "step": 5825 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008041533656317695, + "loss": 0.8398, + "step": 5826 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008040842778385963, + "loss": 0.9727, + "step": 5827 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008040151808306708, + "loss": 0.8672, + "step": 5828 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008039460746100866, + "loss": 1.0547, + "step": 5829 + }, + { + "epoch": 0.31, + "learning_rate": 0.000803876959178938, + "loss": 0.918, + "step": 5830 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008038078345393194, + "loss": 0.9883, + "step": 5831 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008037387006933253, + "loss": 0.8945, + "step": 5832 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008036695576430509, + "loss": 0.9375, + "step": 5833 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008036004053905917, + "loss": 0.8594, + "step": 5834 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008035312439380425, + "loss": 0.8477, + "step": 5835 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008034620732874999, + "loss": 0.9258, + "step": 5836 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008033928934410595, + "loss": 0.8945, + "step": 5837 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008033237044008179, + "loss": 0.8711, + "step": 5838 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008032545061688717, + "loss": 0.8867, + "step": 5839 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008031852987473178, + "loss": 0.8359, + "step": 5840 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008031160821382533, + "loss": 0.8516, + "step": 5841 + }, + { + "epoch": 0.31, + "learning_rate": 0.000803046856343776, + "loss": 0.8359, + "step": 5842 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008029776213659836, + "loss": 0.8867, + "step": 5843 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008029083772069738, + "loss": 0.9102, + "step": 5844 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008028391238688454, + "loss": 0.8516, + "step": 5845 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008027698613536966, + "loss": 0.7422, + "step": 5846 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008027005896636266, + "loss": 0.8594, + "step": 5847 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008026313088007343, + "loss": 0.8281, + "step": 5848 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008025620187671194, + "loss": 0.9766, + "step": 5849 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008024927195648813, + "loss": 0.9102, + "step": 5850 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008024234111961204, + "loss": 0.9492, + "step": 5851 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008023540936629366, + "loss": 0.8711, + "step": 5852 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008022847669674304, + "loss": 0.8555, + "step": 5853 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008022154311117031, + "loss": 0.8711, + "step": 5854 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008021460860978554, + "loss": 0.918, + "step": 5855 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008020767319279888, + "loss": 1.0156, + "step": 5856 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008020073686042051, + "loss": 0.9102, + "step": 5857 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008019379961286059, + "loss": 0.9102, + "step": 5858 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008018686145032936, + "loss": 0.9531, + "step": 5859 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008017992237303709, + "loss": 0.8242, + "step": 5860 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008017298238119402, + "loss": 0.8594, + "step": 5861 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008016604147501047, + "loss": 0.9531, + "step": 5862 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008015909965469679, + "loss": 0.9336, + "step": 5863 + }, + { + "epoch": 0.32, + "learning_rate": 0.000801521569204633, + "loss": 0.9961, + "step": 5864 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008014521327252042, + "loss": 1.0156, + "step": 5865 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008013826871107857, + "loss": 0.9648, + "step": 5866 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008013132323634818, + "loss": 0.8672, + "step": 5867 + }, + { + "epoch": 0.32, + "learning_rate": 0.000801243768485397, + "loss": 0.9102, + "step": 5868 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008011742954786366, + "loss": 0.832, + "step": 5869 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008011048133453056, + "loss": 0.8555, + "step": 5870 + }, + { + "epoch": 0.32, + "learning_rate": 0.00080103532208751, + "loss": 0.9023, + "step": 5871 + }, + { + "epoch": 0.32, + "learning_rate": 0.000800965821707355, + "loss": 0.9023, + "step": 5872 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008008963122069473, + "loss": 0.9258, + "step": 5873 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008008267935883926, + "loss": 0.918, + "step": 5874 + }, + { + "epoch": 0.32, + "learning_rate": 0.000800757265853798, + "loss": 0.9844, + "step": 5875 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008006877290052703, + "loss": 0.8867, + "step": 5876 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008006181830449168, + "loss": 0.8867, + "step": 5877 + }, + { + "epoch": 0.32, + "learning_rate": 0.000800548627974845, + "loss": 0.8516, + "step": 5878 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008004790637971624, + "loss": 0.8945, + "step": 5879 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008004094905139771, + "loss": 0.8594, + "step": 5880 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008003399081273975, + "loss": 0.8984, + "step": 5881 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008002703166395319, + "loss": 0.9062, + "step": 5882 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008002007160524897, + "loss": 0.8516, + "step": 5883 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008001311063683796, + "loss": 0.9727, + "step": 5884 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008000614875893111, + "loss": 0.9258, + "step": 5885 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007999918597173939, + "loss": 0.8711, + "step": 5886 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007999222227547378, + "loss": 0.9336, + "step": 5887 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007998525767034532, + "loss": 0.9336, + "step": 5888 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007997829215656507, + "loss": 0.9844, + "step": 5889 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007997132573434409, + "loss": 0.8359, + "step": 5890 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007996435840389349, + "loss": 0.875, + "step": 5891 + }, + { + "epoch": 0.32, + "learning_rate": 0.000799573901654244, + "loss": 0.9297, + "step": 5892 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007995042101914798, + "loss": 1.0156, + "step": 5893 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007994345096527544, + "loss": 0.9141, + "step": 5894 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007993648000401795, + "loss": 0.9727, + "step": 5895 + }, + { + "epoch": 0.32, + "learning_rate": 0.000799295081355868, + "loss": 0.9453, + "step": 5896 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007992253536019322, + "loss": 0.9023, + "step": 5897 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007991556167804857, + "loss": 0.9492, + "step": 5898 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007990858708936409, + "loss": 0.8594, + "step": 5899 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007990161159435121, + "loss": 0.875, + "step": 5900 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007989463519322126, + "loss": 0.9727, + "step": 5901 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007988765788618566, + "loss": 0.9453, + "step": 5902 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007988067967345585, + "loss": 0.9922, + "step": 5903 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007987370055524331, + "loss": 1.0312, + "step": 5904 + }, + { + "epoch": 0.32, + "learning_rate": 0.000798667205317595, + "loss": 0.9453, + "step": 5905 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007985973960321597, + "loss": 0.9297, + "step": 5906 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007985275776982424, + "loss": 0.9961, + "step": 5907 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007984577503179587, + "loss": 0.8203, + "step": 5908 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007983879138934249, + "loss": 0.8789, + "step": 5909 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007983180684267572, + "loss": 1.0156, + "step": 5910 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007982482139200722, + "loss": 1.0, + "step": 5911 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007981783503754865, + "loss": 0.9023, + "step": 5912 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007981084777951173, + "loss": 0.8828, + "step": 5913 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007980385961810821, + "loss": 0.8438, + "step": 5914 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007979687055354983, + "loss": 1.0391, + "step": 5915 + }, + { + "epoch": 0.32, + "learning_rate": 0.000797898805860484, + "loss": 1.0312, + "step": 5916 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007978288971581573, + "loss": 0.9102, + "step": 5917 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007977589794306368, + "loss": 0.9375, + "step": 5918 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007976890526800411, + "loss": 0.8984, + "step": 5919 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007976191169084892, + "loss": 0.8984, + "step": 5920 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007975491721181006, + "loss": 0.8008, + "step": 5921 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007974792183109945, + "loss": 0.9531, + "step": 5922 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007974092554892911, + "loss": 0.9375, + "step": 5923 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007973392836551102, + "loss": 1.0078, + "step": 5924 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007972693028105724, + "loss": 0.9062, + "step": 5925 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007971993129577981, + "loss": 0.8672, + "step": 5926 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007971293140989085, + "loss": 0.9336, + "step": 5927 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007970593062360246, + "loss": 0.875, + "step": 5928 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007969892893712681, + "loss": 0.8906, + "step": 5929 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007969192635067603, + "loss": 0.8516, + "step": 5930 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007968492286446237, + "loss": 0.9531, + "step": 5931 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007967791847869803, + "loss": 0.9609, + "step": 5932 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007967091319359529, + "loss": 0.9688, + "step": 5933 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007966390700936639, + "loss": 0.9062, + "step": 5934 + }, + { + "epoch": 0.32, + "learning_rate": 0.000796568999262237, + "loss": 0.8555, + "step": 5935 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007964989194437948, + "loss": 0.8125, + "step": 5936 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007964288306404618, + "loss": 0.9141, + "step": 5937 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007963587328543613, + "loss": 0.9219, + "step": 5938 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007962886260876179, + "loss": 0.9688, + "step": 5939 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007962185103423557, + "loss": 0.9531, + "step": 5940 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007961483856206996, + "loss": 0.918, + "step": 5941 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007960782519247746, + "loss": 0.957, + "step": 5942 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007960081092567063, + "loss": 0.9219, + "step": 5943 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007959379576186197, + "loss": 0.9258, + "step": 5944 + }, + { + "epoch": 0.32, + "learning_rate": 0.000795867797012641, + "loss": 0.9414, + "step": 5945 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007957976274408961, + "loss": 0.9805, + "step": 5946 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007957274489055115, + "loss": 0.9375, + "step": 5947 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007956572614086137, + "loss": 0.8281, + "step": 5948 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007955870649523298, + "loss": 0.9219, + "step": 5949 + }, + { + "epoch": 0.32, + "learning_rate": 0.000795516859538787, + "loss": 0.9219, + "step": 5950 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007954466451701124, + "loss": 0.8789, + "step": 5951 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007953764218484341, + "loss": 0.9531, + "step": 5952 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007953061895758799, + "loss": 0.9453, + "step": 5953 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007952359483545782, + "loss": 0.9258, + "step": 5954 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007951656981866573, + "loss": 0.957, + "step": 5955 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007950954390742464, + "loss": 0.9336, + "step": 5956 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007950251710194743, + "loss": 0.9258, + "step": 5957 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007949548940244704, + "loss": 0.8906, + "step": 5958 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007948846080913643, + "loss": 0.8828, + "step": 5959 + }, + { + "epoch": 0.32, + "learning_rate": 0.000794814313222286, + "loss": 0.832, + "step": 5960 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007947440094193655, + "loss": 0.9844, + "step": 5961 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007946736966847334, + "loss": 0.8945, + "step": 5962 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007946033750205203, + "loss": 0.9609, + "step": 5963 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007945330444288573, + "loss": 0.8242, + "step": 5964 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007944627049118755, + "loss": 1.0078, + "step": 5965 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007943923564717066, + "loss": 0.9414, + "step": 5966 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007943219991104822, + "loss": 0.8672, + "step": 5967 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007942516328303344, + "loss": 0.8242, + "step": 5968 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007941812576333956, + "loss": 0.9141, + "step": 5969 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007941108735217983, + "loss": 0.9414, + "step": 5970 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007940404804976755, + "loss": 0.8789, + "step": 5971 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007939700785631604, + "loss": 0.8984, + "step": 5972 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007938996677203862, + "loss": 0.9727, + "step": 5973 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007938292479714866, + "loss": 0.9062, + "step": 5974 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007937588193185956, + "loss": 0.8594, + "step": 5975 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007936883817638474, + "loss": 0.8555, + "step": 5976 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007936179353093766, + "loss": 0.8438, + "step": 5977 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007935474799573179, + "loss": 0.9531, + "step": 5978 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007934770157098063, + "loss": 0.9219, + "step": 5979 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007934065425689771, + "loss": 0.8867, + "step": 5980 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007933360605369658, + "loss": 0.9258, + "step": 5981 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007932655696159084, + "loss": 0.8789, + "step": 5982 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007931950698079409, + "loss": 0.8516, + "step": 5983 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007931245611151997, + "loss": 0.875, + "step": 5984 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007930540435398214, + "loss": 0.9609, + "step": 5985 + }, + { + "epoch": 0.32, + "learning_rate": 0.000792983517083943, + "loss": 0.9727, + "step": 5986 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007929129817497016, + "loss": 0.9023, + "step": 5987 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007928424375392346, + "loss": 0.9883, + "step": 5988 + }, + { + "epoch": 0.32, + "learning_rate": 0.00079277188445468, + "loss": 0.9023, + "step": 5989 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007927013224981757, + "loss": 0.8945, + "step": 5990 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007926307516718596, + "loss": 0.8867, + "step": 5991 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007925601719778705, + "loss": 0.8945, + "step": 5992 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007924895834183472, + "loss": 0.875, + "step": 5993 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007924189859954288, + "loss": 0.9102, + "step": 5994 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007923483797112547, + "loss": 0.9102, + "step": 5995 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007922777645679641, + "loss": 0.9805, + "step": 5996 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007922071405676974, + "loss": 0.9102, + "step": 5997 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007921365077125943, + "loss": 0.8711, + "step": 5998 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007920658660047957, + "loss": 0.9375, + "step": 5999 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007919952154464418, + "loss": 0.8789, + "step": 6000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007919245560396738, + "loss": 0.8906, + "step": 6001 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007918538877866328, + "loss": 0.8867, + "step": 6002 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007917832106894602, + "loss": 0.8242, + "step": 6003 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007917125247502979, + "loss": 0.9258, + "step": 6004 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007916418299712879, + "loss": 0.8984, + "step": 6005 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007915711263545727, + "loss": 0.9375, + "step": 6006 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007915004139022944, + "loss": 0.9844, + "step": 6007 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007914296926165958, + "loss": 0.9258, + "step": 6008 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007913589624996206, + "loss": 0.8945, + "step": 6009 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007912882235535115, + "loss": 0.9609, + "step": 6010 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007912174757804127, + "loss": 0.9805, + "step": 6011 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007911467191824675, + "loss": 0.8555, + "step": 6012 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007910759537618206, + "loss": 0.9375, + "step": 6013 + }, + { + "epoch": 0.32, + "learning_rate": 0.000791005179520616, + "loss": 0.9141, + "step": 6014 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007909343964609987, + "loss": 0.8984, + "step": 6015 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007908636045851135, + "loss": 0.9648, + "step": 6016 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007907928038951056, + "loss": 0.9062, + "step": 6017 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007907219943931203, + "loss": 0.9258, + "step": 6018 + }, + { + "epoch": 0.32, + "learning_rate": 0.000790651176081304, + "loss": 0.9961, + "step": 6019 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007905803489618021, + "loss": 0.9023, + "step": 6020 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007905095130367612, + "loss": 0.9297, + "step": 6021 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007904386683083278, + "loss": 0.9219, + "step": 6022 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007903678147786487, + "loss": 0.9336, + "step": 6023 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007902969524498709, + "loss": 0.8633, + "step": 6024 + }, + { + "epoch": 0.32, + "learning_rate": 0.000790226081324142, + "loss": 0.918, + "step": 6025 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007901552014036096, + "loss": 0.8945, + "step": 6026 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007900843126904213, + "loss": 0.9062, + "step": 6027 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007900134151867255, + "loss": 0.8711, + "step": 6028 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007899425088946705, + "loss": 0.9102, + "step": 6029 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007898715938164052, + "loss": 0.9258, + "step": 6030 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007898006699540784, + "loss": 0.9062, + "step": 6031 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007897297373098395, + "loss": 0.9336, + "step": 6032 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007896587958858376, + "loss": 0.9727, + "step": 6033 + }, + { + "epoch": 0.32, + "learning_rate": 0.000789587845684223, + "loss": 0.8359, + "step": 6034 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007895168867071452, + "loss": 1.0781, + "step": 6035 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007894459189567547, + "loss": 0.8711, + "step": 6036 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007893749424352022, + "loss": 0.9297, + "step": 6037 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007893039571446383, + "loss": 0.8672, + "step": 6038 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007892329630872144, + "loss": 0.9219, + "step": 6039 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007891619602650815, + "loss": 0.9492, + "step": 6040 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007890909486803912, + "loss": 0.8164, + "step": 6041 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007890199283352959, + "loss": 0.7891, + "step": 6042 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007889488992319471, + "loss": 0.9805, + "step": 6043 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007888778613724976, + "loss": 0.9297, + "step": 6044 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007888068147591001, + "loss": 0.7773, + "step": 6045 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007887357593939073, + "loss": 0.9141, + "step": 6046 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007886646952790726, + "loss": 0.8555, + "step": 6047 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007885936224167495, + "loss": 0.8516, + "step": 6048 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007885225408090915, + "loss": 0.9141, + "step": 6049 + }, + { + "epoch": 0.33, + "learning_rate": 0.000788451450458253, + "loss": 0.9219, + "step": 6050 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007883803513663878, + "loss": 0.8945, + "step": 6051 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007883092435356508, + "loss": 0.9141, + "step": 6052 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007882381269681966, + "loss": 0.8711, + "step": 6053 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007881670016661804, + "loss": 0.9727, + "step": 6054 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007880958676317575, + "loss": 0.9883, + "step": 6055 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007880247248670836, + "loss": 0.832, + "step": 6056 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007879535733743142, + "loss": 0.9844, + "step": 6057 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007878824131556057, + "loss": 0.8398, + "step": 6058 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007878112442131145, + "loss": 0.9531, + "step": 6059 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007877400665489972, + "loss": 0.8555, + "step": 6060 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007876688801654107, + "loss": 0.9297, + "step": 6061 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007875976850645121, + "loss": 0.9102, + "step": 6062 + }, + { + "epoch": 0.33, + "learning_rate": 0.000787526481248459, + "loss": 0.8281, + "step": 6063 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007874552687194092, + "loss": 0.9297, + "step": 6064 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007873840474795203, + "loss": 0.9727, + "step": 6065 + }, + { + "epoch": 0.33, + "learning_rate": 0.000787312817530951, + "loss": 0.7266, + "step": 6066 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007872415788758593, + "loss": 0.9414, + "step": 6067 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007871703315164043, + "loss": 0.8203, + "step": 6068 + }, + { + "epoch": 0.33, + "learning_rate": 0.000787099075454745, + "loss": 1.0234, + "step": 6069 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007870278106930406, + "loss": 0.9375, + "step": 6070 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007869565372334509, + "loss": 0.9297, + "step": 6071 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007868852550781352, + "loss": 0.9531, + "step": 6072 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007868139642292542, + "loss": 0.9023, + "step": 6073 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007867426646889677, + "loss": 0.8789, + "step": 6074 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007866713564594368, + "loss": 0.8555, + "step": 6075 + }, + { + "epoch": 0.33, + "learning_rate": 0.000786600039542822, + "loss": 0.9102, + "step": 6076 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007865287139412847, + "loss": 0.8008, + "step": 6077 + }, + { + "epoch": 0.33, + "learning_rate": 0.000786457379656986, + "loss": 0.9297, + "step": 6078 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007863860366920879, + "loss": 0.8398, + "step": 6079 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007863146850487521, + "loss": 0.9727, + "step": 6080 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007862433247291409, + "loss": 0.9297, + "step": 6081 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007861719557354166, + "loss": 0.9453, + "step": 6082 + }, + { + "epoch": 0.33, + "learning_rate": 0.000786100578069742, + "loss": 0.8984, + "step": 6083 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007860291917342801, + "loss": 0.9648, + "step": 6084 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007859577967311944, + "loss": 0.9688, + "step": 6085 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007858863930626478, + "loss": 0.9492, + "step": 6086 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007858149807308046, + "loss": 0.9805, + "step": 6087 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007857435597378287, + "loss": 0.9648, + "step": 6088 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007856721300858841, + "loss": 0.8828, + "step": 6089 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007856006917771357, + "loss": 0.8984, + "step": 6090 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007855292448137482, + "loss": 0.9297, + "step": 6091 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007854577891978867, + "loss": 0.8906, + "step": 6092 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007853863249317165, + "loss": 0.9336, + "step": 6093 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007853148520174032, + "loss": 0.7617, + "step": 6094 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007852433704571127, + "loss": 0.8984, + "step": 6095 + }, + { + "epoch": 0.33, + "learning_rate": 0.000785171880253011, + "loss": 1.0469, + "step": 6096 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007851003814072647, + "loss": 0.8477, + "step": 6097 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007850288739220402, + "loss": 0.8242, + "step": 6098 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007849573577995048, + "loss": 0.8867, + "step": 6099 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007848858330418252, + "loss": 0.875, + "step": 6100 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007848142996511692, + "loss": 0.8945, + "step": 6101 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007847427576297044, + "loss": 0.9492, + "step": 6102 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007846712069795987, + "loss": 0.9766, + "step": 6103 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007845996477030202, + "loss": 0.8672, + "step": 6104 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007845280798021375, + "loss": 0.8906, + "step": 6105 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007844565032791195, + "loss": 0.8906, + "step": 6106 + }, + { + "epoch": 0.33, + "learning_rate": 0.000784384918136135, + "loss": 0.8516, + "step": 6107 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007843133243753534, + "loss": 0.9141, + "step": 6108 + }, + { + "epoch": 0.33, + "learning_rate": 0.000784241721998944, + "loss": 0.9531, + "step": 6109 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007841701110090769, + "loss": 0.8438, + "step": 6110 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007840984914079218, + "loss": 0.8711, + "step": 6111 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007840268631976495, + "loss": 0.8867, + "step": 6112 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007839552263804302, + "loss": 0.8477, + "step": 6113 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007838835809584347, + "loss": 0.8125, + "step": 6114 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007838119269338342, + "loss": 0.8555, + "step": 6115 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007837402643088001, + "loss": 0.9258, + "step": 6116 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007836685930855038, + "loss": 0.9062, + "step": 6117 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007835969132661176, + "loss": 0.832, + "step": 6118 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007835252248528131, + "loss": 0.9336, + "step": 6119 + }, + { + "epoch": 0.33, + "learning_rate": 0.000783453527847763, + "loss": 0.7617, + "step": 6120 + }, + { + "epoch": 0.33, + "learning_rate": 0.00078338182225314, + "loss": 0.8672, + "step": 6121 + }, + { + "epoch": 0.33, + "learning_rate": 0.000783310108071117, + "loss": 0.9805, + "step": 6122 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007832383853038671, + "loss": 1.0078, + "step": 6123 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007831666539535636, + "loss": 0.8359, + "step": 6124 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007830949140223804, + "loss": 0.8672, + "step": 6125 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007830231655124914, + "loss": 0.8164, + "step": 6126 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007829514084260708, + "loss": 0.8633, + "step": 6127 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007828796427652931, + "loss": 0.957, + "step": 6128 + }, + { + "epoch": 0.33, + "learning_rate": 0.000782807868532333, + "loss": 0.9609, + "step": 6129 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007827360857293655, + "loss": 0.8008, + "step": 6130 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007826642943585659, + "loss": 0.9102, + "step": 6131 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007825924944221096, + "loss": 0.9336, + "step": 6132 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007825206859221727, + "loss": 0.9648, + "step": 6133 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007824488688609308, + "loss": 0.7891, + "step": 6134 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007823770432405603, + "loss": 0.9141, + "step": 6135 + }, + { + "epoch": 0.33, + "learning_rate": 0.000782305209063238, + "loss": 0.8242, + "step": 6136 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007822333663311405, + "loss": 1.0, + "step": 6137 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007821615150464448, + "loss": 0.8789, + "step": 6138 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007820896552113285, + "loss": 0.8047, + "step": 6139 + }, + { + "epoch": 0.33, + "learning_rate": 0.000782017786827969, + "loss": 0.7891, + "step": 6140 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007819459098985442, + "loss": 0.8594, + "step": 6141 + }, + { + "epoch": 0.33, + "learning_rate": 0.000781874024425232, + "loss": 0.8711, + "step": 6142 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007818021304102112, + "loss": 0.9141, + "step": 6143 + }, + { + "epoch": 0.33, + "learning_rate": 0.00078173022785566, + "loss": 0.9688, + "step": 6144 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007816583167637577, + "loss": 0.8203, + "step": 6145 + }, + { + "epoch": 0.33, + "learning_rate": 0.000781586397136683, + "loss": 0.8477, + "step": 6146 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007815144689766157, + "loss": 0.7695, + "step": 6147 + }, + { + "epoch": 0.33, + "learning_rate": 0.000781442532285735, + "loss": 0.8789, + "step": 6148 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007813705870662214, + "loss": 0.7656, + "step": 6149 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007812986333202546, + "loss": 0.8125, + "step": 6150 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007812266710500151, + "loss": 0.9297, + "step": 6151 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007811547002576838, + "loss": 0.9297, + "step": 6152 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007810827209454415, + "loss": 0.9258, + "step": 6153 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007810107331154694, + "loss": 0.8164, + "step": 6154 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007809387367699492, + "loss": 0.7695, + "step": 6155 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007808667319110623, + "loss": 0.8867, + "step": 6156 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007807947185409909, + "loss": 0.9688, + "step": 6157 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007807226966619173, + "loss": 0.8633, + "step": 6158 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007806506662760235, + "loss": 0.8281, + "step": 6159 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007805786273854929, + "loss": 0.8047, + "step": 6160 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007805065799925082, + "loss": 0.9297, + "step": 6161 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007804345240992527, + "loss": 0.9492, + "step": 6162 + }, + { + "epoch": 0.33, + "learning_rate": 0.00078036245970791, + "loss": 0.8555, + "step": 6163 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007802903868206637, + "loss": 0.9492, + "step": 6164 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007802183054396981, + "loss": 1.0, + "step": 6165 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007801462155671973, + "loss": 0.9219, + "step": 6166 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007800741172053461, + "loss": 0.9297, + "step": 6167 + }, + { + "epoch": 0.33, + "learning_rate": 0.000780002010356329, + "loss": 0.7969, + "step": 6168 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007799298950223313, + "loss": 0.8789, + "step": 6169 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007798577712055381, + "loss": 0.8633, + "step": 6170 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007797856389081355, + "loss": 0.9375, + "step": 6171 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007797134981323086, + "loss": 0.7773, + "step": 6172 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007796413488802443, + "loss": 0.8711, + "step": 6173 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007795691911541283, + "loss": 0.9648, + "step": 6174 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007794970249561475, + "loss": 0.8398, + "step": 6175 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007794248502884887, + "loss": 0.8633, + "step": 6176 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007793526671533393, + "loss": 0.8984, + "step": 6177 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007792804755528862, + "loss": 0.9766, + "step": 6178 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007792082754893175, + "loss": 0.8906, + "step": 6179 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007791360669648209, + "loss": 0.8789, + "step": 6180 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007790638499815843, + "loss": 0.832, + "step": 6181 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007789916245417967, + "loss": 0.8008, + "step": 6182 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007789193906476463, + "loss": 0.7969, + "step": 6183 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007788471483013222, + "loss": 0.8945, + "step": 6184 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007787748975050135, + "loss": 0.8594, + "step": 6185 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007787026382609097, + "loss": 0.7812, + "step": 6186 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007786303705712005, + "loss": 0.8281, + "step": 6187 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007785580944380759, + "loss": 0.8906, + "step": 6188 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007784858098637259, + "loss": 0.9258, + "step": 6189 + }, + { + "epoch": 0.33, + "learning_rate": 0.000778413516850341, + "loss": 0.9648, + "step": 6190 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007783412154001121, + "loss": 0.9102, + "step": 6191 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007782689055152302, + "loss": 0.8594, + "step": 6192 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007781965871978863, + "loss": 0.8789, + "step": 6193 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007781242604502721, + "loss": 0.8906, + "step": 6194 + }, + { + "epoch": 0.33, + "learning_rate": 0.000778051925274579, + "loss": 0.957, + "step": 6195 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007779795816729993, + "loss": 0.9023, + "step": 6196 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007779072296477252, + "loss": 0.8984, + "step": 6197 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007778348692009492, + "loss": 0.8828, + "step": 6198 + }, + { + "epoch": 0.33, + "learning_rate": 0.000777762500334864, + "loss": 0.8789, + "step": 6199 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007776901230516627, + "loss": 0.8672, + "step": 6200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007776177373535384, + "loss": 0.8984, + "step": 6201 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007775453432426849, + "loss": 0.8828, + "step": 6202 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007774729407212958, + "loss": 0.9141, + "step": 6203 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007774005297915649, + "loss": 0.9414, + "step": 6204 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007773281104556869, + "loss": 0.8984, + "step": 6205 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007772556827158562, + "loss": 0.9531, + "step": 6206 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007771832465742675, + "loss": 0.8984, + "step": 6207 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007771108020331162, + "loss": 0.8711, + "step": 6208 + }, + { + "epoch": 0.33, + "learning_rate": 0.000777038349094597, + "loss": 0.8789, + "step": 6209 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007769658877609059, + "loss": 0.8594, + "step": 6210 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007768934180342386, + "loss": 0.9805, + "step": 6211 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007768209399167913, + "loss": 0.8477, + "step": 6212 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007767484534107603, + "loss": 0.875, + "step": 6213 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007766759585183418, + "loss": 0.9219, + "step": 6214 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007766034552417331, + "loss": 0.8711, + "step": 6215 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007765309435831311, + "loss": 0.8438, + "step": 6216 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007764584235447332, + "loss": 0.8633, + "step": 6217 + }, + { + "epoch": 0.33, + "learning_rate": 0.000776385895128737, + "loss": 0.8945, + "step": 6218 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007763133583373402, + "loss": 0.8789, + "step": 6219 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007762408131727409, + "loss": 0.8594, + "step": 6220 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007761682596371378, + "loss": 0.957, + "step": 6221 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007760956977327291, + "loss": 1.0156, + "step": 6222 + }, + { + "epoch": 0.33, + "learning_rate": 0.000776023127461714, + "loss": 0.8672, + "step": 6223 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007759505488262912, + "loss": 0.875, + "step": 6224 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007758779618286607, + "loss": 0.9297, + "step": 6225 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007758053664710214, + "loss": 0.9141, + "step": 6226 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007757327627555737, + "loss": 0.9258, + "step": 6227 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007756601506845176, + "loss": 0.875, + "step": 6228 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007755875302600532, + "loss": 0.8633, + "step": 6229 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007755149014843817, + "loss": 0.8984, + "step": 6230 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007754422643597034, + "loss": 0.9414, + "step": 6231 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007753696188882199, + "loss": 0.8672, + "step": 6232 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007752969650721323, + "loss": 0.9453, + "step": 6233 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007752243029136423, + "loss": 0.9609, + "step": 6234 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007751516324149521, + "loss": 0.8555, + "step": 6235 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007750789535782635, + "loss": 0.8906, + "step": 6236 + }, + { + "epoch": 0.34, + "learning_rate": 0.000775006266405779, + "loss": 0.9609, + "step": 6237 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007749335708997012, + "loss": 0.8828, + "step": 6238 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007748608670622333, + "loss": 0.8906, + "step": 6239 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007747881548955781, + "loss": 0.8047, + "step": 6240 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007747154344019393, + "loss": 0.9297, + "step": 6241 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007746427055835203, + "loss": 0.8789, + "step": 6242 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007745699684425253, + "loss": 0.9531, + "step": 6243 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007744972229811582, + "loss": 1.0156, + "step": 6244 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007744244692016237, + "loss": 0.8242, + "step": 6245 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007743517071061263, + "loss": 0.9336, + "step": 6246 + }, + { + "epoch": 0.34, + "learning_rate": 0.000774278936696871, + "loss": 0.8789, + "step": 6247 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007742061579760629, + "loss": 0.8945, + "step": 6248 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007741333709459078, + "loss": 0.9062, + "step": 6249 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007740605756086107, + "loss": 0.9727, + "step": 6250 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007739877719663782, + "loss": 0.9141, + "step": 6251 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007739149600214159, + "loss": 0.9961, + "step": 6252 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007738421397759308, + "loss": 0.9492, + "step": 6253 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007737693112321294, + "loss": 0.8438, + "step": 6254 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007736964743922184, + "loss": 0.9297, + "step": 6255 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007736236292584052, + "loss": 0.9297, + "step": 6256 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007735507758328972, + "loss": 0.8906, + "step": 6257 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007734779141179022, + "loss": 0.9609, + "step": 6258 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007734050441156282, + "loss": 0.8555, + "step": 6259 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007733321658282832, + "loss": 0.8242, + "step": 6260 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007732592792580757, + "loss": 0.9492, + "step": 6261 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007731863844072144, + "loss": 0.8398, + "step": 6262 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007731134812779083, + "loss": 0.9141, + "step": 6263 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007730405698723668, + "loss": 0.8594, + "step": 6264 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007729676501927989, + "loss": 0.8984, + "step": 6265 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007728947222414146, + "loss": 0.8984, + "step": 6266 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007728217860204237, + "loss": 0.8633, + "step": 6267 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007727488415320367, + "loss": 0.8281, + "step": 6268 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007726758887784638, + "loss": 0.8711, + "step": 6269 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007726029277619157, + "loss": 0.8086, + "step": 6270 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007725299584846037, + "loss": 0.7812, + "step": 6271 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007724569809487385, + "loss": 0.8281, + "step": 6272 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007723839951565319, + "loss": 0.9219, + "step": 6273 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007723110011101957, + "loss": 0.9023, + "step": 6274 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007722379988119414, + "loss": 0.9102, + "step": 6275 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007721649882639816, + "loss": 0.9844, + "step": 6276 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007720919694685286, + "loss": 0.9766, + "step": 6277 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007720189424277953, + "loss": 0.9141, + "step": 6278 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007719459071439943, + "loss": 0.8945, + "step": 6279 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007718728636193391, + "loss": 0.9414, + "step": 6280 + }, + { + "epoch": 0.34, + "learning_rate": 0.000771799811856043, + "loss": 0.9219, + "step": 6281 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007717267518563201, + "loss": 0.9336, + "step": 6282 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007716536836223836, + "loss": 0.832, + "step": 6283 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007715806071564484, + "loss": 0.9023, + "step": 6284 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007715075224607286, + "loss": 0.9062, + "step": 6285 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007714344295374393, + "loss": 0.9258, + "step": 6286 + }, + { + "epoch": 0.34, + "learning_rate": 0.000771361328388795, + "loss": 0.8945, + "step": 6287 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007712882190170111, + "loss": 0.8984, + "step": 6288 + }, + { + "epoch": 0.34, + "learning_rate": 0.000771215101424303, + "loss": 0.9141, + "step": 6289 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007711419756128863, + "loss": 0.9141, + "step": 6290 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007710688415849774, + "loss": 0.9492, + "step": 6291 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007709956993427919, + "loss": 0.7734, + "step": 6292 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007709225488885468, + "loss": 0.8555, + "step": 6293 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007708493902244582, + "loss": 0.8164, + "step": 6294 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007707762233527438, + "loss": 0.9922, + "step": 6295 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007707030482756201, + "loss": 0.75, + "step": 6296 + }, + { + "epoch": 0.34, + "learning_rate": 0.000770629864995305, + "loss": 0.9492, + "step": 6297 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007705566735140159, + "loss": 0.8906, + "step": 6298 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007704834738339707, + "loss": 0.8867, + "step": 6299 + }, + { + "epoch": 0.34, + "learning_rate": 0.000770410265957388, + "loss": 0.9414, + "step": 6300 + }, + { + "epoch": 0.34, + "learning_rate": 0.000770337049886486, + "loss": 0.7812, + "step": 6301 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007702638256234833, + "loss": 0.9336, + "step": 6302 + }, + { + "epoch": 0.34, + "learning_rate": 0.000770190593170599, + "loss": 0.8867, + "step": 6303 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007701173525300521, + "loss": 0.875, + "step": 6304 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007700441037040622, + "loss": 0.8594, + "step": 6305 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007699708466948489, + "loss": 0.8203, + "step": 6306 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007698975815046322, + "loss": 0.9688, + "step": 6307 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007698243081356319, + "loss": 0.9219, + "step": 6308 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007697510265900693, + "loss": 0.8555, + "step": 6309 + }, + { + "epoch": 0.34, + "learning_rate": 0.000769677736870164, + "loss": 0.9062, + "step": 6310 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007696044389781378, + "loss": 0.8359, + "step": 6311 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007695311329162114, + "loss": 0.9023, + "step": 6312 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007694578186866061, + "loss": 0.9414, + "step": 6313 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007693844962915439, + "loss": 0.9102, + "step": 6314 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007693111657332467, + "loss": 0.8711, + "step": 6315 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007692378270139364, + "loss": 0.8008, + "step": 6316 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007691644801358357, + "loss": 0.9219, + "step": 6317 + }, + { + "epoch": 0.34, + "learning_rate": 0.000769091125101167, + "loss": 0.8789, + "step": 6318 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007690177619121535, + "loss": 0.9102, + "step": 6319 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007689443905710177, + "loss": 0.8438, + "step": 6320 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007688710110799837, + "loss": 0.8945, + "step": 6321 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007687976234412749, + "loss": 0.8945, + "step": 6322 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007687242276571151, + "loss": 0.8672, + "step": 6323 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007686508237297285, + "loss": 0.9922, + "step": 6324 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007685774116613396, + "loss": 0.8633, + "step": 6325 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007685039914541727, + "loss": 0.918, + "step": 6326 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007684305631104529, + "loss": 0.9336, + "step": 6327 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007683571266324054, + "loss": 0.832, + "step": 6328 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007682836820222554, + "loss": 0.9023, + "step": 6329 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007682102292822286, + "loss": 0.8789, + "step": 6330 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007681367684145508, + "loss": 1.0234, + "step": 6331 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007680632994214483, + "loss": 0.8984, + "step": 6332 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007679898223051469, + "loss": 0.875, + "step": 6333 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007679163370678739, + "loss": 0.918, + "step": 6334 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007678428437118558, + "loss": 0.9141, + "step": 6335 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007677693422393197, + "loss": 0.8867, + "step": 6336 + }, + { + "epoch": 0.34, + "learning_rate": 0.000767695832652493, + "loss": 0.9141, + "step": 6337 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007676223149536033, + "loss": 0.9688, + "step": 6338 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007675487891448782, + "loss": 0.9609, + "step": 6339 + }, + { + "epoch": 0.34, + "learning_rate": 0.000767475255228546, + "loss": 0.9023, + "step": 6340 + }, + { + "epoch": 0.34, + "learning_rate": 0.000767401713206835, + "loss": 0.9531, + "step": 6341 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007673281630819738, + "loss": 0.9219, + "step": 6342 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007672546048561908, + "loss": 0.9219, + "step": 6343 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007671810385317157, + "loss": 0.8359, + "step": 6344 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007671074641107773, + "loss": 0.8477, + "step": 6345 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007670338815956055, + "loss": 0.8477, + "step": 6346 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007669602909884299, + "loss": 0.8164, + "step": 6347 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007668866922914806, + "loss": 0.8086, + "step": 6348 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007668130855069876, + "loss": 0.8711, + "step": 6349 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007667394706371822, + "loss": 0.9336, + "step": 6350 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007666658476842942, + "loss": 0.7969, + "step": 6351 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007665922166505553, + "loss": 0.8203, + "step": 6352 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007665185775381964, + "loss": 0.8125, + "step": 6353 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007664449303494492, + "loss": 0.8828, + "step": 6354 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007663712750865454, + "loss": 0.8945, + "step": 6355 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007662976117517169, + "loss": 0.9219, + "step": 6356 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007662239403471961, + "loss": 0.9648, + "step": 6357 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007661502608752154, + "loss": 0.8438, + "step": 6358 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007660765733380075, + "loss": 0.8867, + "step": 6359 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007660028777378055, + "loss": 0.9336, + "step": 6360 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007659291740768424, + "loss": 0.9414, + "step": 6361 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007658554623573519, + "loss": 0.9531, + "step": 6362 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007657817425815676, + "loss": 0.9375, + "step": 6363 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007657080147517233, + "loss": 0.8672, + "step": 6364 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007656342788700536, + "loss": 0.8828, + "step": 6365 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007655605349387927, + "loss": 0.8711, + "step": 6366 + }, + { + "epoch": 0.34, + "learning_rate": 0.000765486782960175, + "loss": 0.8477, + "step": 6367 + }, + { + "epoch": 0.34, + "learning_rate": 0.000765413022936436, + "loss": 0.8867, + "step": 6368 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007653392548698103, + "loss": 0.8008, + "step": 6369 + }, + { + "epoch": 0.34, + "learning_rate": 0.000765265478762534, + "loss": 0.8633, + "step": 6370 + }, + { + "epoch": 0.34, + "learning_rate": 0.000765191694616842, + "loss": 0.8008, + "step": 6371 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007651179024349706, + "loss": 0.8594, + "step": 6372 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007650441022191559, + "loss": 0.9609, + "step": 6373 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007649702939716343, + "loss": 0.8906, + "step": 6374 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007648964776946426, + "loss": 0.8164, + "step": 6375 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007648226533904171, + "loss": 0.8789, + "step": 6376 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007647488210611956, + "loss": 0.8711, + "step": 6377 + }, + { + "epoch": 0.34, + "learning_rate": 0.000764674980709215, + "loss": 0.8633, + "step": 6378 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007646011323367133, + "loss": 0.8164, + "step": 6379 + }, + { + "epoch": 0.34, + "learning_rate": 0.000764527275945928, + "loss": 0.9531, + "step": 6380 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007644534115390973, + "loss": 0.8672, + "step": 6381 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007643795391184594, + "loss": 0.8711, + "step": 6382 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007643056586862534, + "loss": 0.957, + "step": 6383 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007642317702447175, + "loss": 0.8867, + "step": 6384 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007641578737960912, + "loss": 0.8984, + "step": 6385 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007640839693426134, + "loss": 0.8828, + "step": 6386 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007640100568865238, + "loss": 0.8789, + "step": 6387 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007639361364300625, + "loss": 0.8047, + "step": 6388 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007638622079754691, + "loss": 0.8906, + "step": 6389 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007637882715249842, + "loss": 0.9141, + "step": 6390 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007637143270808481, + "loss": 0.8281, + "step": 6391 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007636403746453015, + "loss": 0.8984, + "step": 6392 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007635664142205858, + "loss": 0.8711, + "step": 6393 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007634924458089418, + "loss": 0.8516, + "step": 6394 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007634184694126114, + "loss": 0.8047, + "step": 6395 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007633444850338359, + "loss": 0.9883, + "step": 6396 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007632704926748575, + "loss": 0.8398, + "step": 6397 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007631964923379184, + "loss": 0.9414, + "step": 6398 + }, + { + "epoch": 0.34, + "learning_rate": 0.000763122484025261, + "loss": 0.9141, + "step": 6399 + }, + { + "epoch": 0.34, + "learning_rate": 0.000763048467739128, + "loss": 0.9023, + "step": 6400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007629744434817625, + "loss": 0.8438, + "step": 6401 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007629004112554074, + "loss": 0.9062, + "step": 6402 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007628263710623063, + "loss": 0.8359, + "step": 6403 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007627523229047029, + "loss": 0.9141, + "step": 6404 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007626782667848409, + "loss": 0.8438, + "step": 6405 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007626042027049646, + "loss": 0.8359, + "step": 6406 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007625301306673183, + "loss": 0.8047, + "step": 6407 + }, + { + "epoch": 0.34, + "learning_rate": 0.000762456050674147, + "loss": 0.9102, + "step": 6408 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007623819627276949, + "loss": 0.9141, + "step": 6409 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007623078668302075, + "loss": 0.8555, + "step": 6410 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007622337629839302, + "loss": 0.8359, + "step": 6411 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007621596511911084, + "loss": 0.8672, + "step": 6412 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007620855314539882, + "loss": 0.8633, + "step": 6413 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007620114037748154, + "loss": 0.9023, + "step": 6414 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007619372681558363, + "loss": 0.8633, + "step": 6415 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007618631245992978, + "loss": 0.8633, + "step": 6416 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007617889731074461, + "loss": 0.8945, + "step": 6417 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007617148136825289, + "loss": 0.832, + "step": 6418 + }, + { + "epoch": 0.34, + "learning_rate": 0.000761640646326793, + "loss": 0.8398, + "step": 6419 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007615664710424861, + "loss": 0.8945, + "step": 6420 + }, + { + "epoch": 0.35, + "learning_rate": 0.000761492287831856, + "loss": 0.9336, + "step": 6421 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007614180966971506, + "loss": 0.8945, + "step": 6422 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007613438976406181, + "loss": 0.8359, + "step": 6423 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007612696906645071, + "loss": 0.8984, + "step": 6424 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007611954757710662, + "loss": 0.8398, + "step": 6425 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007611212529625446, + "loss": 0.9023, + "step": 6426 + }, + { + "epoch": 0.35, + "learning_rate": 0.000761047022241191, + "loss": 0.957, + "step": 6427 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007609727836092553, + "loss": 0.8281, + "step": 6428 + }, + { + "epoch": 0.35, + "learning_rate": 0.000760898537068987, + "loss": 0.8867, + "step": 6429 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007608242826226361, + "loss": 0.8398, + "step": 6430 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007607500202724527, + "loss": 0.9023, + "step": 6431 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007606757500206869, + "loss": 0.9062, + "step": 6432 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007606014718695898, + "loss": 0.7773, + "step": 6433 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007605271858214121, + "loss": 0.9336, + "step": 6434 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007604528918784048, + "loss": 0.8594, + "step": 6435 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007603785900428195, + "loss": 0.9297, + "step": 6436 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007603042803169073, + "loss": 0.8711, + "step": 6437 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007602299627029206, + "loss": 0.8867, + "step": 6438 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007601556372031112, + "loss": 0.9609, + "step": 6439 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007600813038197314, + "loss": 0.9531, + "step": 6440 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007600069625550339, + "loss": 0.875, + "step": 6441 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007599326134112712, + "loss": 0.8477, + "step": 6442 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007598582563906964, + "loss": 0.8906, + "step": 6443 + }, + { + "epoch": 0.35, + "learning_rate": 0.000759783891495563, + "loss": 1.0, + "step": 6444 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007597095187281245, + "loss": 0.8828, + "step": 6445 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007596351380906342, + "loss": 0.9062, + "step": 6446 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007595607495853465, + "loss": 0.8828, + "step": 6447 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007594863532145156, + "loss": 0.8672, + "step": 6448 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007594119489803956, + "loss": 0.9805, + "step": 6449 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007593375368852415, + "loss": 0.832, + "step": 6450 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007592631169313082, + "loss": 0.9297, + "step": 6451 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007591886891208507, + "loss": 0.9102, + "step": 6452 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007591142534561247, + "loss": 0.875, + "step": 6453 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007590398099393856, + "loss": 0.8555, + "step": 6454 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007589653585728894, + "loss": 0.9297, + "step": 6455 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007588908993588921, + "loss": 1.0078, + "step": 6456 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007588164322996502, + "loss": 0.9258, + "step": 6457 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007587419573974203, + "loss": 0.8945, + "step": 6458 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007586674746544592, + "loss": 0.8672, + "step": 6459 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007585929840730237, + "loss": 0.9219, + "step": 6460 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007585184856553715, + "loss": 0.8008, + "step": 6461 + }, + { + "epoch": 0.35, + "learning_rate": 0.00075844397940376, + "loss": 0.9883, + "step": 6462 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007583694653204472, + "loss": 0.7812, + "step": 6463 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007582949434076907, + "loss": 0.8789, + "step": 6464 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007582204136677488, + "loss": 0.8945, + "step": 6465 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007581458761028805, + "loss": 0.9766, + "step": 6466 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007580713307153439, + "loss": 0.8672, + "step": 6467 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007579967775073983, + "loss": 0.9023, + "step": 6468 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007579222164813031, + "loss": 0.8047, + "step": 6469 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007578476476393174, + "loss": 0.8828, + "step": 6470 + }, + { + "epoch": 0.35, + "learning_rate": 0.000757773070983701, + "loss": 0.8008, + "step": 6471 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007576984865167138, + "loss": 0.875, + "step": 6472 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007576238942406159, + "loss": 0.9297, + "step": 6473 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007575492941576678, + "loss": 0.8867, + "step": 6474 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007574746862701302, + "loss": 0.8086, + "step": 6475 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007574000705802636, + "loss": 0.8984, + "step": 6476 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007573254470903295, + "loss": 0.8984, + "step": 6477 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007572508158025891, + "loss": 0.8711, + "step": 6478 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007571761767193039, + "loss": 0.9062, + "step": 6479 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007571015298427359, + "loss": 0.8516, + "step": 6480 + }, + { + "epoch": 0.35, + "learning_rate": 0.000757026875175147, + "loss": 0.9219, + "step": 6481 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007569522127187995, + "loss": 0.8945, + "step": 6482 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007568775424759557, + "loss": 0.832, + "step": 6483 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007568028644488788, + "loss": 0.8789, + "step": 6484 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007567281786398315, + "loss": 0.875, + "step": 6485 + }, + { + "epoch": 0.35, + "learning_rate": 0.000756653485051077, + "loss": 0.8242, + "step": 6486 + }, + { + "epoch": 0.35, + "learning_rate": 0.000756578783684879, + "loss": 0.9062, + "step": 6487 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007565040745435009, + "loss": 0.8594, + "step": 6488 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007564293576292068, + "loss": 0.918, + "step": 6489 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007563546329442609, + "loss": 0.8242, + "step": 6490 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007562799004909274, + "loss": 0.8281, + "step": 6491 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007562051602714714, + "loss": 0.9102, + "step": 6492 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007561304122881571, + "loss": 0.918, + "step": 6493 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007560556565432502, + "loss": 0.8906, + "step": 6494 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007559808930390157, + "loss": 0.8477, + "step": 6495 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007559061217777193, + "loss": 0.8633, + "step": 6496 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007558313427616268, + "loss": 0.8555, + "step": 6497 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007557565559930045, + "loss": 0.8867, + "step": 6498 + }, + { + "epoch": 0.35, + "learning_rate": 0.000755681761474118, + "loss": 0.9141, + "step": 6499 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007556069592072346, + "loss": 0.8672, + "step": 6500 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007555321491946206, + "loss": 0.8867, + "step": 6501 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007554573314385431, + "loss": 0.8867, + "step": 6502 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007553825059412693, + "loss": 0.832, + "step": 6503 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007553076727050668, + "loss": 0.9219, + "step": 6504 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007552328317322032, + "loss": 0.8516, + "step": 6505 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007551579830249462, + "loss": 0.8594, + "step": 6506 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007550831265855644, + "loss": 0.8008, + "step": 6507 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007550082624163258, + "loss": 0.9492, + "step": 6508 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007549333905194992, + "loss": 0.8867, + "step": 6509 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007548585108973536, + "loss": 0.8203, + "step": 6510 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007547836235521578, + "loss": 0.7578, + "step": 6511 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007547087284861814, + "loss": 0.8008, + "step": 6512 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007546338257016939, + "loss": 0.9023, + "step": 6513 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007545589152009648, + "loss": 0.8711, + "step": 6514 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007544839969862647, + "loss": 1.0391, + "step": 6515 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007544090710598632, + "loss": 0.8633, + "step": 6516 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007543341374240315, + "loss": 0.8711, + "step": 6517 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007542591960810399, + "loss": 0.9414, + "step": 6518 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007541842470331594, + "loss": 0.8828, + "step": 6519 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007541092902826614, + "loss": 0.9023, + "step": 6520 + }, + { + "epoch": 0.35, + "learning_rate": 0.000754034325831817, + "loss": 0.9102, + "step": 6521 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007539593536828984, + "loss": 0.9414, + "step": 6522 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007538843738381769, + "loss": 0.8398, + "step": 6523 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007538093862999249, + "loss": 0.9219, + "step": 6524 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007537343910704149, + "loss": 0.8945, + "step": 6525 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007536593881519194, + "loss": 0.8516, + "step": 6526 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007535843775467112, + "loss": 0.875, + "step": 6527 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007535093592570632, + "loss": 0.8008, + "step": 6528 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007534343332852491, + "loss": 0.8711, + "step": 6529 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007533592996335421, + "loss": 0.832, + "step": 6530 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007532842583042161, + "loss": 0.8711, + "step": 6531 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007532092092995452, + "loss": 0.9453, + "step": 6532 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007531341526218034, + "loss": 0.7617, + "step": 6533 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007530590882732653, + "loss": 0.9023, + "step": 6534 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007529840162562056, + "loss": 0.9414, + "step": 6535 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007529089365728991, + "loss": 0.8945, + "step": 6536 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007528338492256212, + "loss": 0.9258, + "step": 6537 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007527587542166471, + "loss": 0.8555, + "step": 6538 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007526836515482525, + "loss": 0.8828, + "step": 6539 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007526085412227132, + "loss": 0.7773, + "step": 6540 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007525334232423054, + "loss": 0.8945, + "step": 6541 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007524582976093053, + "loss": 0.8828, + "step": 6542 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007523831643259895, + "loss": 0.7891, + "step": 6543 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007523080233946347, + "loss": 0.9023, + "step": 6544 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007522328748175183, + "loss": 0.9727, + "step": 6545 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007521577185969169, + "loss": 0.8125, + "step": 6546 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007520825547351086, + "loss": 0.8945, + "step": 6547 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007520073832343707, + "loss": 0.8867, + "step": 6548 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007519322040969814, + "loss": 0.875, + "step": 6549 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007518570173252186, + "loss": 0.8945, + "step": 6550 + }, + { + "epoch": 0.35, + "learning_rate": 0.000751781822921361, + "loss": 0.8477, + "step": 6551 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007517066208876871, + "loss": 0.8438, + "step": 6552 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007516314112264757, + "loss": 0.875, + "step": 6553 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007515561939400062, + "loss": 0.9023, + "step": 6554 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007514809690305575, + "loss": 0.8477, + "step": 6555 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007514057365004094, + "loss": 0.8711, + "step": 6556 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007513304963518417, + "loss": 0.8672, + "step": 6557 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007512552485871344, + "loss": 0.832, + "step": 6558 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007511799932085676, + "loss": 0.8945, + "step": 6559 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007511047302184221, + "loss": 0.9336, + "step": 6560 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007510294596189784, + "loss": 0.9102, + "step": 6561 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007509541814125176, + "loss": 0.8711, + "step": 6562 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007508788956013206, + "loss": 0.8789, + "step": 6563 + }, + { + "epoch": 0.35, + "learning_rate": 0.000750803602187669, + "loss": 0.9414, + "step": 6564 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007507283011738445, + "loss": 0.7734, + "step": 6565 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007506529925621288, + "loss": 0.8672, + "step": 6566 + }, + { + "epoch": 0.35, + "learning_rate": 0.000750577676354804, + "loss": 0.8984, + "step": 6567 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007505023525541528, + "loss": 0.7695, + "step": 6568 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007504270211624571, + "loss": 0.875, + "step": 6569 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007503516821820002, + "loss": 0.7773, + "step": 6570 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007502763356150651, + "loss": 0.9336, + "step": 6571 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007502009814639348, + "loss": 0.8867, + "step": 6572 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007501256197308928, + "loss": 0.8555, + "step": 6573 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007500502504182232, + "loss": 0.8398, + "step": 6574 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007499748735282092, + "loss": 0.8672, + "step": 6575 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007498994890631359, + "loss": 0.8594, + "step": 6576 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007498240970252868, + "loss": 0.9023, + "step": 6577 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007497486974169473, + "loss": 0.8164, + "step": 6578 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007496732902404017, + "loss": 0.8516, + "step": 6579 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007495978754979353, + "loss": 0.7773, + "step": 6580 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007495224531918333, + "loss": 0.8867, + "step": 6581 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007494470233243814, + "loss": 0.8711, + "step": 6582 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007493715858978654, + "loss": 0.8828, + "step": 6583 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007492961409145711, + "loss": 0.918, + "step": 6584 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007492206883767848, + "loss": 0.8203, + "step": 6585 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007491452282867932, + "loss": 0.8828, + "step": 6586 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007490697606468826, + "loss": 0.9531, + "step": 6587 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007489942854593401, + "loss": 1.0625, + "step": 6588 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007489188027264529, + "loss": 0.8789, + "step": 6589 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007488433124505083, + "loss": 0.8242, + "step": 6590 + }, + { + "epoch": 0.35, + "learning_rate": 0.000748767814633794, + "loss": 0.9453, + "step": 6591 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007486923092785978, + "loss": 0.8555, + "step": 6592 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007486167963872077, + "loss": 0.9141, + "step": 6593 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007485412759619119, + "loss": 0.9688, + "step": 6594 + }, + { + "epoch": 0.35, + "learning_rate": 0.000748465748004999, + "loss": 0.875, + "step": 6595 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007483902125187579, + "loss": 0.875, + "step": 6596 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007483146695054775, + "loss": 0.957, + "step": 6597 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007482391189674468, + "loss": 0.8906, + "step": 6598 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007481635609069555, + "loss": 0.8672, + "step": 6599 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007480879953262929, + "loss": 0.9648, + "step": 6600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007480124222277494, + "loss": 0.9062, + "step": 6601 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007479368416136148, + "loss": 0.8203, + "step": 6602 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007478612534861793, + "loss": 0.8555, + "step": 6603 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007477856578477338, + "loss": 0.8086, + "step": 6604 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007477100547005689, + "loss": 0.9492, + "step": 6605 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007476344440469756, + "loss": 0.9766, + "step": 6606 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007475588258892453, + "loss": 0.8594, + "step": 6607 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007474832002296693, + "loss": 0.8516, + "step": 6608 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007474075670705395, + "loss": 0.8945, + "step": 6609 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007473319264141476, + "loss": 0.9023, + "step": 6610 + }, + { + "epoch": 0.36, + "learning_rate": 0.000747256278262786, + "loss": 0.8867, + "step": 6611 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007471806226187469, + "loss": 0.8906, + "step": 6612 + }, + { + "epoch": 0.36, + "learning_rate": 0.000747104959484323, + "loss": 0.832, + "step": 6613 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007470292888618072, + "loss": 0.9883, + "step": 6614 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007469536107534924, + "loss": 0.8672, + "step": 6615 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007468779251616722, + "loss": 0.9219, + "step": 6616 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007468022320886398, + "loss": 0.8633, + "step": 6617 + }, + { + "epoch": 0.36, + "learning_rate": 0.000746726531536689, + "loss": 0.832, + "step": 6618 + }, + { + "epoch": 0.36, + "learning_rate": 0.000746650823508114, + "loss": 0.8789, + "step": 6619 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007465751080052087, + "loss": 0.8477, + "step": 6620 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007464993850302677, + "loss": 0.9648, + "step": 6621 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007464236545855857, + "loss": 0.8672, + "step": 6622 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007463479166734575, + "loss": 0.8906, + "step": 6623 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007462721712961782, + "loss": 0.8867, + "step": 6624 + }, + { + "epoch": 0.36, + "learning_rate": 0.000746196418456043, + "loss": 0.7891, + "step": 6625 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007461206581553479, + "loss": 0.8711, + "step": 6626 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007460448903963881, + "loss": 0.8477, + "step": 6627 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007459691151814602, + "loss": 0.7656, + "step": 6628 + }, + { + "epoch": 0.36, + "learning_rate": 0.00074589333251286, + "loss": 0.793, + "step": 6629 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007458175423928842, + "loss": 0.9141, + "step": 6630 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007457417448238293, + "loss": 0.8555, + "step": 6631 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007456659398079923, + "loss": 0.7852, + "step": 6632 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007455901273476702, + "loss": 0.9141, + "step": 6633 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007455143074451609, + "loss": 0.8164, + "step": 6634 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007454384801027614, + "loss": 0.832, + "step": 6635 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007453626453227698, + "loss": 0.8789, + "step": 6636 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007452868031074841, + "loss": 0.8203, + "step": 6637 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007452109534592024, + "loss": 0.8555, + "step": 6638 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007451350963802234, + "loss": 0.875, + "step": 6639 + }, + { + "epoch": 0.36, + "learning_rate": 0.000745059231872846, + "loss": 0.8633, + "step": 6640 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007449833599393686, + "loss": 0.8984, + "step": 6641 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007449074805820908, + "loss": 0.8906, + "step": 6642 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007448315938033118, + "loss": 0.8633, + "step": 6643 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007447556996053314, + "loss": 0.9336, + "step": 6644 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007446797979904492, + "loss": 0.8828, + "step": 6645 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007446038889609654, + "loss": 0.8633, + "step": 6646 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007445279725191802, + "loss": 0.8242, + "step": 6647 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007444520486673945, + "loss": 0.8945, + "step": 6648 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007443761174079085, + "loss": 0.6719, + "step": 6649 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007443001787430235, + "loss": 0.9531, + "step": 6650 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007442242326750406, + "loss": 0.8711, + "step": 6651 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007441482792062612, + "loss": 0.9492, + "step": 6652 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007440723183389869, + "loss": 0.8125, + "step": 6653 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007439963500755198, + "loss": 0.9375, + "step": 6654 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007439203744181618, + "loss": 0.918, + "step": 6655 + }, + { + "epoch": 0.36, + "learning_rate": 0.000743844391369215, + "loss": 0.793, + "step": 6656 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007437684009309823, + "loss": 0.957, + "step": 6657 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007436924031057663, + "loss": 0.8555, + "step": 6658 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007436163978958701, + "loss": 0.8594, + "step": 6659 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007435403853035966, + "loss": 0.8203, + "step": 6660 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007434643653312496, + "loss": 0.8516, + "step": 6661 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007433883379811325, + "loss": 0.8828, + "step": 6662 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007433123032555493, + "loss": 0.8398, + "step": 6663 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007432362611568042, + "loss": 0.9141, + "step": 6664 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007431602116872014, + "loss": 0.9297, + "step": 6665 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007430841548490452, + "loss": 0.9336, + "step": 6666 + }, + { + "epoch": 0.36, + "learning_rate": 0.000743008090644641, + "loss": 0.9961, + "step": 6667 + }, + { + "epoch": 0.36, + "learning_rate": 0.000742932019076293, + "loss": 0.7344, + "step": 6668 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007428559401463071, + "loss": 0.9336, + "step": 6669 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007427798538569883, + "loss": 0.9258, + "step": 6670 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007427037602106426, + "loss": 0.9102, + "step": 6671 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007426276592095758, + "loss": 0.8867, + "step": 6672 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007425515508560939, + "loss": 0.8281, + "step": 6673 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007424754351525032, + "loss": 0.8711, + "step": 6674 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007423993121011102, + "loss": 0.8828, + "step": 6675 + }, + { + "epoch": 0.36, + "learning_rate": 0.000742323181704222, + "loss": 0.9922, + "step": 6676 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007422470439641453, + "loss": 0.9062, + "step": 6677 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007421708988831874, + "loss": 0.9336, + "step": 6678 + }, + { + "epoch": 0.36, + "learning_rate": 0.000742094746463656, + "loss": 0.8672, + "step": 6679 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007420185867078584, + "loss": 0.8516, + "step": 6680 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007419424196181026, + "loss": 0.8945, + "step": 6681 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007418662451966969, + "loss": 0.8125, + "step": 6682 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007417900634459493, + "loss": 0.8945, + "step": 6683 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007417138743681687, + "loss": 0.9258, + "step": 6684 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007416376779656636, + "loss": 0.875, + "step": 6685 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007415614742407431, + "loss": 0.8984, + "step": 6686 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007414852631957166, + "loss": 0.8086, + "step": 6687 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007414090448328933, + "loss": 0.9141, + "step": 6688 + }, + { + "epoch": 0.36, + "learning_rate": 0.000741332819154583, + "loss": 0.8828, + "step": 6689 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007412565861630954, + "loss": 0.9414, + "step": 6690 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007411803458607409, + "loss": 0.8906, + "step": 6691 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007411040982498297, + "loss": 0.8633, + "step": 6692 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007410278433326724, + "loss": 0.9102, + "step": 6693 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007409515811115794, + "loss": 0.9531, + "step": 6694 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007408753115888625, + "loss": 0.8398, + "step": 6695 + }, + { + "epoch": 0.36, + "learning_rate": 0.000740799034766832, + "loss": 0.8359, + "step": 6696 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007407227506478, + "loss": 0.875, + "step": 6697 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007406464592340778, + "loss": 0.8867, + "step": 6698 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007405701605279775, + "loss": 0.8672, + "step": 6699 + }, + { + "epoch": 0.36, + "learning_rate": 0.000740493854531811, + "loss": 0.8281, + "step": 6700 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007404175412478908, + "loss": 0.7578, + "step": 6701 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007403412206785294, + "loss": 0.7773, + "step": 6702 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007402648928260395, + "loss": 0.8711, + "step": 6703 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007401885576927341, + "loss": 0.8359, + "step": 6704 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007401122152809266, + "loss": 0.9297, + "step": 6705 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007400358655929301, + "loss": 0.8984, + "step": 6706 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007399595086310584, + "loss": 0.9766, + "step": 6707 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007398831443976254, + "loss": 0.8711, + "step": 6708 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007398067728949452, + "loss": 0.8828, + "step": 6709 + }, + { + "epoch": 0.36, + "learning_rate": 0.000739730394125332, + "loss": 0.9023, + "step": 6710 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007396540080911002, + "loss": 0.8164, + "step": 6711 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007395776147945651, + "loss": 0.8672, + "step": 6712 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007395012142380411, + "loss": 0.8477, + "step": 6713 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007394248064238436, + "loss": 0.9727, + "step": 6714 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007393483913542881, + "loss": 0.8516, + "step": 6715 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007392719690316901, + "loss": 0.875, + "step": 6716 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007391955394583653, + "loss": 0.8477, + "step": 6717 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007391191026366302, + "loss": 0.832, + "step": 6718 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007390426585688006, + "loss": 0.8984, + "step": 6719 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007389662072571935, + "loss": 0.8203, + "step": 6720 + }, + { + "epoch": 0.36, + "learning_rate": 0.000738889748704125, + "loss": 0.8594, + "step": 6721 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007388132829119127, + "loss": 0.8203, + "step": 6722 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007387368098828734, + "loss": 0.8828, + "step": 6723 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007386603296193246, + "loss": 0.8477, + "step": 6724 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007385838421235838, + "loss": 0.8047, + "step": 6725 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007385073473979689, + "loss": 0.8516, + "step": 6726 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007384308454447978, + "loss": 0.9062, + "step": 6727 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007383543362663891, + "loss": 0.9219, + "step": 6728 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007382778198650608, + "loss": 0.9844, + "step": 6729 + }, + { + "epoch": 0.36, + "learning_rate": 0.000738201296243132, + "loss": 0.8711, + "step": 6730 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007381247654029215, + "loss": 0.8789, + "step": 6731 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007380482273467485, + "loss": 0.9219, + "step": 6732 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007379716820769321, + "loss": 0.832, + "step": 6733 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007378951295957921, + "loss": 0.8906, + "step": 6734 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007378185699056483, + "loss": 0.8828, + "step": 6735 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007377420030088204, + "loss": 0.7812, + "step": 6736 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007376654289076292, + "loss": 0.8125, + "step": 6737 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007375888476043946, + "loss": 0.8789, + "step": 6738 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007375122591014376, + "loss": 0.9023, + "step": 6739 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007374356634010789, + "loss": 0.8438, + "step": 6740 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007373590605056397, + "loss": 0.8672, + "step": 6741 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007372824504174412, + "loss": 0.9141, + "step": 6742 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007372058331388053, + "loss": 0.875, + "step": 6743 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007371292086720532, + "loss": 0.9023, + "step": 6744 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007370525770195072, + "loss": 0.918, + "step": 6745 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007369759381834895, + "loss": 0.8906, + "step": 6746 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007368992921663224, + "loss": 0.9102, + "step": 6747 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007368226389703287, + "loss": 0.8555, + "step": 6748 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007367459785978309, + "loss": 0.8984, + "step": 6749 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007366693110511524, + "loss": 0.8711, + "step": 6750 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007365926363326165, + "loss": 1.0234, + "step": 6751 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007365159544445464, + "loss": 0.8438, + "step": 6752 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007364392653892661, + "loss": 0.9492, + "step": 6753 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007363625691690993, + "loss": 0.9258, + "step": 6754 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007362858657863703, + "loss": 0.9023, + "step": 6755 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007362091552434032, + "loss": 0.8555, + "step": 6756 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007361324375425232, + "loss": 0.9609, + "step": 6757 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007360557126860543, + "loss": 0.7656, + "step": 6758 + }, + { + "epoch": 0.36, + "learning_rate": 0.000735978980676322, + "loss": 0.8281, + "step": 6759 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007359022415156515, + "loss": 0.9805, + "step": 6760 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007358254952063683, + "loss": 0.8086, + "step": 6761 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007357487417507978, + "loss": 0.8906, + "step": 6762 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007356719811512659, + "loss": 0.8672, + "step": 6763 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007355952134100992, + "loss": 0.8984, + "step": 6764 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007355184385296234, + "loss": 0.8477, + "step": 6765 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007354416565121655, + "loss": 0.8164, + "step": 6766 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007353648673600518, + "loss": 0.9453, + "step": 6767 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007352880710756095, + "loss": 0.9492, + "step": 6768 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007352112676611658, + "loss": 0.8867, + "step": 6769 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007351344571190482, + "loss": 0.8672, + "step": 6770 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007350576394515841, + "loss": 0.918, + "step": 6771 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007349808146611013, + "loss": 0.8789, + "step": 6772 + }, + { + "epoch": 0.36, + "learning_rate": 0.000734903982749928, + "loss": 0.8711, + "step": 6773 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007348271437203924, + "loss": 0.8906, + "step": 6774 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007347502975748229, + "loss": 0.8164, + "step": 6775 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007346734443155485, + "loss": 0.8398, + "step": 6776 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007345965839448977, + "loss": 0.8672, + "step": 6777 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007345197164651999, + "loss": 0.9336, + "step": 6778 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007344428418787843, + "loss": 0.8945, + "step": 6779 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007343659601879805, + "loss": 0.9414, + "step": 6780 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007342890713951183, + "loss": 0.9297, + "step": 6781 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007342121755025276, + "loss": 0.8203, + "step": 6782 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007341352725125386, + "loss": 0.9688, + "step": 6783 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007340583624274819, + "loss": 0.8438, + "step": 6784 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007339814452496878, + "loss": 0.8477, + "step": 6785 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007339045209814874, + "loss": 0.957, + "step": 6786 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007338275896252117, + "loss": 0.8516, + "step": 6787 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007337506511831919, + "loss": 0.8984, + "step": 6788 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007336737056577596, + "loss": 0.8984, + "step": 6789 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007335967530512463, + "loss": 0.8516, + "step": 6790 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007335197933659843, + "loss": 0.8008, + "step": 6791 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007334428266043054, + "loss": 0.9531, + "step": 6792 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007333658527685419, + "loss": 0.9961, + "step": 6793 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007332888718610268, + "loss": 0.9219, + "step": 6794 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007332118838840922, + "loss": 0.8945, + "step": 6795 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007331348888400717, + "loss": 0.8086, + "step": 6796 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007330578867312983, + "loss": 0.8516, + "step": 6797 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007329808775601053, + "loss": 0.9766, + "step": 6798 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007329038613288263, + "loss": 0.8398, + "step": 6799 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007328268380397954, + "loss": 0.9609, + "step": 6800 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007327498076953465, + "loss": 0.9375, + "step": 6801 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007326727702978139, + "loss": 0.8906, + "step": 6802 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007325957258495319, + "loss": 0.8164, + "step": 6803 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007325186743528357, + "loss": 0.9766, + "step": 6804 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007324416158100596, + "loss": 0.875, + "step": 6805 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007323645502235391, + "loss": 0.8125, + "step": 6806 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007322874775956095, + "loss": 0.9648, + "step": 6807 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007322103979286064, + "loss": 0.8555, + "step": 6808 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007321333112248653, + "loss": 0.8672, + "step": 6809 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007320562174867225, + "loss": 0.7617, + "step": 6810 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007319791167165137, + "loss": 0.8164, + "step": 6811 + }, + { + "epoch": 0.37, + "learning_rate": 0.000731902008916576, + "loss": 0.8672, + "step": 6812 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007318248940892456, + "loss": 0.8867, + "step": 6813 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007317477722368594, + "loss": 0.8555, + "step": 6814 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007316706433617545, + "loss": 0.9922, + "step": 6815 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007315935074662681, + "loss": 0.918, + "step": 6816 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007315163645527376, + "loss": 0.8164, + "step": 6817 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007314392146235007, + "loss": 0.9102, + "step": 6818 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007313620576808957, + "loss": 0.9219, + "step": 6819 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007312848937272601, + "loss": 0.8555, + "step": 6820 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007312077227649325, + "loss": 0.8086, + "step": 6821 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007311305447962515, + "loss": 0.8555, + "step": 6822 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007310533598235557, + "loss": 1.0234, + "step": 6823 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007309761678491841, + "loss": 0.9492, + "step": 6824 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007308989688754762, + "loss": 0.8516, + "step": 6825 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007308217629047706, + "loss": 0.8828, + "step": 6826 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007307445499394079, + "loss": 0.875, + "step": 6827 + }, + { + "epoch": 0.37, + "learning_rate": 0.000730667329981727, + "loss": 0.7539, + "step": 6828 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007305901030340685, + "loss": 0.8359, + "step": 6829 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007305128690987723, + "loss": 0.9102, + "step": 6830 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007304356281781789, + "loss": 0.8984, + "step": 6831 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007303583802746293, + "loss": 0.957, + "step": 6832 + }, + { + "epoch": 0.37, + "learning_rate": 0.000730281125390464, + "loss": 0.9531, + "step": 6833 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007302038635280242, + "loss": 0.9258, + "step": 6834 + }, + { + "epoch": 0.37, + "learning_rate": 0.000730126594689651, + "loss": 0.9648, + "step": 6835 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007300493188776861, + "loss": 0.9102, + "step": 6836 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007299720360944713, + "loss": 0.9102, + "step": 6837 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007298947463423482, + "loss": 0.8203, + "step": 6838 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007298174496236594, + "loss": 0.8594, + "step": 6839 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007297401459407468, + "loss": 0.8594, + "step": 6840 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007296628352959531, + "loss": 0.8984, + "step": 6841 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007295855176916211, + "loss": 0.8945, + "step": 6842 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007295081931300939, + "loss": 0.8984, + "step": 6843 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007294308616137143, + "loss": 0.9648, + "step": 6844 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007293535231448263, + "loss": 0.8906, + "step": 6845 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007292761777257728, + "loss": 0.8711, + "step": 6846 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007291988253588982, + "loss": 0.8438, + "step": 6847 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007291214660465463, + "loss": 0.8555, + "step": 6848 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007290440997910614, + "loss": 0.8438, + "step": 6849 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007289667265947878, + "loss": 0.8398, + "step": 6850 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007288893464600702, + "loss": 0.8086, + "step": 6851 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007288119593892538, + "loss": 0.8594, + "step": 6852 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007287345653846832, + "loss": 0.9258, + "step": 6853 + }, + { + "epoch": 0.37, + "learning_rate": 0.000728657164448704, + "loss": 0.918, + "step": 6854 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007285797565836616, + "loss": 0.8555, + "step": 6855 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007285023417919019, + "loss": 0.8828, + "step": 6856 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007284249200757704, + "loss": 0.8438, + "step": 6857 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007283474914376137, + "loss": 0.9023, + "step": 6858 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007282700558797776, + "loss": 0.9492, + "step": 6859 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007281926134046096, + "loss": 0.8906, + "step": 6860 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007281151640144552, + "loss": 0.9336, + "step": 6861 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007280377077116624, + "loss": 0.8828, + "step": 6862 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007279602444985778, + "loss": 0.875, + "step": 6863 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007278827743775492, + "loss": 0.9102, + "step": 6864 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007278052973509238, + "loss": 0.9219, + "step": 6865 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007277278134210497, + "loss": 0.8789, + "step": 6866 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007276503225902747, + "loss": 0.8945, + "step": 6867 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007275728248609472, + "loss": 0.875, + "step": 6868 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007274953202354155, + "loss": 0.9141, + "step": 6869 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007274178087160284, + "loss": 0.8477, + "step": 6870 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007273402903051347, + "loss": 0.8047, + "step": 6871 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007272627650050834, + "loss": 0.8398, + "step": 6872 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007271852328182238, + "loss": 0.9102, + "step": 6873 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007271076937469055, + "loss": 0.8828, + "step": 6874 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007270301477934779, + "loss": 0.9102, + "step": 6875 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007269525949602911, + "loss": 0.8477, + "step": 6876 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007268750352496951, + "loss": 0.8398, + "step": 6877 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007267974686640405, + "loss": 0.8398, + "step": 6878 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007267198952056775, + "loss": 0.8281, + "step": 6879 + }, + { + "epoch": 0.37, + "learning_rate": 0.000726642314876957, + "loss": 0.8555, + "step": 6880 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007265647276802297, + "loss": 0.9062, + "step": 6881 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007264871336178471, + "loss": 0.8477, + "step": 6882 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007264095326921604, + "loss": 0.8047, + "step": 6883 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007263319249055212, + "loss": 0.8008, + "step": 6884 + }, + { + "epoch": 0.37, + "learning_rate": 0.000726254310260281, + "loss": 0.9102, + "step": 6885 + }, + { + "epoch": 0.37, + "learning_rate": 0.000726176688758792, + "loss": 0.9453, + "step": 6886 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007260990604034066, + "loss": 0.9219, + "step": 6887 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007260214251964769, + "loss": 0.9414, + "step": 6888 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007259437831403555, + "loss": 0.9414, + "step": 6889 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007258661342373953, + "loss": 0.8867, + "step": 6890 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007257884784899494, + "loss": 0.9258, + "step": 6891 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007257108159003708, + "loss": 0.8711, + "step": 6892 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007256331464710132, + "loss": 0.8945, + "step": 6893 + }, + { + "epoch": 0.37, + "learning_rate": 0.00072555547020423, + "loss": 0.875, + "step": 6894 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007254777871023752, + "loss": 0.9336, + "step": 6895 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007254000971678027, + "loss": 0.8594, + "step": 6896 + }, + { + "epoch": 0.37, + "learning_rate": 0.000725322400402867, + "loss": 0.8711, + "step": 6897 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007252446968099224, + "loss": 0.8711, + "step": 6898 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007251669863913235, + "loss": 0.9531, + "step": 6899 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007250892691494255, + "loss": 0.9219, + "step": 6900 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007250115450865831, + "loss": 0.8672, + "step": 6901 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007249338142051516, + "loss": 0.9062, + "step": 6902 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007248560765074869, + "loss": 0.8516, + "step": 6903 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007247783319959444, + "loss": 0.9922, + "step": 6904 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007247005806728801, + "loss": 0.8828, + "step": 6905 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007246228225406501, + "loss": 0.9023, + "step": 6906 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007245450576016108, + "loss": 0.8438, + "step": 6907 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007244672858581187, + "loss": 0.9453, + "step": 6908 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007243895073125304, + "loss": 0.9766, + "step": 6909 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007243117219672029, + "loss": 0.8789, + "step": 6910 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007242339298244936, + "loss": 0.8125, + "step": 6911 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007241561308867593, + "loss": 0.918, + "step": 6912 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007240783251563583, + "loss": 0.9219, + "step": 6913 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007240005126356478, + "loss": 0.8906, + "step": 6914 + }, + { + "epoch": 0.37, + "learning_rate": 0.000723922693326986, + "loss": 0.9219, + "step": 6915 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007238448672327311, + "loss": 0.957, + "step": 6916 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007237670343552414, + "loss": 0.8906, + "step": 6917 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007236891946968755, + "loss": 0.8398, + "step": 6918 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007236113482599923, + "loss": 0.9297, + "step": 6919 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007235334950469506, + "loss": 0.9062, + "step": 6920 + }, + { + "epoch": 0.37, + "learning_rate": 0.00072345563506011, + "loss": 0.8984, + "step": 6921 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007233777683018294, + "loss": 0.9258, + "step": 6922 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007232998947744687, + "loss": 0.8711, + "step": 6923 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007232220144803877, + "loss": 0.7852, + "step": 6924 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007231441274219464, + "loss": 0.8789, + "step": 6925 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007230662336015053, + "loss": 0.8789, + "step": 6926 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007229883330214242, + "loss": 0.8828, + "step": 6927 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007229104256840644, + "loss": 0.9453, + "step": 6928 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007228325115917863, + "loss": 0.875, + "step": 6929 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007227545907469513, + "loss": 0.9414, + "step": 6930 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007226766631519204, + "loss": 0.9648, + "step": 6931 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007225987288090552, + "loss": 0.9297, + "step": 6932 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007225207877207174, + "loss": 0.793, + "step": 6933 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007224428398892686, + "loss": 0.9297, + "step": 6934 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007223648853170713, + "loss": 0.9727, + "step": 6935 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007222869240064875, + "loss": 0.8711, + "step": 6936 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007222089559598794, + "loss": 0.7969, + "step": 6937 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007221309811796104, + "loss": 0.8906, + "step": 6938 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007220529996680429, + "loss": 0.8711, + "step": 6939 + }, + { + "epoch": 0.37, + "learning_rate": 0.00072197501142754, + "loss": 0.9258, + "step": 6940 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007218970164604653, + "loss": 0.9023, + "step": 6941 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007218190147691821, + "loss": 0.8789, + "step": 6942 + }, + { + "epoch": 0.37, + "learning_rate": 0.000721741006356054, + "loss": 0.9102, + "step": 6943 + }, + { + "epoch": 0.37, + "learning_rate": 0.000721662991223445, + "loss": 0.8711, + "step": 6944 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007215849693737194, + "loss": 1.0078, + "step": 6945 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007215069408092413, + "loss": 0.9492, + "step": 6946 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007214289055323752, + "loss": 0.8398, + "step": 6947 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007213508635454859, + "loss": 0.918, + "step": 6948 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007212728148509385, + "loss": 0.8438, + "step": 6949 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007211947594510979, + "loss": 0.9062, + "step": 6950 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007211166973483294, + "loss": 0.9961, + "step": 6951 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007210386285449988, + "loss": 0.8672, + "step": 6952 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007209605530434716, + "loss": 0.9297, + "step": 6953 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007208824708461139, + "loss": 0.8906, + "step": 6954 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007208043819552916, + "loss": 0.8125, + "step": 6955 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007207262863733715, + "loss": 0.8594, + "step": 6956 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007206481841027196, + "loss": 0.8867, + "step": 6957 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007205700751457032, + "loss": 0.8828, + "step": 6958 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007204919595046888, + "loss": 0.9766, + "step": 6959 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007204138371820437, + "loss": 0.9727, + "step": 6960 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007203357081801355, + "loss": 0.8242, + "step": 6961 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007202575725013314, + "loss": 0.8594, + "step": 6962 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007201794301479996, + "loss": 0.9961, + "step": 6963 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007201012811225077, + "loss": 0.9531, + "step": 6964 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007200231254272239, + "loss": 0.9102, + "step": 6965 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007199449630645167, + "loss": 0.9219, + "step": 6966 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007198667940367547, + "loss": 0.8594, + "step": 6967 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007197886183463065, + "loss": 0.9648, + "step": 6968 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007197104359955415, + "loss": 0.8789, + "step": 6969 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007196322469868281, + "loss": 0.8711, + "step": 6970 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007195540513225366, + "loss": 0.8047, + "step": 6971 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007194758490050359, + "loss": 0.7109, + "step": 6972 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007193976400366962, + "loss": 0.7891, + "step": 6973 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007193194244198873, + "loss": 0.8281, + "step": 6974 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007192412021569794, + "loss": 0.9062, + "step": 6975 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007191629732503428, + "loss": 0.9102, + "step": 6976 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007190847377023484, + "loss": 0.8438, + "step": 6977 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007190064955153666, + "loss": 0.9023, + "step": 6978 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007189282466917688, + "loss": 0.9453, + "step": 6979 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007188499912339257, + "loss": 0.8359, + "step": 6980 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007187717291442092, + "loss": 0.9883, + "step": 6981 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007186934604249906, + "loss": 0.8984, + "step": 6982 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007186151850786419, + "loss": 0.957, + "step": 6983 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007185369031075348, + "loss": 0.8867, + "step": 6984 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007184586145140419, + "loss": 0.8555, + "step": 6985 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007183803193005351, + "loss": 0.9727, + "step": 6986 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007183020174693876, + "loss": 0.7891, + "step": 6987 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007182237090229718, + "loss": 0.8945, + "step": 6988 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007181453939636607, + "loss": 0.9219, + "step": 6989 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007180670722938276, + "loss": 0.832, + "step": 6990 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007179887440158459, + "loss": 0.8398, + "step": 6991 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007179104091320893, + "loss": 0.9023, + "step": 6992 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007178320676449315, + "loss": 0.8711, + "step": 6993 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007177537195567464, + "loss": 0.918, + "step": 6994 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007176753648699084, + "loss": 0.8477, + "step": 6995 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007175970035867917, + "loss": 0.9023, + "step": 6996 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007175186357097712, + "loss": 0.875, + "step": 6997 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007174402612412214, + "loss": 0.8789, + "step": 6998 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007173618801835176, + "loss": 0.9766, + "step": 6999 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007172834925390347, + "loss": 0.9219, + "step": 7000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007172050983101482, + "loss": 0.8945, + "step": 7001 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007171266974992338, + "loss": 0.918, + "step": 7002 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007170482901086672, + "loss": 0.918, + "step": 7003 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007169698761408245, + "loss": 0.8789, + "step": 7004 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007168914555980819, + "loss": 0.875, + "step": 7005 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007168130284828158, + "loss": 0.8633, + "step": 7006 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007167345947974026, + "loss": 0.9023, + "step": 7007 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007166561545442194, + "loss": 0.9141, + "step": 7008 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007165777077256429, + "loss": 0.8125, + "step": 7009 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007164992543440505, + "loss": 0.8867, + "step": 7010 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007164207944018194, + "loss": 0.918, + "step": 7011 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007163423279013275, + "loss": 0.8555, + "step": 7012 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007162638548449524, + "loss": 0.7734, + "step": 7013 + }, + { + "epoch": 0.38, + "learning_rate": 0.000716185375235072, + "loss": 0.8242, + "step": 7014 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007161068890740647, + "loss": 0.8555, + "step": 7015 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007160283963643088, + "loss": 0.8711, + "step": 7016 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007159498971081829, + "loss": 0.875, + "step": 7017 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007158713913080657, + "loss": 0.9375, + "step": 7018 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007157928789663362, + "loss": 0.8867, + "step": 7019 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007157143600853738, + "loss": 0.8477, + "step": 7020 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007156358346675576, + "loss": 0.8945, + "step": 7021 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007155573027152673, + "loss": 0.8242, + "step": 7022 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007154787642308825, + "loss": 0.9375, + "step": 7023 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007154002192167835, + "loss": 0.8398, + "step": 7024 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007153216676753503, + "loss": 0.8711, + "step": 7025 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007152431096089634, + "loss": 0.8711, + "step": 7026 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007151645450200031, + "loss": 0.8906, + "step": 7027 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007150859739108504, + "loss": 0.8555, + "step": 7028 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007150073962838858, + "loss": 0.8984, + "step": 7029 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007149288121414914, + "loss": 0.8789, + "step": 7030 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007148502214860477, + "loss": 0.832, + "step": 7031 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007147716243199365, + "loss": 0.9258, + "step": 7032 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007146930206455397, + "loss": 0.8711, + "step": 7033 + }, + { + "epoch": 0.38, + "learning_rate": 0.000714614410465239, + "loss": 0.9258, + "step": 7034 + }, + { + "epoch": 0.38, + "learning_rate": 0.000714535793781417, + "loss": 0.8555, + "step": 7035 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007144571705964555, + "loss": 0.8125, + "step": 7036 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007143785409127373, + "loss": 0.8242, + "step": 7037 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007142999047326452, + "loss": 0.918, + "step": 7038 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007142212620585619, + "loss": 0.9023, + "step": 7039 + }, + { + "epoch": 0.38, + "learning_rate": 0.000714142612892871, + "loss": 0.8633, + "step": 7040 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007140639572379553, + "loss": 0.9375, + "step": 7041 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007139852950961984, + "loss": 0.8242, + "step": 7042 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007139066264699844, + "loss": 0.8359, + "step": 7043 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007138279513616968, + "loss": 0.8906, + "step": 7044 + }, + { + "epoch": 0.38, + "learning_rate": 0.00071374926977372, + "loss": 0.8984, + "step": 7045 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007136705817084382, + "loss": 0.8477, + "step": 7046 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007135918871682359, + "loss": 0.8086, + "step": 7047 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007135131861554977, + "loss": 0.8867, + "step": 7048 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007134344786726088, + "loss": 0.9258, + "step": 7049 + }, + { + "epoch": 0.38, + "learning_rate": 0.000713355764721954, + "loss": 0.8555, + "step": 7050 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007132770443059186, + "loss": 0.957, + "step": 7051 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007131983174268882, + "loss": 0.8711, + "step": 7052 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007131195840872486, + "loss": 0.9375, + "step": 7053 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007130408442893854, + "loss": 0.918, + "step": 7054 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007129620980356851, + "loss": 0.8477, + "step": 7055 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007128833453285333, + "loss": 0.9297, + "step": 7056 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007128045861703172, + "loss": 0.8516, + "step": 7057 + }, + { + "epoch": 0.38, + "learning_rate": 0.000712725820563423, + "loss": 0.9141, + "step": 7058 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007126470485102377, + "loss": 0.875, + "step": 7059 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007125682700131481, + "loss": 0.875, + "step": 7060 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007124894850745419, + "loss": 0.8125, + "step": 7061 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007124106936968062, + "loss": 0.8867, + "step": 7062 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007123318958823289, + "loss": 0.9102, + "step": 7063 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007122530916334975, + "loss": 0.9492, + "step": 7064 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007121742809527003, + "loss": 0.7812, + "step": 7065 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007120954638423255, + "loss": 0.9023, + "step": 7066 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007120166403047615, + "loss": 0.9297, + "step": 7067 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007119378103423966, + "loss": 0.793, + "step": 7068 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007118589739576201, + "loss": 0.7695, + "step": 7069 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007117801311528207, + "loss": 0.8672, + "step": 7070 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007117012819303876, + "loss": 0.9062, + "step": 7071 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007116224262927103, + "loss": 0.9023, + "step": 7072 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007115435642421784, + "loss": 0.8242, + "step": 7073 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007114646957811816, + "loss": 0.8906, + "step": 7074 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007113858209121099, + "loss": 0.8867, + "step": 7075 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007113069396373535, + "loss": 0.8945, + "step": 7076 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007112280519593026, + "loss": 0.8398, + "step": 7077 + }, + { + "epoch": 0.38, + "learning_rate": 0.000711149157880348, + "loss": 0.8125, + "step": 7078 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007110702574028802, + "loss": 0.8672, + "step": 7079 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007109913505292904, + "loss": 0.9258, + "step": 7080 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007109124372619696, + "loss": 0.7969, + "step": 7081 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007108335176033092, + "loss": 0.8633, + "step": 7082 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007107545915557005, + "loss": 0.9453, + "step": 7083 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007106756591215356, + "loss": 0.8984, + "step": 7084 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007105967203032059, + "loss": 0.8477, + "step": 7085 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007105177751031041, + "loss": 0.8359, + "step": 7086 + }, + { + "epoch": 0.38, + "learning_rate": 0.000710438823523622, + "loss": 0.9492, + "step": 7087 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007103598655671525, + "loss": 0.8438, + "step": 7088 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007102809012360879, + "loss": 0.8672, + "step": 7089 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007102019305328213, + "loss": 0.8711, + "step": 7090 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007101229534597457, + "loss": 0.8633, + "step": 7091 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007100439700192545, + "loss": 0.8945, + "step": 7092 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007099649802137411, + "loss": 0.8203, + "step": 7093 + }, + { + "epoch": 0.38, + "learning_rate": 0.000709885984045599, + "loss": 0.9688, + "step": 7094 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007098069815172221, + "loss": 0.9336, + "step": 7095 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007097279726310047, + "loss": 0.832, + "step": 7096 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007096489573893406, + "loss": 0.875, + "step": 7097 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007095699357946244, + "loss": 0.9766, + "step": 7098 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007094909078492509, + "loss": 0.8438, + "step": 7099 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007094118735556146, + "loss": 0.8633, + "step": 7100 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007093328329161109, + "loss": 0.8789, + "step": 7101 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007092537859331345, + "loss": 0.8477, + "step": 7102 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007091747326090812, + "loss": 0.9609, + "step": 7103 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007090956729463463, + "loss": 0.8477, + "step": 7104 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007090166069473257, + "loss": 0.8711, + "step": 7105 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007089375346144153, + "loss": 0.9492, + "step": 7106 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007088584559500112, + "loss": 0.8477, + "step": 7107 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007087793709565099, + "loss": 0.875, + "step": 7108 + }, + { + "epoch": 0.38, + "learning_rate": 0.000708700279636308, + "loss": 0.8672, + "step": 7109 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007086211819918018, + "loss": 0.8594, + "step": 7110 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007085420780253888, + "loss": 0.9648, + "step": 7111 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007084629677394656, + "loss": 0.9023, + "step": 7112 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007083838511364298, + "loss": 0.9297, + "step": 7113 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007083047282186788, + "loss": 0.8281, + "step": 7114 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007082255989886104, + "loss": 0.9023, + "step": 7115 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007081464634486224, + "loss": 0.8477, + "step": 7116 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007080673216011127, + "loss": 0.9414, + "step": 7117 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007079881734484795, + "loss": 0.7734, + "step": 7118 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007079090189931218, + "loss": 0.918, + "step": 7119 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007078298582374376, + "loss": 0.8984, + "step": 7120 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007077506911838263, + "loss": 0.875, + "step": 7121 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007076715178346864, + "loss": 0.8945, + "step": 7122 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007075923381924174, + "loss": 0.9453, + "step": 7123 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007075131522594187, + "loss": 0.8477, + "step": 7124 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007074339600380899, + "loss": 0.9102, + "step": 7125 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007073547615308307, + "loss": 0.8516, + "step": 7126 + }, + { + "epoch": 0.38, + "learning_rate": 0.000707275556740041, + "loss": 0.9453, + "step": 7127 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007071963456681211, + "loss": 0.9258, + "step": 7128 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007071171283174714, + "loss": 1.0312, + "step": 7129 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007070379046904922, + "loss": 0.8867, + "step": 7130 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007069586747895846, + "loss": 0.8164, + "step": 7131 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007068794386171493, + "loss": 0.875, + "step": 7132 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007068001961755874, + "loss": 0.8789, + "step": 7133 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007067209474673003, + "loss": 0.9453, + "step": 7134 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007066416924946895, + "loss": 0.832, + "step": 7135 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007065624312601566, + "loss": 0.8711, + "step": 7136 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007064831637661035, + "loss": 0.8555, + "step": 7137 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007064038900149321, + "loss": 0.918, + "step": 7138 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007063246100090452, + "loss": 0.9102, + "step": 7139 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007062453237508446, + "loss": 0.8516, + "step": 7140 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007061660312427333, + "loss": 0.875, + "step": 7141 + }, + { + "epoch": 0.38, + "learning_rate": 0.000706086732487114, + "loss": 0.8672, + "step": 7142 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007060074274863897, + "loss": 0.7852, + "step": 7143 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007059281162429638, + "loss": 0.9023, + "step": 7144 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007058487987592394, + "loss": 0.9375, + "step": 7145 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007057694750376203, + "loss": 0.8477, + "step": 7146 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007056901450805102, + "loss": 0.8477, + "step": 7147 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007056108088903131, + "loss": 0.8047, + "step": 7148 + }, + { + "epoch": 0.38, + "learning_rate": 0.000705531466469433, + "loss": 0.8906, + "step": 7149 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007054521178202744, + "loss": 0.8867, + "step": 7150 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007053727629452418, + "loss": 0.9141, + "step": 7151 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007052934018467399, + "loss": 0.8438, + "step": 7152 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007052140345271733, + "loss": 0.8867, + "step": 7153 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007051346609889477, + "loss": 0.9414, + "step": 7154 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007050552812344677, + "loss": 0.8828, + "step": 7155 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007049758952661394, + "loss": 0.8398, + "step": 7156 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007048965030863681, + "loss": 0.9102, + "step": 7157 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007048171046975597, + "loss": 0.9844, + "step": 7158 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007047377001021203, + "loss": 0.8594, + "step": 7159 + }, + { + "epoch": 0.38, + "learning_rate": 0.000704658289302456, + "loss": 0.793, + "step": 7160 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007045788723009731, + "loss": 0.9648, + "step": 7161 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007044994491000786, + "loss": 0.9062, + "step": 7162 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007044200197021789, + "loss": 0.8438, + "step": 7163 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007043405841096811, + "loss": 0.7891, + "step": 7164 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007042611423249925, + "loss": 0.7695, + "step": 7165 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007041816943505203, + "loss": 0.9375, + "step": 7166 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007041022401886721, + "loss": 0.8867, + "step": 7167 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007040227798418556, + "loss": 0.832, + "step": 7168 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007039433133124787, + "loss": 0.7773, + "step": 7169 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007038638406029494, + "loss": 0.918, + "step": 7170 + }, + { + "epoch": 0.39, + "learning_rate": 0.000703784361715676, + "loss": 0.8555, + "step": 7171 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007037048766530674, + "loss": 0.8594, + "step": 7172 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007036253854175315, + "loss": 0.8789, + "step": 7173 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007035458880114778, + "loss": 0.9375, + "step": 7174 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007034663844373149, + "loss": 0.8047, + "step": 7175 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007033868746974523, + "loss": 0.8516, + "step": 7176 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007033073587942994, + "loss": 0.8906, + "step": 7177 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007032278367302655, + "loss": 0.9141, + "step": 7178 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007031483085077608, + "loss": 0.8984, + "step": 7179 + }, + { + "epoch": 0.39, + "learning_rate": 0.000703068774129195, + "loss": 0.875, + "step": 7180 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007029892335969782, + "loss": 0.8477, + "step": 7181 + }, + { + "epoch": 0.39, + "learning_rate": 0.000702909686913521, + "loss": 0.9688, + "step": 7182 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007028301340812335, + "loss": 0.8633, + "step": 7183 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007027505751025269, + "loss": 0.8906, + "step": 7184 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007026710099798118, + "loss": 0.8867, + "step": 7185 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007025914387154993, + "loss": 0.8164, + "step": 7186 + }, + { + "epoch": 0.39, + "learning_rate": 0.000702511861312001, + "loss": 0.8828, + "step": 7187 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007024322777717277, + "loss": 0.8477, + "step": 7188 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007023526880970916, + "loss": 0.9375, + "step": 7189 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007022730922905046, + "loss": 0.8867, + "step": 7190 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007021934903543781, + "loss": 0.8867, + "step": 7191 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007021138822911249, + "loss": 0.8477, + "step": 7192 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007020342681031571, + "loss": 0.875, + "step": 7193 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007019546477928873, + "loss": 0.8359, + "step": 7194 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007018750213627285, + "loss": 0.875, + "step": 7195 + }, + { + "epoch": 0.39, + "learning_rate": 0.000701795388815093, + "loss": 0.8438, + "step": 7196 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007017157501523947, + "loss": 0.8906, + "step": 7197 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007016361053770464, + "loss": 0.8633, + "step": 7198 + }, + { + "epoch": 0.39, + "learning_rate": 0.000701556454491462, + "loss": 0.8633, + "step": 7199 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007014767974980548, + "loss": 0.8594, + "step": 7200 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007013971343992388, + "loss": 0.8672, + "step": 7201 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007013174651974281, + "loss": 0.875, + "step": 7202 + }, + { + "epoch": 0.39, + "learning_rate": 0.000701237789895037, + "loss": 0.7852, + "step": 7203 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007011581084944797, + "loss": 0.8594, + "step": 7204 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007010784209981711, + "loss": 0.8438, + "step": 7205 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007009987274085257, + "loss": 0.9648, + "step": 7206 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007009190277279586, + "loss": 0.8477, + "step": 7207 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007008393219588851, + "loss": 0.8164, + "step": 7208 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007007596101037204, + "loss": 0.8008, + "step": 7209 + }, + { + "epoch": 0.39, + "learning_rate": 0.00070067989216488, + "loss": 0.8633, + "step": 7210 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007006001681447798, + "loss": 0.8984, + "step": 7211 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007005204380458353, + "loss": 0.7891, + "step": 7212 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007004407018704632, + "loss": 0.8125, + "step": 7213 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007003609596210791, + "loss": 0.8711, + "step": 7214 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007002812113001, + "loss": 0.8438, + "step": 7215 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007002014569099422, + "loss": 0.7969, + "step": 7216 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007001216964530227, + "loss": 0.8555, + "step": 7217 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007000419299317584, + "loss": 0.8828, + "step": 7218 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006999621573485666, + "loss": 0.9023, + "step": 7219 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006998823787058646, + "loss": 0.9297, + "step": 7220 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006998025940060697, + "loss": 0.8203, + "step": 7221 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006997228032516001, + "loss": 0.8438, + "step": 7222 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006996430064448735, + "loss": 0.8828, + "step": 7223 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006995632035883079, + "loss": 0.8672, + "step": 7224 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006994833946843219, + "loss": 0.9102, + "step": 7225 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006994035797353337, + "loss": 0.8984, + "step": 7226 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006993237587437619, + "loss": 0.8867, + "step": 7227 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006992439317120257, + "loss": 0.8438, + "step": 7228 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006991640986425436, + "loss": 0.9297, + "step": 7229 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006990842595377354, + "loss": 0.9531, + "step": 7230 + }, + { + "epoch": 0.39, + "learning_rate": 0.00069900441440002, + "loss": 0.9219, + "step": 7231 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006989245632318172, + "loss": 0.8594, + "step": 7232 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006988447060355468, + "loss": 0.8164, + "step": 7233 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006987648428136284, + "loss": 0.8984, + "step": 7234 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006986849735684826, + "loss": 0.957, + "step": 7235 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006986050983025292, + "loss": 0.9883, + "step": 7236 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006985252170181891, + "loss": 0.8906, + "step": 7237 + }, + { + "epoch": 0.39, + "learning_rate": 0.000698445329717883, + "loss": 0.8672, + "step": 7238 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006983654364040311, + "loss": 0.8555, + "step": 7239 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006982855370790553, + "loss": 0.8594, + "step": 7240 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006982056317453762, + "loss": 1.0156, + "step": 7241 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006981257204054154, + "loss": 0.8984, + "step": 7242 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006980458030615945, + "loss": 0.8906, + "step": 7243 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006979658797163353, + "loss": 0.8203, + "step": 7244 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006978859503720595, + "loss": 0.8398, + "step": 7245 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006978060150311895, + "loss": 0.9336, + "step": 7246 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006977260736961474, + "loss": 0.8906, + "step": 7247 + }, + { + "epoch": 0.39, + "learning_rate": 0.000697646126369356, + "loss": 0.8164, + "step": 7248 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006975661730532375, + "loss": 0.8789, + "step": 7249 + }, + { + "epoch": 0.39, + "learning_rate": 0.000697486213750215, + "loss": 0.9414, + "step": 7250 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006974062484627116, + "loss": 0.8047, + "step": 7251 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006973262771931504, + "loss": 0.8359, + "step": 7252 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006972462999439549, + "loss": 0.9062, + "step": 7253 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006971663167175485, + "loss": 0.7773, + "step": 7254 + }, + { + "epoch": 0.39, + "learning_rate": 0.000697086327516355, + "loss": 1.0078, + "step": 7255 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006970063323427987, + "loss": 0.918, + "step": 7256 + }, + { + "epoch": 0.39, + "learning_rate": 0.000696926331199303, + "loss": 0.9102, + "step": 7257 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006968463240882928, + "loss": 0.9102, + "step": 7258 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006967663110121923, + "loss": 0.9336, + "step": 7259 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006966862919734262, + "loss": 0.9258, + "step": 7260 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006966062669744196, + "loss": 0.8398, + "step": 7261 + }, + { + "epoch": 0.39, + "learning_rate": 0.000696526236017597, + "loss": 0.8594, + "step": 7262 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006964461991053841, + "loss": 0.8438, + "step": 7263 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006963661562402059, + "loss": 0.9102, + "step": 7264 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006962861074244884, + "loss": 0.9141, + "step": 7265 + }, + { + "epoch": 0.39, + "learning_rate": 0.000696206052660657, + "loss": 0.8242, + "step": 7266 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006961259919511376, + "loss": 0.875, + "step": 7267 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006960459252983564, + "loss": 0.832, + "step": 7268 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006959658527047399, + "loss": 0.8203, + "step": 7269 + }, + { + "epoch": 0.39, + "learning_rate": 0.000695885774172714, + "loss": 0.8789, + "step": 7270 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006958056897047061, + "loss": 0.875, + "step": 7271 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006957255993031423, + "loss": 0.8477, + "step": 7272 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006956455029704501, + "loss": 0.8867, + "step": 7273 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006955654007090564, + "loss": 0.7734, + "step": 7274 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006954852925213886, + "loss": 0.9141, + "step": 7275 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006954051784098745, + "loss": 0.7344, + "step": 7276 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006953250583769416, + "loss": 0.9023, + "step": 7277 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006952449324250178, + "loss": 0.8438, + "step": 7278 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006951648005565311, + "loss": 0.9805, + "step": 7279 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006950846627739098, + "loss": 0.8125, + "step": 7280 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006950045190795827, + "loss": 0.793, + "step": 7281 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006949243694759778, + "loss": 0.8867, + "step": 7282 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006948442139655245, + "loss": 0.875, + "step": 7283 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006947640525506514, + "loss": 0.8438, + "step": 7284 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006946838852337875, + "loss": 0.8047, + "step": 7285 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006946037120173627, + "loss": 0.7695, + "step": 7286 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006945235329038061, + "loss": 0.8555, + "step": 7287 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006944433478955473, + "loss": 0.8203, + "step": 7288 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006943631569950166, + "loss": 0.9492, + "step": 7289 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006942829602046436, + "loss": 0.9688, + "step": 7290 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006942027575268588, + "loss": 0.9141, + "step": 7291 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006941225489640926, + "loss": 0.8633, + "step": 7292 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006940423345187755, + "loss": 0.75, + "step": 7293 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006939621141933382, + "loss": 0.9141, + "step": 7294 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006938818879902117, + "loss": 0.8633, + "step": 7295 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006938016559118272, + "loss": 0.8398, + "step": 7296 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006937214179606158, + "loss": 0.8711, + "step": 7297 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006936411741390093, + "loss": 0.8984, + "step": 7298 + }, + { + "epoch": 0.39, + "learning_rate": 0.000693560924449439, + "loss": 0.8633, + "step": 7299 + }, + { + "epoch": 0.39, + "learning_rate": 0.000693480668894337, + "loss": 0.793, + "step": 7300 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006934004074761351, + "loss": 0.9023, + "step": 7301 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006933201401972656, + "loss": 0.9102, + "step": 7302 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006932398670601607, + "loss": 0.8125, + "step": 7303 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006931595880672533, + "loss": 0.8516, + "step": 7304 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006930793032209757, + "loss": 0.8438, + "step": 7305 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006929990125237609, + "loss": 0.7773, + "step": 7306 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006929187159780423, + "loss": 0.8242, + "step": 7307 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006928384135862528, + "loss": 0.875, + "step": 7308 + }, + { + "epoch": 0.39, + "learning_rate": 0.000692758105350826, + "loss": 0.9297, + "step": 7309 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006926777912741953, + "loss": 0.9297, + "step": 7310 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006925974713587949, + "loss": 0.7852, + "step": 7311 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006925171456070581, + "loss": 0.8242, + "step": 7312 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006924368140214194, + "loss": 0.9023, + "step": 7313 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006923564766043135, + "loss": 0.8594, + "step": 7314 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006922761333581741, + "loss": 0.8242, + "step": 7315 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006921957842854364, + "loss": 0.9492, + "step": 7316 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006921154293885353, + "loss": 0.8477, + "step": 7317 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006920350686699055, + "loss": 0.8672, + "step": 7318 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006919547021319823, + "loss": 0.8906, + "step": 7319 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006918743297772012, + "loss": 0.8633, + "step": 7320 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006917939516079974, + "loss": 0.8828, + "step": 7321 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006917135676268072, + "loss": 0.9453, + "step": 7322 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006916331778360658, + "loss": 0.8906, + "step": 7323 + }, + { + "epoch": 0.39, + "learning_rate": 0.00069155278223821, + "loss": 0.8984, + "step": 7324 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006914723808356753, + "loss": 0.918, + "step": 7325 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006913919736308986, + "loss": 0.8945, + "step": 7326 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006913115606263166, + "loss": 0.9375, + "step": 7327 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006912311418243659, + "loss": 0.7969, + "step": 7328 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006911507172274832, + "loss": 0.875, + "step": 7329 + }, + { + "epoch": 0.39, + "learning_rate": 0.000691070286838106, + "loss": 0.9297, + "step": 7330 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006909898506586715, + "loss": 0.8242, + "step": 7331 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006909094086916171, + "loss": 0.9531, + "step": 7332 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006908289609393807, + "loss": 0.8867, + "step": 7333 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006907485074043998, + "loss": 0.8828, + "step": 7334 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006906680480891125, + "loss": 0.9414, + "step": 7335 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006905875829959571, + "loss": 0.8398, + "step": 7336 + }, + { + "epoch": 0.39, + "learning_rate": 0.000690507112127372, + "loss": 0.8555, + "step": 7337 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006904266354857953, + "loss": 0.8281, + "step": 7338 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006903461530736664, + "loss": 0.8945, + "step": 7339 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006902656648934235, + "loss": 0.8945, + "step": 7340 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006901851709475062, + "loss": 0.8711, + "step": 7341 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006901046712383534, + "loss": 0.8789, + "step": 7342 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006900241657684047, + "loss": 0.8008, + "step": 7343 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006899436545400995, + "loss": 0.8984, + "step": 7344 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006898631375558777, + "loss": 0.7891, + "step": 7345 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006897826148181789, + "loss": 0.8398, + "step": 7346 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006897020863294439, + "loss": 0.8516, + "step": 7347 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006896215520921122, + "loss": 0.8242, + "step": 7348 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006895410121086248, + "loss": 0.8711, + "step": 7349 + }, + { + "epoch": 0.4, + "learning_rate": 0.000689460466381422, + "loss": 0.8984, + "step": 7350 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006893799149129448, + "loss": 0.9297, + "step": 7351 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006892993577056341, + "loss": 0.8633, + "step": 7352 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006892187947619311, + "loss": 0.8359, + "step": 7353 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006891382260842769, + "loss": 0.8398, + "step": 7354 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006890576516751134, + "loss": 0.9219, + "step": 7355 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006889770715368818, + "loss": 0.7422, + "step": 7356 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006888964856720244, + "loss": 0.8633, + "step": 7357 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006888158940829828, + "loss": 0.918, + "step": 7358 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006887352967721995, + "loss": 0.8438, + "step": 7359 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006886546937421167, + "loss": 0.8516, + "step": 7360 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006885740849951771, + "loss": 0.8867, + "step": 7361 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006884934705338232, + "loss": 0.8633, + "step": 7362 + }, + { + "epoch": 0.4, + "learning_rate": 0.000688412850360498, + "loss": 0.8906, + "step": 7363 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006883322244776444, + "loss": 0.9219, + "step": 7364 + }, + { + "epoch": 0.4, + "learning_rate": 0.000688251592887706, + "loss": 0.8359, + "step": 7365 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006881709555931257, + "loss": 0.8398, + "step": 7366 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006880903125963475, + "loss": 0.9766, + "step": 7367 + }, + { + "epoch": 0.4, + "learning_rate": 0.000688009663899815, + "loss": 0.8789, + "step": 7368 + }, + { + "epoch": 0.4, + "learning_rate": 0.000687929009505972, + "loss": 0.9492, + "step": 7369 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006878483494172628, + "loss": 0.7734, + "step": 7370 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006877676836361315, + "loss": 0.9688, + "step": 7371 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006876870121650228, + "loss": 0.9414, + "step": 7372 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006876063350063808, + "loss": 0.832, + "step": 7373 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006875256521626507, + "loss": 0.9492, + "step": 7374 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006874449636362775, + "loss": 0.8516, + "step": 7375 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006873642694297061, + "loss": 0.9648, + "step": 7376 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006872835695453819, + "loss": 0.957, + "step": 7377 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006872028639857504, + "loss": 0.8047, + "step": 7378 + }, + { + "epoch": 0.4, + "learning_rate": 0.000687122152753257, + "loss": 0.9609, + "step": 7379 + }, + { + "epoch": 0.4, + "learning_rate": 0.000687041435850348, + "loss": 0.8945, + "step": 7380 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006869607132794687, + "loss": 0.8711, + "step": 7381 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006868799850430659, + "loss": 0.8711, + "step": 7382 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006867992511435857, + "loss": 0.8867, + "step": 7383 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006867185115834747, + "loss": 0.8203, + "step": 7384 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006866377663651792, + "loss": 0.875, + "step": 7385 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006865570154911464, + "loss": 0.7969, + "step": 7386 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006864762589638233, + "loss": 0.832, + "step": 7387 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006863954967856571, + "loss": 0.8984, + "step": 7388 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006863147289590948, + "loss": 0.8438, + "step": 7389 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006862339554865846, + "loss": 0.8945, + "step": 7390 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006861531763705734, + "loss": 0.8789, + "step": 7391 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006860723916135097, + "loss": 0.832, + "step": 7392 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006859916012178412, + "loss": 0.8555, + "step": 7393 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006859108051860164, + "loss": 0.9609, + "step": 7394 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006858300035204834, + "loss": 0.9141, + "step": 7395 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006857491962236909, + "loss": 0.9023, + "step": 7396 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006856683832980875, + "loss": 0.8398, + "step": 7397 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006855875647461224, + "loss": 0.8359, + "step": 7398 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006855067405702443, + "loss": 0.9375, + "step": 7399 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006854259107729027, + "loss": 0.8086, + "step": 7400 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006853450753565469, + "loss": 0.832, + "step": 7401 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006852642343236265, + "loss": 0.8203, + "step": 7402 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006851833876765911, + "loss": 0.9023, + "step": 7403 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006851025354178911, + "loss": 0.8398, + "step": 7404 + }, + { + "epoch": 0.4, + "learning_rate": 0.000685021677549976, + "loss": 0.8945, + "step": 7405 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006849408140752962, + "loss": 0.8906, + "step": 7406 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006848599449963025, + "loss": 0.9062, + "step": 7407 + }, + { + "epoch": 0.4, + "learning_rate": 0.000684779070315445, + "loss": 0.8477, + "step": 7408 + }, + { + "epoch": 0.4, + "learning_rate": 0.000684698190035175, + "loss": 0.8086, + "step": 7409 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006846173041579429, + "loss": 0.9297, + "step": 7410 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006845364126862001, + "loss": 0.8164, + "step": 7411 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006844555156223978, + "loss": 0.7969, + "step": 7412 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006843746129689876, + "loss": 0.9336, + "step": 7413 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006842937047284209, + "loss": 0.8711, + "step": 7414 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006842127909031496, + "loss": 0.832, + "step": 7415 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006841318714956256, + "loss": 0.875, + "step": 7416 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006840509465083012, + "loss": 0.8594, + "step": 7417 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006839700159436284, + "loss": 0.8516, + "step": 7418 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006838890798040598, + "loss": 0.9375, + "step": 7419 + }, + { + "epoch": 0.4, + "learning_rate": 0.000683808138092048, + "loss": 0.9336, + "step": 7420 + }, + { + "epoch": 0.4, + "learning_rate": 0.000683727190810046, + "loss": 0.9297, + "step": 7421 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006836462379605065, + "loss": 0.8672, + "step": 7422 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006835652795458829, + "loss": 0.8555, + "step": 7423 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006834843155686281, + "loss": 0.8633, + "step": 7424 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006834033460311961, + "loss": 0.8867, + "step": 7425 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006833223709360401, + "loss": 0.8945, + "step": 7426 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006832413902856141, + "loss": 0.8867, + "step": 7427 + }, + { + "epoch": 0.4, + "learning_rate": 0.000683160404082372, + "loss": 0.8516, + "step": 7428 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006830794123287681, + "loss": 0.7969, + "step": 7429 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006829984150272566, + "loss": 0.8516, + "step": 7430 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006829174121802922, + "loss": 0.9609, + "step": 7431 + }, + { + "epoch": 0.4, + "learning_rate": 0.000682836403790329, + "loss": 0.8438, + "step": 7432 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006827553898598223, + "loss": 0.8438, + "step": 7433 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006826743703912271, + "loss": 0.9062, + "step": 7434 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006825933453869983, + "loss": 0.8164, + "step": 7435 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006825123148495915, + "loss": 0.8203, + "step": 7436 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006824312787814619, + "loss": 0.9062, + "step": 7437 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006823502371850654, + "loss": 0.9648, + "step": 7438 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006822691900628577, + "loss": 0.8438, + "step": 7439 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006821881374172947, + "loss": 0.8477, + "step": 7440 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006821070792508329, + "loss": 0.9531, + "step": 7441 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006820260155659283, + "loss": 0.9297, + "step": 7442 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006819449463650374, + "loss": 0.9375, + "step": 7443 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006818638716506171, + "loss": 0.8555, + "step": 7444 + }, + { + "epoch": 0.4, + "learning_rate": 0.000681782791425124, + "loss": 0.8359, + "step": 7445 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006817017056910154, + "loss": 0.8945, + "step": 7446 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006816206144507481, + "loss": 0.9102, + "step": 7447 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006815395177067796, + "loss": 0.8477, + "step": 7448 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006814584154615675, + "loss": 0.8828, + "step": 7449 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006813773077175692, + "loss": 0.8672, + "step": 7450 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006812961944772428, + "loss": 0.9258, + "step": 7451 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006812150757430462, + "loss": 0.8398, + "step": 7452 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006811339515174374, + "loss": 0.7578, + "step": 7453 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006810528218028751, + "loss": 0.9062, + "step": 7454 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006809716866018173, + "loss": 0.8398, + "step": 7455 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006808905459167231, + "loss": 0.957, + "step": 7456 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006808093997500512, + "loss": 0.9297, + "step": 7457 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006807282481042605, + "loss": 0.8828, + "step": 7458 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006806470909818102, + "loss": 0.8789, + "step": 7459 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006805659283851597, + "loss": 0.75, + "step": 7460 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006804847603167685, + "loss": 0.9062, + "step": 7461 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006804035867790962, + "loss": 0.9023, + "step": 7462 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006803224077746024, + "loss": 0.8359, + "step": 7463 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006802412233057476, + "loss": 0.8555, + "step": 7464 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006801600333749915, + "loss": 0.8945, + "step": 7465 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006800788379847946, + "loss": 0.8828, + "step": 7466 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006799976371376175, + "loss": 0.8086, + "step": 7467 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006799164308359207, + "loss": 0.8711, + "step": 7468 + }, + { + "epoch": 0.4, + "learning_rate": 0.000679835219082165, + "loss": 0.8516, + "step": 7469 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006797540018788115, + "loss": 0.8711, + "step": 7470 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006796727792283214, + "loss": 0.793, + "step": 7471 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006795915511331557, + "loss": 0.918, + "step": 7472 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006795103175957761, + "loss": 0.8594, + "step": 7473 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006794290786186443, + "loss": 0.8242, + "step": 7474 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006793478342042219, + "loss": 0.9766, + "step": 7475 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006792665843549711, + "loss": 0.9453, + "step": 7476 + }, + { + "epoch": 0.4, + "learning_rate": 0.000679185329073354, + "loss": 0.8945, + "step": 7477 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006791040683618328, + "loss": 0.8242, + "step": 7478 + }, + { + "epoch": 0.4, + "learning_rate": 0.00067902280222287, + "loss": 0.8242, + "step": 7479 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006789415306589281, + "loss": 0.8047, + "step": 7480 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006788602536724702, + "loss": 0.918, + "step": 7481 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006787789712659589, + "loss": 0.918, + "step": 7482 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006786976834418577, + "loss": 0.9648, + "step": 7483 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006786163902026297, + "loss": 0.8164, + "step": 7484 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006785350915507382, + "loss": 0.9531, + "step": 7485 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006784537874886472, + "loss": 0.9023, + "step": 7486 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006783724780188201, + "loss": 0.875, + "step": 7487 + }, + { + "epoch": 0.4, + "learning_rate": 0.000678291163143721, + "loss": 0.8359, + "step": 7488 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006782098428658141, + "loss": 0.9336, + "step": 7489 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006781285171875635, + "loss": 0.9023, + "step": 7490 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006780471861114336, + "loss": 0.8594, + "step": 7491 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006779658496398893, + "loss": 0.8828, + "step": 7492 + }, + { + "epoch": 0.4, + "learning_rate": 0.000677884507775395, + "loss": 0.7656, + "step": 7493 + }, + { + "epoch": 0.4, + "learning_rate": 0.000677803160520416, + "loss": 0.9062, + "step": 7494 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006777218078774171, + "loss": 0.9531, + "step": 7495 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006776404498488635, + "loss": 0.8867, + "step": 7496 + }, + { + "epoch": 0.4, + "learning_rate": 0.000677559086437221, + "loss": 0.9219, + "step": 7497 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006774777176449546, + "loss": 0.9062, + "step": 7498 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006773963434745307, + "loss": 0.8867, + "step": 7499 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006773149639284149, + "loss": 0.8359, + "step": 7500 + }, + { + "epoch": 0.4, + "learning_rate": 0.000677233579009073, + "loss": 0.957, + "step": 7501 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006771521887189715, + "loss": 0.8281, + "step": 7502 + }, + { + "epoch": 0.4, + "learning_rate": 0.000677070793060577, + "loss": 0.9297, + "step": 7503 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006769893920363557, + "loss": 0.8945, + "step": 7504 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006769079856487744, + "loss": 0.8516, + "step": 7505 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006768265739003, + "loss": 0.8516, + "step": 7506 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006767451567933997, + "loss": 0.9141, + "step": 7507 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006766637343305404, + "loss": 0.8984, + "step": 7508 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006765823065141898, + "loss": 0.9219, + "step": 7509 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006765008733468152, + "loss": 0.9453, + "step": 7510 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006764194348308845, + "loss": 0.875, + "step": 7511 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006763379909688652, + "loss": 0.832, + "step": 7512 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006762565417632257, + "loss": 0.8281, + "step": 7513 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006761750872164338, + "loss": 0.8125, + "step": 7514 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006760936273309583, + "loss": 0.8281, + "step": 7515 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006760121621092673, + "loss": 0.8711, + "step": 7516 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006759306915538299, + "loss": 0.9023, + "step": 7517 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006758492156671143, + "loss": 0.8555, + "step": 7518 + }, + { + "epoch": 0.4, + "learning_rate": 0.00067576773445159, + "loss": 0.8633, + "step": 7519 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006756862479097258, + "loss": 0.8164, + "step": 7520 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006756047560439914, + "loss": 0.8633, + "step": 7521 + }, + { + "epoch": 0.4, + "learning_rate": 0.000675523258856856, + "loss": 0.875, + "step": 7522 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006754417563507893, + "loss": 0.8555, + "step": 7523 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006753602485282612, + "loss": 0.832, + "step": 7524 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006752787353917413, + "loss": 0.9805, + "step": 7525 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006751972169437002, + "loss": 0.832, + "step": 7526 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006751156931866078, + "loss": 0.7695, + "step": 7527 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006750341641229347, + "loss": 0.8555, + "step": 7528 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006749526297551515, + "loss": 0.832, + "step": 7529 + }, + { + "epoch": 0.4, + "learning_rate": 0.000674871090085729, + "loss": 0.8359, + "step": 7530 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006747895451171378, + "loss": 0.8164, + "step": 7531 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006747079948518496, + "loss": 0.9609, + "step": 7532 + }, + { + "epoch": 0.4, + "learning_rate": 0.000674626439292335, + "loss": 0.8047, + "step": 7533 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006745448784410657, + "loss": 0.8438, + "step": 7534 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006744633123005134, + "loss": 0.8164, + "step": 7535 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006743817408731496, + "loss": 0.8398, + "step": 7536 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006743001641614461, + "loss": 0.8203, + "step": 7537 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006742185821678752, + "loss": 0.8789, + "step": 7538 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006741369948949088, + "loss": 0.8555, + "step": 7539 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006740554023450198, + "loss": 0.7773, + "step": 7540 + }, + { + "epoch": 0.41, + "learning_rate": 0.00067397380452068, + "loss": 0.8984, + "step": 7541 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006738922014243627, + "loss": 0.8945, + "step": 7542 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006738105930585404, + "loss": 0.8594, + "step": 7543 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006737289794256862, + "loss": 0.957, + "step": 7544 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006736473605282733, + "loss": 0.8945, + "step": 7545 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006735657363687751, + "loss": 0.8594, + "step": 7546 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006734841069496646, + "loss": 0.875, + "step": 7547 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006734024722734161, + "loss": 0.8477, + "step": 7548 + }, + { + "epoch": 0.41, + "learning_rate": 0.000673320832342503, + "loss": 0.8203, + "step": 7549 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006732391871593995, + "loss": 0.8203, + "step": 7550 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006731575367265792, + "loss": 0.8281, + "step": 7551 + }, + { + "epoch": 0.41, + "learning_rate": 0.000673075881046517, + "loss": 0.9062, + "step": 7552 + }, + { + "epoch": 0.41, + "learning_rate": 0.000672994220121687, + "loss": 0.8516, + "step": 7553 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006729125539545641, + "loss": 0.8906, + "step": 7554 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006728308825476226, + "loss": 0.8828, + "step": 7555 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006727492059033376, + "loss": 0.8711, + "step": 7556 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006726675240241845, + "loss": 0.8906, + "step": 7557 + }, + { + "epoch": 0.41, + "learning_rate": 0.000672585836912638, + "loss": 0.8438, + "step": 7558 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006725041445711737, + "loss": 0.8555, + "step": 7559 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006724224470022673, + "loss": 0.8945, + "step": 7560 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006723407442083945, + "loss": 0.8633, + "step": 7561 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006722590361920306, + "loss": 0.8945, + "step": 7562 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006721773229556525, + "loss": 0.8555, + "step": 7563 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006720956045017358, + "loss": 0.9492, + "step": 7564 + }, + { + "epoch": 0.41, + "learning_rate": 0.000672013880832757, + "loss": 0.9805, + "step": 7565 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006719321519511925, + "loss": 0.8164, + "step": 7566 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006718504178595192, + "loss": 0.8867, + "step": 7567 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006717686785602137, + "loss": 0.9375, + "step": 7568 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006716869340557529, + "loss": 0.9531, + "step": 7569 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006716051843486141, + "loss": 0.9453, + "step": 7570 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006715234294412745, + "loss": 0.8984, + "step": 7571 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006714416693362114, + "loss": 0.8828, + "step": 7572 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006713599040359028, + "loss": 0.9375, + "step": 7573 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006712781335428262, + "loss": 0.8828, + "step": 7574 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006711963578594595, + "loss": 0.875, + "step": 7575 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006711145769882808, + "loss": 0.8945, + "step": 7576 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006710327909317684, + "loss": 0.9141, + "step": 7577 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006709509996924006, + "loss": 0.8398, + "step": 7578 + }, + { + "epoch": 0.41, + "learning_rate": 0.000670869203272656, + "loss": 0.8125, + "step": 7579 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006707874016750131, + "loss": 0.8359, + "step": 7580 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006707055949019511, + "loss": 0.918, + "step": 7581 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006706237829559487, + "loss": 0.832, + "step": 7582 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006705419658394853, + "loss": 0.9375, + "step": 7583 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006704601435550401, + "loss": 0.8984, + "step": 7584 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006703783161050927, + "loss": 0.8594, + "step": 7585 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006702964834921226, + "loss": 0.8516, + "step": 7586 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006702146457186097, + "loss": 0.8125, + "step": 7587 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006701328027870339, + "loss": 0.957, + "step": 7588 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006700509546998753, + "loss": 0.8594, + "step": 7589 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006699691014596141, + "loss": 0.832, + "step": 7590 + }, + { + "epoch": 0.41, + "learning_rate": 0.000669887243068731, + "loss": 0.875, + "step": 7591 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006698053795297063, + "loss": 0.9727, + "step": 7592 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006697235108450207, + "loss": 0.8789, + "step": 7593 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006696416370171553, + "loss": 0.8711, + "step": 7594 + }, + { + "epoch": 0.41, + "learning_rate": 0.000669559758048591, + "loss": 0.8164, + "step": 7595 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006694778739418092, + "loss": 0.832, + "step": 7596 + }, + { + "epoch": 0.41, + "learning_rate": 0.000669395984699291, + "loss": 0.8789, + "step": 7597 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006693140903235182, + "loss": 0.8672, + "step": 7598 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006692321908169721, + "loss": 0.8906, + "step": 7599 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006691502861821348, + "loss": 0.9453, + "step": 7600 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006690683764214883, + "loss": 0.918, + "step": 7601 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006689864615375145, + "loss": 0.8281, + "step": 7602 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006689045415326959, + "loss": 0.8945, + "step": 7603 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006688226164095148, + "loss": 0.8594, + "step": 7604 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006687406861704539, + "loss": 0.8242, + "step": 7605 + }, + { + "epoch": 0.41, + "learning_rate": 0.000668658750817996, + "loss": 0.875, + "step": 7606 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006685768103546238, + "loss": 0.8359, + "step": 7607 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006684948647828206, + "loss": 0.8867, + "step": 7608 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006684129141050696, + "loss": 0.8594, + "step": 7609 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006683309583238539, + "loss": 0.8906, + "step": 7610 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006682489974416575, + "loss": 0.8398, + "step": 7611 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006681670314609636, + "loss": 0.8789, + "step": 7612 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006680850603842563, + "loss": 0.9141, + "step": 7613 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006680030842140196, + "loss": 0.8672, + "step": 7614 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006679211029527373, + "loss": 0.8633, + "step": 7615 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006678391166028944, + "loss": 0.8242, + "step": 7616 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006677571251669747, + "loss": 0.8164, + "step": 7617 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006676751286474632, + "loss": 0.7852, + "step": 7618 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006675931270468444, + "loss": 0.9648, + "step": 7619 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006675111203676034, + "loss": 0.8516, + "step": 7620 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006674291086122252, + "loss": 0.8359, + "step": 7621 + }, + { + "epoch": 0.41, + "learning_rate": 0.000667347091783195, + "loss": 0.9609, + "step": 7622 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006672650698829982, + "loss": 0.8203, + "step": 7623 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006671830429141205, + "loss": 0.8789, + "step": 7624 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006671010108790471, + "loss": 0.8711, + "step": 7625 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006670189737802646, + "loss": 0.8633, + "step": 7626 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006669369316202584, + "loss": 0.9023, + "step": 7627 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006668548844015147, + "loss": 0.8516, + "step": 7628 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006667728321265201, + "loss": 0.8789, + "step": 7629 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006666907747977606, + "loss": 0.8906, + "step": 7630 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006666087124177234, + "loss": 0.9219, + "step": 7631 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006665266449888947, + "loss": 0.9023, + "step": 7632 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006664445725137617, + "loss": 0.7891, + "step": 7633 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006663624949948115, + "loss": 0.8711, + "step": 7634 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006662804124345312, + "loss": 0.7891, + "step": 7635 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006661983248354083, + "loss": 0.8945, + "step": 7636 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006661162321999301, + "loss": 0.875, + "step": 7637 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006660341345305843, + "loss": 0.8398, + "step": 7638 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006659520318298591, + "loss": 0.8203, + "step": 7639 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006658699241002419, + "loss": 0.8867, + "step": 7640 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006657878113442213, + "loss": 0.7422, + "step": 7641 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006657056935642856, + "loss": 0.8477, + "step": 7642 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006656235707629229, + "loss": 0.8438, + "step": 7643 + }, + { + "epoch": 0.41, + "learning_rate": 0.000665541442942622, + "loss": 0.9414, + "step": 7644 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006654593101058716, + "loss": 0.8945, + "step": 7645 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006653771722551606, + "loss": 0.8984, + "step": 7646 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006652950293929781, + "loss": 0.8633, + "step": 7647 + }, + { + "epoch": 0.41, + "learning_rate": 0.000665212881521813, + "loss": 0.7656, + "step": 7648 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006651307286441552, + "loss": 0.9688, + "step": 7649 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006650485707624938, + "loss": 0.7617, + "step": 7650 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006649664078793186, + "loss": 0.8047, + "step": 7651 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006648842399971192, + "loss": 0.7656, + "step": 7652 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006648020671183859, + "loss": 0.8242, + "step": 7653 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006647198892456086, + "loss": 0.832, + "step": 7654 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006646377063812774, + "loss": 0.8984, + "step": 7655 + }, + { + "epoch": 0.41, + "learning_rate": 0.000664555518527883, + "loss": 0.8242, + "step": 7656 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006644733256879159, + "loss": 0.8555, + "step": 7657 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006643911278638667, + "loss": 0.9102, + "step": 7658 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006643089250582265, + "loss": 0.7656, + "step": 7659 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006642267172734859, + "loss": 0.9102, + "step": 7660 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006641445045121364, + "loss": 0.9062, + "step": 7661 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006640622867766692, + "loss": 0.8125, + "step": 7662 + }, + { + "epoch": 0.41, + "learning_rate": 0.000663980064069576, + "loss": 0.8477, + "step": 7663 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006638978363933481, + "loss": 0.8867, + "step": 7664 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006638156037504773, + "loss": 0.8633, + "step": 7665 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006637333661434556, + "loss": 0.9023, + "step": 7666 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006636511235747751, + "loss": 0.9102, + "step": 7667 + }, + { + "epoch": 0.41, + "learning_rate": 0.000663568876046928, + "loss": 0.8945, + "step": 7668 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006634866235624067, + "loss": 0.9727, + "step": 7669 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006634043661237037, + "loss": 0.8477, + "step": 7670 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006633221037333116, + "loss": 1.0078, + "step": 7671 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006632398363937234, + "loss": 0.7969, + "step": 7672 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006631575641074318, + "loss": 0.8281, + "step": 7673 + }, + { + "epoch": 0.41, + "learning_rate": 0.00066307528687693, + "loss": 0.7695, + "step": 7674 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006629930047047115, + "loss": 0.8789, + "step": 7675 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006629107175932696, + "loss": 0.8242, + "step": 7676 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006628284255450977, + "loss": 0.875, + "step": 7677 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006627461285626897, + "loss": 0.8945, + "step": 7678 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006626638266485394, + "loss": 0.957, + "step": 7679 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006625815198051409, + "loss": 0.9219, + "step": 7680 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006624992080349881, + "loss": 0.9258, + "step": 7681 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006624168913405757, + "loss": 0.7695, + "step": 7682 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006623345697243979, + "loss": 0.9023, + "step": 7683 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006622522431889493, + "loss": 0.8945, + "step": 7684 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006621699117367251, + "loss": 0.8984, + "step": 7685 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006620875753702196, + "loss": 0.8281, + "step": 7686 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006620052340919281, + "loss": 0.9062, + "step": 7687 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006619228879043461, + "loss": 0.8867, + "step": 7688 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006618405368099685, + "loss": 0.8359, + "step": 7689 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006617581808112911, + "loss": 0.832, + "step": 7690 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006616758199108094, + "loss": 0.8711, + "step": 7691 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006615934541110194, + "loss": 0.7734, + "step": 7692 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006615110834144168, + "loss": 0.8867, + "step": 7693 + }, + { + "epoch": 0.41, + "learning_rate": 0.000661428707823498, + "loss": 1.0625, + "step": 7694 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006613463273407589, + "loss": 0.7656, + "step": 7695 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006612639419686963, + "loss": 0.9219, + "step": 7696 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006611815517098065, + "loss": 0.8906, + "step": 7697 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006610991565665861, + "loss": 0.9844, + "step": 7698 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006610167565415322, + "loss": 0.9414, + "step": 7699 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006609343516371416, + "loss": 0.7812, + "step": 7700 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006608519418559115, + "loss": 0.9219, + "step": 7701 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006607695272003391, + "loss": 0.8125, + "step": 7702 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006606871076729222, + "loss": 0.9219, + "step": 7703 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006606046832761577, + "loss": 0.9336, + "step": 7704 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006605222540125442, + "loss": 0.8398, + "step": 7705 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006604398198845788, + "loss": 0.8398, + "step": 7706 + }, + { + "epoch": 0.41, + "learning_rate": 0.00066035738089476, + "loss": 0.7773, + "step": 7707 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006602749370455858, + "loss": 0.7969, + "step": 7708 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006601924883395544, + "loss": 0.8906, + "step": 7709 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006601100347791647, + "loss": 0.8867, + "step": 7710 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006600275763669148, + "loss": 0.7969, + "step": 7711 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006599451131053035, + "loss": 0.8789, + "step": 7712 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006598626449968302, + "loss": 0.8281, + "step": 7713 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006597801720439935, + "loss": 0.7773, + "step": 7714 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006596976942492928, + "loss": 0.7812, + "step": 7715 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006596152116152273, + "loss": 0.9141, + "step": 7716 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006595327241442966, + "loss": 0.8789, + "step": 7717 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006594502318390005, + "loss": 0.8867, + "step": 7718 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006593677347018384, + "loss": 0.8672, + "step": 7719 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006592852327353105, + "loss": 0.8789, + "step": 7720 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006592027259419168, + "loss": 0.9062, + "step": 7721 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006591202143241576, + "loss": 0.8281, + "step": 7722 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006590376978845333, + "loss": 0.9219, + "step": 7723 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006589551766255442, + "loss": 0.8633, + "step": 7724 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006588726505496913, + "loss": 0.9297, + "step": 7725 + }, + { + "epoch": 0.42, + "learning_rate": 0.000658790119659475, + "loss": 0.8945, + "step": 7726 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006587075839573966, + "loss": 0.7812, + "step": 7727 + }, + { + "epoch": 0.42, + "learning_rate": 0.000658625043445957, + "loss": 0.8828, + "step": 7728 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006585424981276576, + "loss": 0.9375, + "step": 7729 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006584599480049997, + "loss": 0.8438, + "step": 7730 + }, + { + "epoch": 0.42, + "learning_rate": 0.000658377393080485, + "loss": 0.9453, + "step": 7731 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006582948333566148, + "loss": 0.7812, + "step": 7732 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006582122688358914, + "loss": 0.9844, + "step": 7733 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006581296995208164, + "loss": 0.8008, + "step": 7734 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006580471254138921, + "loss": 0.9414, + "step": 7735 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006579645465176208, + "loss": 0.832, + "step": 7736 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006578819628345049, + "loss": 0.8086, + "step": 7737 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006577993743670468, + "loss": 0.8789, + "step": 7738 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006577167811177495, + "loss": 0.875, + "step": 7739 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006576341830891155, + "loss": 0.9492, + "step": 7740 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006575515802836479, + "loss": 0.8711, + "step": 7741 + }, + { + "epoch": 0.42, + "learning_rate": 0.00065746897270385, + "loss": 0.7656, + "step": 7742 + }, + { + "epoch": 0.42, + "learning_rate": 0.000657386360352225, + "loss": 0.9219, + "step": 7743 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006573037432312763, + "loss": 0.8633, + "step": 7744 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006572211213435073, + "loss": 0.8633, + "step": 7745 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006571384946914221, + "loss": 0.875, + "step": 7746 + }, + { + "epoch": 0.42, + "learning_rate": 0.000657055863277524, + "loss": 0.9727, + "step": 7747 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006569732271043178, + "loss": 0.875, + "step": 7748 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006568905861743068, + "loss": 0.8125, + "step": 7749 + }, + { + "epoch": 0.42, + "learning_rate": 0.000656807940489996, + "loss": 0.8711, + "step": 7750 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006567252900538894, + "loss": 0.8711, + "step": 7751 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006566426348684916, + "loss": 0.7969, + "step": 7752 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006565599749363076, + "loss": 0.8164, + "step": 7753 + }, + { + "epoch": 0.42, + "learning_rate": 0.000656477310259842, + "loss": 0.9062, + "step": 7754 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006563946408415998, + "loss": 0.9141, + "step": 7755 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006563119666840864, + "loss": 0.8203, + "step": 7756 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006562292877898069, + "loss": 0.8164, + "step": 7757 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006561466041612669, + "loss": 0.8828, + "step": 7758 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006560639158009715, + "loss": 0.8555, + "step": 7759 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006559812227114272, + "loss": 0.8008, + "step": 7760 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006558985248951394, + "loss": 0.8438, + "step": 7761 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006558158223546142, + "loss": 0.832, + "step": 7762 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006557331150923578, + "loss": 0.7812, + "step": 7763 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006556504031108763, + "loss": 0.8945, + "step": 7764 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006555676864126764, + "loss": 0.8711, + "step": 7765 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006554849650002647, + "loss": 0.8555, + "step": 7766 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006554022388761476, + "loss": 0.9492, + "step": 7767 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006553195080428322, + "loss": 0.8594, + "step": 7768 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006552367725028256, + "loss": 0.8164, + "step": 7769 + }, + { + "epoch": 0.42, + "learning_rate": 0.000655154032258635, + "loss": 0.8867, + "step": 7770 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006550712873127675, + "loss": 0.8242, + "step": 7771 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006549885376677306, + "loss": 0.832, + "step": 7772 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006549057833260319, + "loss": 0.832, + "step": 7773 + }, + { + "epoch": 0.42, + "learning_rate": 0.000654823024290179, + "loss": 0.8242, + "step": 7774 + }, + { + "epoch": 0.42, + "learning_rate": 0.00065474026056268, + "loss": 0.8867, + "step": 7775 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006546574921460429, + "loss": 0.7969, + "step": 7776 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006545747190427755, + "loss": 0.8242, + "step": 7777 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006544919412553867, + "loss": 0.9453, + "step": 7778 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006544091587863844, + "loss": 0.9336, + "step": 7779 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006543263716382773, + "loss": 0.8711, + "step": 7780 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006542435798135744, + "loss": 0.8867, + "step": 7781 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006541607833147842, + "loss": 0.9453, + "step": 7782 + }, + { + "epoch": 0.42, + "learning_rate": 0.000654077982144416, + "loss": 0.8594, + "step": 7783 + }, + { + "epoch": 0.42, + "learning_rate": 0.000653995176304979, + "loss": 0.9219, + "step": 7784 + }, + { + "epoch": 0.42, + "learning_rate": 0.000653912365798982, + "loss": 0.7734, + "step": 7785 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006538295506289349, + "loss": 0.8477, + "step": 7786 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006537467307973472, + "loss": 0.9258, + "step": 7787 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006536639063067282, + "loss": 0.8828, + "step": 7788 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006535810771595885, + "loss": 0.9531, + "step": 7789 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006534982433584373, + "loss": 0.8945, + "step": 7790 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006534154049057854, + "loss": 0.8047, + "step": 7791 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006533325618041426, + "loss": 0.9688, + "step": 7792 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006532497140560197, + "loss": 0.875, + "step": 7793 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006531668616639268, + "loss": 0.8672, + "step": 7794 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006530840046303751, + "loss": 0.8164, + "step": 7795 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006530011429578752, + "loss": 0.8242, + "step": 7796 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006529182766489379, + "loss": 0.8086, + "step": 7797 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006528354057060746, + "loss": 0.9375, + "step": 7798 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006527525301317966, + "loss": 0.8867, + "step": 7799 + }, + { + "epoch": 0.42, + "learning_rate": 0.000652669649928615, + "loss": 0.9609, + "step": 7800 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006525867650990416, + "loss": 0.8164, + "step": 7801 + }, + { + "epoch": 0.42, + "learning_rate": 0.000652503875645588, + "loss": 0.875, + "step": 7802 + }, + { + "epoch": 0.42, + "learning_rate": 0.000652420981570766, + "loss": 0.9609, + "step": 7803 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006523380828770876, + "loss": 0.7773, + "step": 7804 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006522551795670648, + "loss": 0.9414, + "step": 7805 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006521722716432101, + "loss": 0.8359, + "step": 7806 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006520893591080356, + "loss": 0.8906, + "step": 7807 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006520064419640539, + "loss": 0.8906, + "step": 7808 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006519235202137779, + "loss": 0.9609, + "step": 7809 + }, + { + "epoch": 0.42, + "learning_rate": 0.00065184059385972, + "loss": 0.9258, + "step": 7810 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006517576629043933, + "loss": 0.8398, + "step": 7811 + }, + { + "epoch": 0.42, + "learning_rate": 0.000651674727350311, + "loss": 0.8242, + "step": 7812 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006515917871999864, + "loss": 0.8398, + "step": 7813 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006515088424559325, + "loss": 0.8867, + "step": 7814 + }, + { + "epoch": 0.42, + "learning_rate": 0.000651425893120663, + "loss": 0.8711, + "step": 7815 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006513429391966915, + "loss": 0.9219, + "step": 7816 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006512599806865319, + "loss": 1.0, + "step": 7817 + }, + { + "epoch": 0.42, + "learning_rate": 0.000651177017592698, + "loss": 0.8164, + "step": 7818 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006510940499177039, + "loss": 0.8555, + "step": 7819 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006510110776640637, + "loss": 0.8398, + "step": 7820 + }, + { + "epoch": 0.42, + "learning_rate": 0.000650928100834292, + "loss": 0.8711, + "step": 7821 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006508451194309028, + "loss": 0.8633, + "step": 7822 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006507621334564111, + "loss": 0.9258, + "step": 7823 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006506791429133317, + "loss": 0.8555, + "step": 7824 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006505961478041791, + "loss": 0.8906, + "step": 7825 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006505131481314686, + "loss": 0.8594, + "step": 7826 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006504301438977154, + "loss": 0.8555, + "step": 7827 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006503471351054347, + "loss": 0.8906, + "step": 7828 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006502641217571418, + "loss": 0.9141, + "step": 7829 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006501811038553525, + "loss": 0.7969, + "step": 7830 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006500980814025823, + "loss": 0.8516, + "step": 7831 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006500150544013475, + "loss": 0.8945, + "step": 7832 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006499320228541634, + "loss": 0.8438, + "step": 7833 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006498489867635467, + "loss": 0.8125, + "step": 7834 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006497659461320135, + "loss": 0.8203, + "step": 7835 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006496829009620802, + "loss": 0.8867, + "step": 7836 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006495998512562633, + "loss": 0.9492, + "step": 7837 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006495167970170794, + "loss": 0.8516, + "step": 7838 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006494337382470457, + "loss": 0.8594, + "step": 7839 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006493506749486786, + "loss": 0.8789, + "step": 7840 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006492676071244953, + "loss": 0.8281, + "step": 7841 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006491845347770137, + "loss": 0.8203, + "step": 7842 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006491014579087502, + "loss": 0.8438, + "step": 7843 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006490183765222229, + "loss": 0.8203, + "step": 7844 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006489352906199493, + "loss": 0.9297, + "step": 7845 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006488522002044472, + "loss": 0.8867, + "step": 7846 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006487691052782346, + "loss": 0.8984, + "step": 7847 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006486860058438293, + "loss": 0.9414, + "step": 7848 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006486029019037496, + "loss": 0.8008, + "step": 7849 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006485197934605139, + "loss": 0.9062, + "step": 7850 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006484366805166405, + "loss": 0.8242, + "step": 7851 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006483535630746482, + "loss": 0.957, + "step": 7852 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006482704411370555, + "loss": 0.8906, + "step": 7853 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006481873147063814, + "loss": 0.8945, + "step": 7854 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006481041837851451, + "loss": 0.918, + "step": 7855 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006480210483758653, + "loss": 0.8281, + "step": 7856 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006479379084810617, + "loss": 0.8086, + "step": 7857 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006478547641032534, + "loss": 0.8555, + "step": 7858 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006477716152449602, + "loss": 0.8789, + "step": 7859 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006476884619087016, + "loss": 0.8867, + "step": 7860 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006476053040969974, + "loss": 1.0, + "step": 7861 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006475221418123677, + "loss": 0.8633, + "step": 7862 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006474389750573325, + "loss": 0.8398, + "step": 7863 + }, + { + "epoch": 0.42, + "learning_rate": 0.000647355803834412, + "loss": 0.8594, + "step": 7864 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006472726281461268, + "loss": 0.8633, + "step": 7865 + }, + { + "epoch": 0.42, + "learning_rate": 0.000647189447994997, + "loss": 0.9297, + "step": 7866 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006471062633835435, + "loss": 1.0, + "step": 7867 + }, + { + "epoch": 0.42, + "learning_rate": 0.000647023074314287, + "loss": 0.8086, + "step": 7868 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006469398807897484, + "loss": 0.9375, + "step": 7869 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006468566828124487, + "loss": 0.8359, + "step": 7870 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006467734803849093, + "loss": 0.875, + "step": 7871 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006466902735096511, + "loss": 0.9141, + "step": 7872 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006466070621891958, + "loss": 0.7891, + "step": 7873 + }, + { + "epoch": 0.42, + "learning_rate": 0.000646523846426065, + "loss": 0.8008, + "step": 7874 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006464406262227804, + "loss": 0.8789, + "step": 7875 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006463574015818636, + "loss": 0.9414, + "step": 7876 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006462741725058369, + "loss": 0.8555, + "step": 7877 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006461909389972225, + "loss": 0.8281, + "step": 7878 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006461077010585421, + "loss": 0.8555, + "step": 7879 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006460244586923186, + "loss": 1.0078, + "step": 7880 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006459412119010744, + "loss": 0.9414, + "step": 7881 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006458579606873318, + "loss": 0.8555, + "step": 7882 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006457747050536142, + "loss": 0.8203, + "step": 7883 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006456914450024441, + "loss": 0.8789, + "step": 7884 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006456081805363447, + "loss": 0.9062, + "step": 7885 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006455249116578389, + "loss": 0.8164, + "step": 7886 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006454416383694504, + "loss": 0.9453, + "step": 7887 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006453583606737026, + "loss": 0.8633, + "step": 7888 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006452750785731189, + "loss": 0.7969, + "step": 7889 + }, + { + "epoch": 0.42, + "learning_rate": 0.000645191792070223, + "loss": 0.7852, + "step": 7890 + }, + { + "epoch": 0.42, + "learning_rate": 0.000645108501167539, + "loss": 0.8789, + "step": 7891 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006450252058675907, + "loss": 0.8984, + "step": 7892 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006449419061729023, + "loss": 0.8945, + "step": 7893 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006448586020859982, + "loss": 0.8828, + "step": 7894 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006447752936094024, + "loss": 0.875, + "step": 7895 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006446919807456398, + "loss": 0.8164, + "step": 7896 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006446086634972348, + "loss": 0.8906, + "step": 7897 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006445253418667125, + "loss": 0.8281, + "step": 7898 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006444420158565973, + "loss": 0.8555, + "step": 7899 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006443586854694148, + "loss": 0.9297, + "step": 7900 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006442753507076897, + "loss": 0.9023, + "step": 7901 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006441920115739477, + "loss": 0.8789, + "step": 7902 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006441086680707141, + "loss": 0.8633, + "step": 7903 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006440253202005145, + "loss": 0.8398, + "step": 7904 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006439419679658745, + "loss": 0.875, + "step": 7905 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006438586113693203, + "loss": 0.9219, + "step": 7906 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006437752504133774, + "loss": 0.8203, + "step": 7907 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006436918851005722, + "loss": 0.8555, + "step": 7908 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006436085154334308, + "loss": 0.9375, + "step": 7909 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006435251414144797, + "loss": 0.8516, + "step": 7910 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006434417630462455, + "loss": 0.9023, + "step": 7911 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006433583803312546, + "loss": 0.8438, + "step": 7912 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006432749932720338, + "loss": 0.8633, + "step": 7913 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006431916018711101, + "loss": 0.8633, + "step": 7914 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006431082061310105, + "loss": 0.9453, + "step": 7915 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006430248060542623, + "loss": 0.793, + "step": 7916 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006429414016433925, + "loss": 0.8906, + "step": 7917 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006428579929009289, + "loss": 0.8477, + "step": 7918 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006427745798293987, + "loss": 0.75, + "step": 7919 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006426911624313297, + "loss": 0.8008, + "step": 7920 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006426077407092499, + "loss": 0.8555, + "step": 7921 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006425243146656871, + "loss": 0.9336, + "step": 7922 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006424408843031693, + "loss": 0.9453, + "step": 7923 + }, + { + "epoch": 0.43, + "learning_rate": 0.000642357449624225, + "loss": 0.8242, + "step": 7924 + }, + { + "epoch": 0.43, + "learning_rate": 0.000642274010631382, + "loss": 0.8086, + "step": 7925 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006421905673271696, + "loss": 0.7656, + "step": 7926 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006421071197141155, + "loss": 0.8789, + "step": 7927 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006420236677947491, + "loss": 0.8594, + "step": 7928 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006419402115715991, + "loss": 0.9023, + "step": 7929 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006418567510471942, + "loss": 0.8477, + "step": 7930 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006417732862240642, + "loss": 0.8711, + "step": 7931 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006416898171047375, + "loss": 0.8242, + "step": 7932 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006416063436917442, + "loss": 0.8633, + "step": 7933 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006415228659876134, + "loss": 0.793, + "step": 7934 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006414393839948749, + "loss": 0.8125, + "step": 7935 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006413558977160584, + "loss": 0.793, + "step": 7936 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006412724071536939, + "loss": 0.8789, + "step": 7937 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006411889123103115, + "loss": 0.9414, + "step": 7938 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006411054131884412, + "loss": 0.7695, + "step": 7939 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006410219097906134, + "loss": 0.9023, + "step": 7940 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006409384021193586, + "loss": 0.8047, + "step": 7941 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006408548901772071, + "loss": 0.8203, + "step": 7942 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006407713739666898, + "loss": 0.9258, + "step": 7943 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006406878534903376, + "loss": 0.9336, + "step": 7944 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006406043287506811, + "loss": 0.8242, + "step": 7945 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006405207997502517, + "loss": 0.9492, + "step": 7946 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006404372664915805, + "loss": 0.8867, + "step": 7947 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006403537289771988, + "loss": 0.8633, + "step": 7948 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006402701872096381, + "loss": 0.8984, + "step": 7949 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006401866411914296, + "loss": 0.8906, + "step": 7950 + }, + { + "epoch": 0.43, + "learning_rate": 0.000640103090925106, + "loss": 0.918, + "step": 7951 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006400195364131981, + "loss": 0.6992, + "step": 7952 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006399359776582384, + "loss": 0.8906, + "step": 7953 + }, + { + "epoch": 0.43, + "learning_rate": 0.000639852414662759, + "loss": 0.918, + "step": 7954 + }, + { + "epoch": 0.43, + "learning_rate": 0.000639768847429292, + "loss": 0.9102, + "step": 7955 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006396852759603699, + "loss": 0.8008, + "step": 7956 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006396017002585249, + "loss": 0.7852, + "step": 7957 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006395181203262899, + "loss": 0.9453, + "step": 7958 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006394345361661977, + "loss": 0.8984, + "step": 7959 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006393509477807808, + "loss": 0.7734, + "step": 7960 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006392673551725727, + "loss": 0.8672, + "step": 7961 + }, + { + "epoch": 0.43, + "learning_rate": 0.000639183758344106, + "loss": 0.8047, + "step": 7962 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006391001572979144, + "loss": 0.9062, + "step": 7963 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006390165520365311, + "loss": 0.8633, + "step": 7964 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006389329425624895, + "loss": 0.8594, + "step": 7965 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006388493288783236, + "loss": 0.8242, + "step": 7966 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006387657109865667, + "loss": 0.8867, + "step": 7967 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006386820888897531, + "loss": 0.8594, + "step": 7968 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006385984625904167, + "loss": 0.9805, + "step": 7969 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006385148320910915, + "loss": 0.8711, + "step": 7970 + }, + { + "epoch": 0.43, + "learning_rate": 0.000638431197394312, + "loss": 0.8203, + "step": 7971 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006383475585026124, + "loss": 0.7852, + "step": 7972 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006382639154185272, + "loss": 0.8867, + "step": 7973 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006381802681445916, + "loss": 0.8164, + "step": 7974 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006380966166833396, + "loss": 0.8867, + "step": 7975 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006380129610373067, + "loss": 0.9844, + "step": 7976 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006379293012090277, + "loss": 0.9102, + "step": 7977 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006378456372010378, + "loss": 0.8906, + "step": 7978 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006377619690158722, + "loss": 0.8945, + "step": 7979 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006376782966560665, + "loss": 0.9258, + "step": 7980 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006375946201241561, + "loss": 0.8672, + "step": 7981 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006375109394226768, + "loss": 0.8047, + "step": 7982 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006374272545541643, + "loss": 0.8359, + "step": 7983 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006373435655211547, + "loss": 0.7617, + "step": 7984 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006372598723261837, + "loss": 0.8828, + "step": 7985 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006371761749717879, + "loss": 0.8164, + "step": 7986 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006370924734605032, + "loss": 0.9297, + "step": 7987 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006370087677948664, + "loss": 0.918, + "step": 7988 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006369250579774138, + "loss": 0.832, + "step": 7989 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006368413440106822, + "loss": 0.8477, + "step": 7990 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006367576258972084, + "loss": 0.8672, + "step": 7991 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006366739036395292, + "loss": 0.7539, + "step": 7992 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006365901772401819, + "loss": 0.8281, + "step": 7993 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006365064467017035, + "loss": 0.8984, + "step": 7994 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006364227120266312, + "loss": 0.8867, + "step": 7995 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006363389732175029, + "loss": 0.8867, + "step": 7996 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006362552302768558, + "loss": 0.9141, + "step": 7997 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006361714832072276, + "loss": 0.8203, + "step": 7998 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006360877320111561, + "loss": 0.7734, + "step": 7999 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006360039766911793, + "loss": 0.8906, + "step": 8000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006359202172498353, + "loss": 0.8359, + "step": 8001 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006358364536896622, + "loss": 0.8555, + "step": 8002 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006357526860131986, + "loss": 0.8633, + "step": 8003 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006356689142229824, + "loss": 0.8945, + "step": 8004 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006355851383215526, + "loss": 0.9023, + "step": 8005 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006355013583114476, + "loss": 0.8633, + "step": 8006 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006354175741952067, + "loss": 0.8945, + "step": 8007 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006353337859753681, + "loss": 0.8945, + "step": 8008 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006352499936544714, + "loss": 0.8008, + "step": 8009 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006351661972350555, + "loss": 0.957, + "step": 8010 + }, + { + "epoch": 0.43, + "learning_rate": 0.00063508239671966, + "loss": 0.8945, + "step": 8011 + }, + { + "epoch": 0.43, + "learning_rate": 0.000634998592110824, + "loss": 0.8945, + "step": 8012 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006349147834110873, + "loss": 0.8867, + "step": 8013 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006348309706229895, + "loss": 0.8125, + "step": 8014 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006347471537490703, + "loss": 0.8164, + "step": 8015 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006346633327918698, + "loss": 0.9023, + "step": 8016 + }, + { + "epoch": 0.43, + "learning_rate": 0.000634579507753928, + "loss": 0.8047, + "step": 8017 + }, + { + "epoch": 0.43, + "learning_rate": 0.000634495678637785, + "loss": 0.8398, + "step": 8018 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006344118454459812, + "loss": 0.832, + "step": 8019 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006343280081810569, + "loss": 0.8516, + "step": 8020 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006342441668455527, + "loss": 0.8008, + "step": 8021 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006341603214420094, + "loss": 1.0078, + "step": 8022 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006340764719729676, + "loss": 0.7891, + "step": 8023 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006339926184409681, + "loss": 0.8359, + "step": 8024 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006339087608485525, + "loss": 0.8594, + "step": 8025 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006338248991982613, + "loss": 0.8672, + "step": 8026 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006337410334926362, + "loss": 0.7461, + "step": 8027 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006336571637342184, + "loss": 0.7656, + "step": 8028 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006335732899255494, + "loss": 0.8398, + "step": 8029 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006334894120691711, + "loss": 0.9023, + "step": 8030 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006334055301676252, + "loss": 0.9336, + "step": 8031 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006333216442234533, + "loss": 0.9141, + "step": 8032 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006332377542391978, + "loss": 0.7656, + "step": 8033 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006331538602174005, + "loss": 0.75, + "step": 8034 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006330699621606042, + "loss": 0.8828, + "step": 8035 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006329860600713507, + "loss": 0.7891, + "step": 8036 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006329021539521827, + "loss": 0.9141, + "step": 8037 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006328182438056429, + "loss": 0.8594, + "step": 8038 + }, + { + "epoch": 0.43, + "learning_rate": 0.000632734329634274, + "loss": 0.7305, + "step": 8039 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006326504114406192, + "loss": 0.8086, + "step": 8040 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006325664892272208, + "loss": 0.8242, + "step": 8041 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006324825629966226, + "loss": 0.9883, + "step": 8042 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006323986327513674, + "loss": 0.8438, + "step": 8043 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006323146984939989, + "loss": 0.7852, + "step": 8044 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006322307602270602, + "loss": 0.7617, + "step": 8045 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006321468179530953, + "loss": 0.9023, + "step": 8046 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006320628716746477, + "loss": 0.8203, + "step": 8047 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006319789213942613, + "loss": 0.8867, + "step": 8048 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006318949671144798, + "loss": 0.9023, + "step": 8049 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006318110088378479, + "loss": 0.8281, + "step": 8050 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006317270465669092, + "loss": 0.9102, + "step": 8051 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006316430803042084, + "loss": 0.8086, + "step": 8052 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006315591100522898, + "loss": 0.9336, + "step": 8053 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006314751358136981, + "loss": 0.8281, + "step": 8054 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006313911575909778, + "loss": 0.8242, + "step": 8055 + }, + { + "epoch": 0.43, + "learning_rate": 0.000631307175386674, + "loss": 0.8594, + "step": 8056 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006312231892033314, + "loss": 0.875, + "step": 8057 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006311391990434952, + "loss": 0.8398, + "step": 8058 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006310552049097102, + "loss": 0.8633, + "step": 8059 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006309712068045223, + "loss": 0.8555, + "step": 8060 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006308872047304765, + "loss": 0.9453, + "step": 8061 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006308031986901184, + "loss": 0.8555, + "step": 8062 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006307191886859938, + "loss": 0.7383, + "step": 8063 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006306351747206484, + "loss": 0.8359, + "step": 8064 + }, + { + "epoch": 0.43, + "learning_rate": 0.000630551156796628, + "loss": 0.7617, + "step": 8065 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006304671349164789, + "loss": 0.7891, + "step": 8066 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006303831090827467, + "loss": 0.8945, + "step": 8067 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006302990792979785, + "loss": 0.9258, + "step": 8068 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006302150455647198, + "loss": 0.8789, + "step": 8069 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006301310078855177, + "loss": 0.8555, + "step": 8070 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006300469662629184, + "loss": 0.7305, + "step": 8071 + }, + { + "epoch": 0.43, + "learning_rate": 0.000629962920699469, + "loss": 0.9375, + "step": 8072 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006298788711977162, + "loss": 0.8906, + "step": 8073 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006297948177602071, + "loss": 0.8125, + "step": 8074 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006297107603894885, + "loss": 0.8633, + "step": 8075 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006296266990881077, + "loss": 0.7656, + "step": 8076 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006295426338586122, + "loss": 0.9844, + "step": 8077 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006294585647035495, + "loss": 0.8477, + "step": 8078 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006293744916254669, + "loss": 0.8359, + "step": 8079 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006292904146269122, + "loss": 0.8672, + "step": 8080 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006292063337104334, + "loss": 0.8984, + "step": 8081 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006291222488785781, + "loss": 0.7891, + "step": 8082 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006290381601338947, + "loss": 0.8867, + "step": 8083 + }, + { + "epoch": 0.43, + "learning_rate": 0.000628954067478931, + "loss": 0.9766, + "step": 8084 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006288699709162355, + "loss": 0.8672, + "step": 8085 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006287858704483567, + "loss": 0.8789, + "step": 8086 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006287017660778429, + "loss": 0.7891, + "step": 8087 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006286176578072429, + "loss": 0.9023, + "step": 8088 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006285335456391052, + "loss": 0.8359, + "step": 8089 + }, + { + "epoch": 0.43, + "learning_rate": 0.000628449429575979, + "loss": 0.9297, + "step": 8090 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006283653096204132, + "loss": 0.7539, + "step": 8091 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006282811857749567, + "loss": 0.8359, + "step": 8092 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006281970580421591, + "loss": 0.8789, + "step": 8093 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006281129264245693, + "loss": 0.9336, + "step": 8094 + }, + { + "epoch": 0.44, + "learning_rate": 0.000628028790924737, + "loss": 0.8945, + "step": 8095 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006279446515452121, + "loss": 0.9141, + "step": 8096 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006278605082885438, + "loss": 0.8008, + "step": 8097 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006277763611572821, + "loss": 0.8398, + "step": 8098 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006276922101539769, + "loss": 0.8672, + "step": 8099 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006276080552811783, + "loss": 0.8438, + "step": 8100 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006275238965414365, + "loss": 0.9453, + "step": 8101 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006274397339373017, + "loss": 0.7773, + "step": 8102 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006273555674713243, + "loss": 0.9336, + "step": 8103 + }, + { + "epoch": 0.44, + "learning_rate": 0.000627271397146055, + "loss": 0.9102, + "step": 8104 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006271872229640443, + "loss": 0.8516, + "step": 8105 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006271030449278428, + "loss": 0.9102, + "step": 8106 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006270188630400017, + "loss": 0.7891, + "step": 8107 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006269346773030717, + "loss": 0.8359, + "step": 8108 + }, + { + "epoch": 0.44, + "learning_rate": 0.000626850487719604, + "loss": 0.8984, + "step": 8109 + }, + { + "epoch": 0.44, + "learning_rate": 0.00062676629429215, + "loss": 0.8711, + "step": 8110 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006266820970232608, + "loss": 0.918, + "step": 8111 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006265978959154879, + "loss": 0.9297, + "step": 8112 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006265136909713828, + "loss": 0.8555, + "step": 8113 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006264294821934974, + "loss": 0.8203, + "step": 8114 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006263452695843834, + "loss": 0.8711, + "step": 8115 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006262610531465928, + "loss": 0.8945, + "step": 8116 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006261768328826773, + "loss": 0.9023, + "step": 8117 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006260926087951896, + "loss": 0.8789, + "step": 8118 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006260083808866814, + "loss": 0.832, + "step": 8119 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006259241491597056, + "loss": 0.8555, + "step": 8120 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006258399136168143, + "loss": 0.7852, + "step": 8121 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006257556742605602, + "loss": 0.8789, + "step": 8122 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006256714310934961, + "loss": 0.8438, + "step": 8123 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006255871841181751, + "loss": 0.918, + "step": 8124 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006255029333371494, + "loss": 0.9414, + "step": 8125 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006254186787529731, + "loss": 0.8828, + "step": 8126 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006253344203681985, + "loss": 0.8281, + "step": 8127 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006252501581853793, + "loss": 0.9258, + "step": 8128 + }, + { + "epoch": 0.44, + "learning_rate": 0.000625165892207069, + "loss": 0.8594, + "step": 8129 + }, + { + "epoch": 0.44, + "learning_rate": 0.000625081622435821, + "loss": 0.9141, + "step": 8130 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006249973488741889, + "loss": 0.8359, + "step": 8131 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006249130715247266, + "loss": 0.8242, + "step": 8132 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006248287903899878, + "loss": 0.8984, + "step": 8133 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006247445054725268, + "loss": 0.8789, + "step": 8134 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006246602167748974, + "loss": 0.7578, + "step": 8135 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006245759242996539, + "loss": 0.9531, + "step": 8136 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006244916280493509, + "loss": 0.8516, + "step": 8137 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006244073280265425, + "loss": 0.8516, + "step": 8138 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006243230242337834, + "loss": 0.8828, + "step": 8139 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006242387166736283, + "loss": 0.8633, + "step": 8140 + }, + { + "epoch": 0.44, + "learning_rate": 0.000624154405348632, + "loss": 0.8945, + "step": 8141 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006240700902613493, + "loss": 0.8984, + "step": 8142 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006239857714143354, + "loss": 0.8242, + "step": 8143 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006239014488101455, + "loss": 0.9492, + "step": 8144 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006238171224513344, + "loss": 0.793, + "step": 8145 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006237327923404579, + "loss": 0.9727, + "step": 8146 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006236484584800714, + "loss": 0.8828, + "step": 8147 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006235641208727304, + "loss": 0.8633, + "step": 8148 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006234797795209908, + "loss": 0.9844, + "step": 8149 + }, + { + "epoch": 0.44, + "learning_rate": 0.000623395434427408, + "loss": 0.8242, + "step": 8150 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006233110855945383, + "loss": 0.8398, + "step": 8151 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006232267330249378, + "loss": 0.832, + "step": 8152 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006231423767211623, + "loss": 0.8594, + "step": 8153 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006230580166857686, + "loss": 0.9023, + "step": 8154 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006229736529213126, + "loss": 0.9297, + "step": 8155 + }, + { + "epoch": 0.44, + "learning_rate": 0.000622889285430351, + "loss": 0.8359, + "step": 8156 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006228049142154402, + "loss": 0.8867, + "step": 8157 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006227205392791372, + "loss": 0.8008, + "step": 8158 + }, + { + "epoch": 0.44, + "learning_rate": 0.000622636160623999, + "loss": 0.8281, + "step": 8159 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006225517782525819, + "loss": 0.8125, + "step": 8160 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006224673921674436, + "loss": 0.8398, + "step": 8161 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006223830023711409, + "loss": 0.8867, + "step": 8162 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006222986088662315, + "loss": 0.8242, + "step": 8163 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006222142116552723, + "loss": 0.9023, + "step": 8164 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006221298107408211, + "loss": 0.8633, + "step": 8165 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006220454061254353, + "loss": 0.832, + "step": 8166 + }, + { + "epoch": 0.44, + "learning_rate": 0.000621960997811673, + "loss": 0.8828, + "step": 8167 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006218765858020917, + "loss": 0.9062, + "step": 8168 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006217921700992496, + "loss": 0.7656, + "step": 8169 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006217077507057047, + "loss": 0.9297, + "step": 8170 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006216233276240151, + "loss": 0.7812, + "step": 8171 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006215389008567393, + "loss": 0.8281, + "step": 8172 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006214544704064354, + "loss": 0.8555, + "step": 8173 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006213700362756623, + "loss": 0.8867, + "step": 8174 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006212855984669782, + "loss": 0.9102, + "step": 8175 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006212011569829421, + "loss": 0.8438, + "step": 8176 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006211167118261129, + "loss": 0.8555, + "step": 8177 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006210322629990494, + "loss": 0.8086, + "step": 8178 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006209478105043109, + "loss": 0.8984, + "step": 8179 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006208633543444563, + "loss": 0.8125, + "step": 8180 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006207788945220452, + "loss": 0.918, + "step": 8181 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006206944310396368, + "loss": 0.9258, + "step": 8182 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006206099638997907, + "loss": 0.8984, + "step": 8183 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006205254931050664, + "loss": 0.8867, + "step": 8184 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006204410186580238, + "loss": 0.8203, + "step": 8185 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006203565405612227, + "loss": 0.7891, + "step": 8186 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006202720588172233, + "loss": 0.8867, + "step": 8187 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006201875734285851, + "loss": 0.7773, + "step": 8188 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006201030843978688, + "loss": 0.8164, + "step": 8189 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006200185917276345, + "loss": 0.7891, + "step": 8190 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006199340954204426, + "loss": 0.8008, + "step": 8191 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006198495954788538, + "loss": 0.875, + "step": 8192 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006197650919054283, + "loss": 0.9141, + "step": 8193 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006196805847027274, + "loss": 0.8594, + "step": 8194 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006195960738733115, + "loss": 0.8125, + "step": 8195 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006195115594197418, + "loss": 0.8398, + "step": 8196 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006194270413445793, + "loss": 0.8867, + "step": 8197 + }, + { + "epoch": 0.44, + "learning_rate": 0.000619342519650385, + "loss": 0.9258, + "step": 8198 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006192579943397204, + "loss": 0.8281, + "step": 8199 + }, + { + "epoch": 0.44, + "learning_rate": 0.000619173465415147, + "loss": 0.8867, + "step": 8200 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006190889328792259, + "loss": 0.8516, + "step": 8201 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006190043967345192, + "loss": 0.8086, + "step": 8202 + }, + { + "epoch": 0.44, + "learning_rate": 0.000618919856983588, + "loss": 0.8242, + "step": 8203 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006188353136289947, + "loss": 0.8438, + "step": 8204 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006187507666733011, + "loss": 0.8672, + "step": 8205 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006186662161190692, + "loss": 0.9219, + "step": 8206 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006185816619688611, + "loss": 0.8555, + "step": 8207 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006184971042252392, + "loss": 0.8789, + "step": 8208 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006184125428907656, + "loss": 0.9336, + "step": 8209 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006183279779680032, + "loss": 0.875, + "step": 8210 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006182434094595142, + "loss": 0.8672, + "step": 8211 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006181588373678616, + "loss": 0.8477, + "step": 8212 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006180742616956081, + "loss": 0.8672, + "step": 8213 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006179896824453166, + "loss": 0.8672, + "step": 8214 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006179050996195502, + "loss": 0.9609, + "step": 8215 + }, + { + "epoch": 0.44, + "learning_rate": 0.000617820513220872, + "loss": 0.8555, + "step": 8216 + }, + { + "epoch": 0.44, + "learning_rate": 0.000617735923251845, + "loss": 0.8906, + "step": 8217 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006176513297150331, + "loss": 0.875, + "step": 8218 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006175667326129992, + "loss": 0.9141, + "step": 8219 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006174821319483074, + "loss": 0.9375, + "step": 8220 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006173975277235209, + "loss": 0.8359, + "step": 8221 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006173129199412037, + "loss": 0.8398, + "step": 8222 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006172283086039197, + "loss": 0.8633, + "step": 8223 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006171436937142328, + "loss": 0.8438, + "step": 8224 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006170590752747071, + "loss": 0.8867, + "step": 8225 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006169744532879072, + "loss": 0.9219, + "step": 8226 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006168898277563969, + "loss": 0.8945, + "step": 8227 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006168051986827408, + "loss": 0.8047, + "step": 8228 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006167205660695037, + "loss": 0.8555, + "step": 8229 + }, + { + "epoch": 0.44, + "learning_rate": 0.00061663592991925, + "loss": 0.8203, + "step": 8230 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006165512902345445, + "loss": 0.8164, + "step": 8231 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006164666470179519, + "loss": 0.8594, + "step": 8232 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006163820002720377, + "loss": 0.8086, + "step": 8233 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006162973499993661, + "loss": 0.8125, + "step": 8234 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006162126962025032, + "loss": 0.875, + "step": 8235 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006161280388840136, + "loss": 0.8672, + "step": 8236 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006160433780464631, + "loss": 0.9219, + "step": 8237 + }, + { + "epoch": 0.44, + "learning_rate": 0.000615958713692417, + "loss": 0.8555, + "step": 8238 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006158740458244409, + "loss": 0.8438, + "step": 8239 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006157893744451007, + "loss": 0.8047, + "step": 8240 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006157046995569622, + "loss": 0.8594, + "step": 8241 + }, + { + "epoch": 0.44, + "learning_rate": 0.000615620021162591, + "loss": 0.8398, + "step": 8242 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006155353392645536, + "loss": 0.7812, + "step": 8243 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006154506538654157, + "loss": 1.0312, + "step": 8244 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006153659649677439, + "loss": 0.8359, + "step": 8245 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006152812725741044, + "loss": 0.9531, + "step": 8246 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006151965766870636, + "loss": 0.957, + "step": 8247 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006151118773091882, + "loss": 0.957, + "step": 8248 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006150271744430447, + "loss": 0.8711, + "step": 8249 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006149424680912, + "loss": 0.9141, + "step": 8250 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006148577582562211, + "loss": 0.8789, + "step": 8251 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006147730449406745, + "loss": 0.9492, + "step": 8252 + }, + { + "epoch": 0.44, + "learning_rate": 0.000614688328147128, + "loss": 0.9062, + "step": 8253 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006146036078781481, + "loss": 0.8945, + "step": 8254 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006145188841363028, + "loss": 0.9961, + "step": 8255 + }, + { + "epoch": 0.44, + "learning_rate": 0.000614434156924159, + "loss": 0.9023, + "step": 8256 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006143494262442842, + "loss": 0.8828, + "step": 8257 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006142646920992463, + "loss": 0.8789, + "step": 8258 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006141799544916128, + "loss": 0.8594, + "step": 8259 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006140952134239516, + "loss": 0.8203, + "step": 8260 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006140104688988308, + "loss": 0.9375, + "step": 8261 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006139257209188181, + "loss": 0.8398, + "step": 8262 + }, + { + "epoch": 0.44, + "learning_rate": 0.000613840969486482, + "loss": 0.9219, + "step": 8263 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006137562146043906, + "loss": 0.8789, + "step": 8264 + }, + { + "epoch": 0.44, + "learning_rate": 0.000613671456275112, + "loss": 0.7969, + "step": 8265 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006135866945012152, + "loss": 0.8984, + "step": 8266 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006135019292852681, + "loss": 0.7891, + "step": 8267 + }, + { + "epoch": 0.44, + "learning_rate": 0.00061341716062984, + "loss": 0.8281, + "step": 8268 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006133323885374993, + "loss": 0.8867, + "step": 8269 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006132476130108149, + "loss": 0.9609, + "step": 8270 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006131628340523558, + "loss": 0.8711, + "step": 8271 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006130780516646913, + "loss": 0.8633, + "step": 8272 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006129932658503902, + "loss": 0.8711, + "step": 8273 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006129084766120221, + "loss": 0.9453, + "step": 8274 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006128236839521562, + "loss": 0.7227, + "step": 8275 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006127388878733622, + "loss": 0.9609, + "step": 8276 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006126540883782094, + "loss": 0.9766, + "step": 8277 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006125692854692677, + "loss": 0.8359, + "step": 8278 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006124844791491068, + "loss": 0.8164, + "step": 8279 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006123996694202969, + "loss": 0.7812, + "step": 8280 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006123148562854077, + "loss": 0.9688, + "step": 8281 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006122300397470096, + "loss": 0.8086, + "step": 8282 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006121452198076723, + "loss": 0.9336, + "step": 8283 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006120603964699667, + "loss": 0.9102, + "step": 8284 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006119755697364628, + "loss": 0.8008, + "step": 8285 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006118907396097317, + "loss": 0.8047, + "step": 8286 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006118059060923433, + "loss": 0.7852, + "step": 8287 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006117210691868688, + "loss": 0.8086, + "step": 8288 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006116362288958789, + "loss": 0.918, + "step": 8289 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006115513852219445, + "loss": 0.8672, + "step": 8290 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006114665381676368, + "loss": 0.957, + "step": 8291 + }, + { + "epoch": 0.45, + "learning_rate": 0.000611381687735527, + "loss": 0.8555, + "step": 8292 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006112968339281858, + "loss": 0.9375, + "step": 8293 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006112119767481853, + "loss": 0.8672, + "step": 8294 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006111271161980963, + "loss": 0.9102, + "step": 8295 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006110422522804908, + "loss": 0.8633, + "step": 8296 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006109573849979404, + "loss": 0.875, + "step": 8297 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006108725143530166, + "loss": 0.7383, + "step": 8298 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006107876403482914, + "loss": 0.9375, + "step": 8299 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006107027629863368, + "loss": 0.8164, + "step": 8300 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006106178822697252, + "loss": 0.8125, + "step": 8301 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006105329982010279, + "loss": 0.8906, + "step": 8302 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006104481107828179, + "loss": 0.8438, + "step": 8303 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006103632200176674, + "loss": 0.8242, + "step": 8304 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006102783259081488, + "loss": 0.8047, + "step": 8305 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006101934284568345, + "loss": 0.9375, + "step": 8306 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006101085276662975, + "loss": 0.8633, + "step": 8307 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006100236235391103, + "loss": 0.9102, + "step": 8308 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006099387160778462, + "loss": 0.918, + "step": 8309 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006098538052850776, + "loss": 0.8711, + "step": 8310 + }, + { + "epoch": 0.45, + "learning_rate": 0.000609768891163378, + "loss": 0.9023, + "step": 8311 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006096839737153204, + "loss": 0.8555, + "step": 8312 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006095990529434781, + "loss": 0.9023, + "step": 8313 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006095141288504244, + "loss": 0.8438, + "step": 8314 + }, + { + "epoch": 0.45, + "learning_rate": 0.000609429201438733, + "loss": 0.8594, + "step": 8315 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006093442707109772, + "loss": 0.8984, + "step": 8316 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006092593366697308, + "loss": 0.8594, + "step": 8317 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006091743993175676, + "loss": 0.832, + "step": 8318 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006090894586570617, + "loss": 0.875, + "step": 8319 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006090045146907866, + "loss": 0.8984, + "step": 8320 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006089195674213168, + "loss": 0.8086, + "step": 8321 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006088346168512263, + "loss": 0.7734, + "step": 8322 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006087496629830894, + "loss": 0.8906, + "step": 8323 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006086647058194805, + "loss": 0.8281, + "step": 8324 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006085797453629741, + "loss": 0.9648, + "step": 8325 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006084947816161446, + "loss": 0.8906, + "step": 8326 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006084098145815672, + "loss": 0.8398, + "step": 8327 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006083248442618159, + "loss": 0.8125, + "step": 8328 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006082398706594663, + "loss": 0.8633, + "step": 8329 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006081548937770929, + "loss": 0.9102, + "step": 8330 + }, + { + "epoch": 0.45, + "learning_rate": 0.000608069913617271, + "loss": 0.793, + "step": 8331 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006079849301825756, + "loss": 0.8359, + "step": 8332 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006078999434755823, + "loss": 0.9844, + "step": 8333 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006078149534988662, + "loss": 0.8633, + "step": 8334 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006077299602550028, + "loss": 0.8281, + "step": 8335 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006076449637465678, + "loss": 0.8711, + "step": 8336 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006075599639761368, + "loss": 0.9102, + "step": 8337 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006074749609462855, + "loss": 0.8398, + "step": 8338 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006073899546595899, + "loss": 0.8359, + "step": 8339 + }, + { + "epoch": 0.45, + "learning_rate": 0.000607304945118626, + "loss": 0.8867, + "step": 8340 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006072199323259696, + "loss": 0.8086, + "step": 8341 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006071349162841972, + "loss": 0.7891, + "step": 8342 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006070498969958848, + "loss": 0.8828, + "step": 8343 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006069648744636091, + "loss": 0.9453, + "step": 8344 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006068798486899463, + "loss": 0.8672, + "step": 8345 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006067948196774728, + "loss": 0.9375, + "step": 8346 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006067097874287657, + "loss": 0.8164, + "step": 8347 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006066247519464015, + "loss": 0.7578, + "step": 8348 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006065397132329569, + "loss": 0.9023, + "step": 8349 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006064546712910091, + "loss": 0.9102, + "step": 8350 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006063696261231349, + "loss": 0.8633, + "step": 8351 + }, + { + "epoch": 0.45, + "learning_rate": 0.000606284577731912, + "loss": 0.8594, + "step": 8352 + }, + { + "epoch": 0.45, + "learning_rate": 0.000606199526119917, + "loss": 0.8438, + "step": 8353 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006061144712897277, + "loss": 0.8438, + "step": 8354 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006060294132439212, + "loss": 0.9023, + "step": 8355 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006059443519850753, + "loss": 0.8789, + "step": 8356 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006058592875157673, + "loss": 0.9141, + "step": 8357 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006057742198385754, + "loss": 0.8867, + "step": 8358 + }, + { + "epoch": 0.45, + "learning_rate": 0.000605689148956077, + "loss": 0.8906, + "step": 8359 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006056040748708506, + "loss": 0.9219, + "step": 8360 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006055189975854733, + "loss": 0.9688, + "step": 8361 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006054339171025243, + "loss": 0.8438, + "step": 8362 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006053488334245809, + "loss": 0.8008, + "step": 8363 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006052637465542219, + "loss": 0.7852, + "step": 8364 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006051786564940258, + "loss": 0.8945, + "step": 8365 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006050935632465706, + "loss": 0.7891, + "step": 8366 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006050084668144354, + "loss": 0.7891, + "step": 8367 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006049233672001987, + "loss": 0.8906, + "step": 8368 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006048382644064391, + "loss": 0.9062, + "step": 8369 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006047531584357362, + "loss": 0.9062, + "step": 8370 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006046680492906681, + "loss": 0.8906, + "step": 8371 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006045829369738144, + "loss": 0.8789, + "step": 8372 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006044978214877542, + "loss": 0.8281, + "step": 8373 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006044127028350666, + "loss": 0.9023, + "step": 8374 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006043275810183315, + "loss": 0.8281, + "step": 8375 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006042424560401276, + "loss": 0.8438, + "step": 8376 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006041573279030352, + "loss": 0.8828, + "step": 8377 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006040721966096334, + "loss": 0.7852, + "step": 8378 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006039870621625023, + "loss": 0.8164, + "step": 8379 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006039019245642215, + "loss": 0.8945, + "step": 8380 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006038167838173712, + "loss": 0.8516, + "step": 8381 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006037316399245314, + "loss": 0.7617, + "step": 8382 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006036464928882821, + "loss": 0.8555, + "step": 8383 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006035613427112035, + "loss": 0.9102, + "step": 8384 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006034761893958762, + "loss": 0.832, + "step": 8385 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006033910329448803, + "loss": 0.8984, + "step": 8386 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006033058733607966, + "loss": 0.8203, + "step": 8387 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006032207106462055, + "loss": 0.8672, + "step": 8388 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006031355448036879, + "loss": 0.8477, + "step": 8389 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006030503758358245, + "loss": 0.8477, + "step": 8390 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006029652037451963, + "loss": 0.918, + "step": 8391 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006028800285343839, + "loss": 0.8438, + "step": 8392 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006027948502059691, + "loss": 0.8477, + "step": 8393 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006027096687625324, + "loss": 0.8906, + "step": 8394 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006026244842066555, + "loss": 0.8594, + "step": 8395 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006025392965409196, + "loss": 0.8086, + "step": 8396 + }, + { + "epoch": 0.45, + "learning_rate": 0.000602454105767906, + "loss": 0.7969, + "step": 8397 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006023689118901968, + "loss": 0.8359, + "step": 8398 + }, + { + "epoch": 0.45, + "learning_rate": 0.000602283714910373, + "loss": 0.793, + "step": 8399 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006021985148310168, + "loss": 0.8828, + "step": 8400 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006021133116547099, + "loss": 0.8047, + "step": 8401 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006020281053840341, + "loss": 0.7578, + "step": 8402 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006019428960215717, + "loss": 0.8906, + "step": 8403 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006018576835699046, + "loss": 0.8594, + "step": 8404 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006017724680316151, + "loss": 0.7617, + "step": 8405 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006016872494092857, + "loss": 0.8594, + "step": 8406 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006016020277054984, + "loss": 0.8984, + "step": 8407 + }, + { + "epoch": 0.45, + "learning_rate": 0.000601516802922836, + "loss": 0.8906, + "step": 8408 + }, + { + "epoch": 0.45, + "learning_rate": 0.000601431575063881, + "loss": 0.918, + "step": 8409 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006013463441312161, + "loss": 0.8711, + "step": 8410 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006012611101274242, + "loss": 0.7852, + "step": 8411 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006011758730550878, + "loss": 0.8945, + "step": 8412 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006010906329167902, + "loss": 0.8594, + "step": 8413 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006010053897151144, + "loss": 0.8945, + "step": 8414 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006009201434526434, + "loss": 0.8477, + "step": 8415 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006008348941319608, + "loss": 0.875, + "step": 8416 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006007496417556494, + "loss": 0.875, + "step": 8417 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006006643863262932, + "loss": 0.8594, + "step": 8418 + }, + { + "epoch": 0.45, + "learning_rate": 0.000600579127846475, + "loss": 0.8633, + "step": 8419 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006004938663187792, + "loss": 0.8594, + "step": 8420 + }, + { + "epoch": 0.45, + "learning_rate": 0.000600408601745789, + "loss": 0.793, + "step": 8421 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006003233341300884, + "loss": 0.9258, + "step": 8422 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006002380634742613, + "loss": 0.8516, + "step": 8423 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006001527897808916, + "loss": 0.9297, + "step": 8424 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006000675130525632, + "loss": 0.8477, + "step": 8425 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005999822332918608, + "loss": 0.8359, + "step": 8426 + }, + { + "epoch": 0.45, + "learning_rate": 0.000599896950501368, + "loss": 0.8789, + "step": 8427 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005998116646836695, + "loss": 0.8555, + "step": 8428 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005997263758413497, + "loss": 0.8086, + "step": 8429 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005996410839769933, + "loss": 0.9102, + "step": 8430 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005995557890931846, + "loss": 0.8477, + "step": 8431 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005994704911925084, + "loss": 0.9023, + "step": 8432 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005993851902775498, + "loss": 0.8125, + "step": 8433 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005992998863508933, + "loss": 0.8711, + "step": 8434 + }, + { + "epoch": 0.45, + "learning_rate": 0.000599214579415124, + "loss": 0.8555, + "step": 8435 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005991292694728273, + "loss": 0.8203, + "step": 8436 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005990439565265878, + "loss": 1.0078, + "step": 8437 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005989586405789913, + "loss": 0.9531, + "step": 8438 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005988733216326229, + "loss": 0.9375, + "step": 8439 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005987879996900681, + "loss": 0.8516, + "step": 8440 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005987026747539124, + "loss": 0.9961, + "step": 8441 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005986173468267413, + "loss": 0.9453, + "step": 8442 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005985320159111408, + "loss": 0.7539, + "step": 8443 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005984466820096965, + "loss": 0.9609, + "step": 8444 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005983613451249943, + "loss": 0.8906, + "step": 8445 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005982760052596203, + "loss": 0.8398, + "step": 8446 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005981906624161605, + "loss": 0.9141, + "step": 8447 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005981053165972012, + "loss": 0.9492, + "step": 8448 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005980199678053284, + "loss": 0.9062, + "step": 8449 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005979346160431286, + "loss": 0.8867, + "step": 8450 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005978492613131884, + "loss": 0.8398, + "step": 8451 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005977639036180939, + "loss": 0.918, + "step": 8452 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005976785429604322, + "loss": 0.8945, + "step": 8453 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005975931793427897, + "loss": 0.8164, + "step": 8454 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005975078127677534, + "loss": 0.8047, + "step": 8455 + }, + { + "epoch": 0.45, + "learning_rate": 0.00059742244323791, + "loss": 0.9453, + "step": 8456 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005973370707558467, + "loss": 0.8477, + "step": 8457 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005972516953241502, + "loss": 0.9062, + "step": 8458 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005971663169454081, + "loss": 0.875, + "step": 8459 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005970809356222071, + "loss": 0.9375, + "step": 8460 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005969955513571352, + "loss": 0.9023, + "step": 8461 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005969101641527792, + "loss": 0.8711, + "step": 8462 + }, + { + "epoch": 0.45, + "learning_rate": 0.000596824774011727, + "loss": 0.7617, + "step": 8463 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005967393809365662, + "loss": 0.8477, + "step": 8464 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005966539849298844, + "loss": 0.8438, + "step": 8465 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005965685859942693, + "loss": 0.918, + "step": 8466 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005964831841323089, + "loss": 0.8789, + "step": 8467 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005963977793465908, + "loss": 0.9258, + "step": 8468 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005963123716397036, + "loss": 0.9141, + "step": 8469 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005962269610142351, + "loss": 0.8281, + "step": 8470 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005961415474727736, + "loss": 0.8398, + "step": 8471 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005960561310179074, + "loss": 0.8984, + "step": 8472 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005959707116522249, + "loss": 0.8281, + "step": 8473 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005958852893783147, + "loss": 0.9102, + "step": 8474 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005957998641987652, + "loss": 0.9023, + "step": 8475 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005957144361161651, + "loss": 0.8555, + "step": 8476 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005956290051331032, + "loss": 0.8633, + "step": 8477 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005955435712521682, + "loss": 0.9219, + "step": 8478 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005954581344759495, + "loss": 0.8125, + "step": 8479 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005953726948070354, + "loss": 0.9297, + "step": 8480 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005952872522480155, + "loss": 0.8555, + "step": 8481 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005952018068014789, + "loss": 0.8477, + "step": 8482 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005951163584700147, + "loss": 0.9492, + "step": 8483 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005950309072562124, + "loss": 0.8281, + "step": 8484 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005949454531626614, + "loss": 0.8008, + "step": 8485 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005948599961919515, + "loss": 0.9375, + "step": 8486 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005947745363466721, + "loss": 0.9609, + "step": 8487 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005946890736294129, + "loss": 0.918, + "step": 8488 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005946036080427637, + "loss": 0.8477, + "step": 8489 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005945181395893144, + "loss": 0.8867, + "step": 8490 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005944326682716551, + "loss": 0.8516, + "step": 8491 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005943471940923757, + "loss": 0.8594, + "step": 8492 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005942617170540663, + "loss": 0.875, + "step": 8493 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005941762371593174, + "loss": 0.8203, + "step": 8494 + }, + { + "epoch": 0.46, + "learning_rate": 0.000594090754410719, + "loss": 0.9492, + "step": 8495 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005940052688108618, + "loss": 0.8281, + "step": 8496 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005939197803623362, + "loss": 0.8281, + "step": 8497 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005938342890677326, + "loss": 0.9141, + "step": 8498 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005937487949296419, + "loss": 0.8672, + "step": 8499 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005936632979506547, + "loss": 0.8438, + "step": 8500 + }, + { + "epoch": 0.46, + "learning_rate": 0.000593577798133362, + "loss": 0.8477, + "step": 8501 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005934922954803546, + "loss": 0.8203, + "step": 8502 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005934067899942235, + "loss": 0.9062, + "step": 8503 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005933212816775599, + "loss": 0.9023, + "step": 8504 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005932357705329549, + "loss": 0.8359, + "step": 8505 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005931502565629998, + "loss": 0.8594, + "step": 8506 + }, + { + "epoch": 0.46, + "learning_rate": 0.000593064739770286, + "loss": 0.8477, + "step": 8507 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005929792201574048, + "loss": 0.8672, + "step": 8508 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005928936977269479, + "loss": 0.9258, + "step": 8509 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005928081724815067, + "loss": 0.8711, + "step": 8510 + }, + { + "epoch": 0.46, + "learning_rate": 0.000592722644423673, + "loss": 0.8867, + "step": 8511 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005926371135560387, + "loss": 0.9883, + "step": 8512 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005925515798811956, + "loss": 0.9102, + "step": 8513 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005924660434017355, + "loss": 0.8906, + "step": 8514 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005923805041202505, + "loss": 0.7773, + "step": 8515 + }, + { + "epoch": 0.46, + "learning_rate": 0.000592294962039333, + "loss": 0.8906, + "step": 8516 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005922094171615748, + "loss": 0.8984, + "step": 8517 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005921238694895684, + "loss": 0.7773, + "step": 8518 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005920383190259061, + "loss": 0.9141, + "step": 8519 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005919527657731806, + "loss": 0.8086, + "step": 8520 + }, + { + "epoch": 0.46, + "learning_rate": 0.000591867209733984, + "loss": 0.8438, + "step": 8521 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005917816509109095, + "loss": 1.0, + "step": 8522 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005916960893065492, + "loss": 0.7656, + "step": 8523 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005916105249234962, + "loss": 0.8555, + "step": 8524 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005915249577643436, + "loss": 0.8828, + "step": 8525 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005914393878316838, + "loss": 0.8594, + "step": 8526 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005913538151281105, + "loss": 0.8164, + "step": 8527 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005912682396562164, + "loss": 0.7852, + "step": 8528 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005911826614185948, + "loss": 0.8555, + "step": 8529 + }, + { + "epoch": 0.46, + "learning_rate": 0.000591097080417839, + "loss": 0.9688, + "step": 8530 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005910114966565425, + "loss": 0.9023, + "step": 8531 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005909259101372988, + "loss": 0.8633, + "step": 8532 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005908403208627012, + "loss": 0.9102, + "step": 8533 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005907547288353434, + "loss": 0.793, + "step": 8534 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005906691340578195, + "loss": 0.8164, + "step": 8535 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005905835365327228, + "loss": 0.8633, + "step": 8536 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005904979362626475, + "loss": 0.7852, + "step": 8537 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005904123332501874, + "loss": 0.8555, + "step": 8538 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005903267274979366, + "loss": 0.8555, + "step": 8539 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005902411190084893, + "loss": 0.8711, + "step": 8540 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005901555077844396, + "loss": 0.7266, + "step": 8541 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005900698938283819, + "loss": 0.8945, + "step": 8542 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005899842771429106, + "loss": 0.8594, + "step": 8543 + }, + { + "epoch": 0.46, + "learning_rate": 0.00058989865773062, + "loss": 0.8672, + "step": 8544 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005898130355941049, + "loss": 0.8008, + "step": 8545 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005897274107359597, + "loss": 0.875, + "step": 8546 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005896417831587793, + "loss": 0.8359, + "step": 8547 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005895561528651584, + "loss": 0.8672, + "step": 8548 + }, + { + "epoch": 0.46, + "learning_rate": 0.000589470519857692, + "loss": 0.8555, + "step": 8549 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005893848841389747, + "loss": 0.8906, + "step": 8550 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005892992457116019, + "loss": 0.8906, + "step": 8551 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005892136045781687, + "loss": 0.9453, + "step": 8552 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005891279607412703, + "loss": 0.8516, + "step": 8553 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005890423142035018, + "loss": 0.9219, + "step": 8554 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005889566649674587, + "loss": 0.8594, + "step": 8555 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005888710130357365, + "loss": 0.832, + "step": 8556 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005887853584109305, + "loss": 0.8438, + "step": 8557 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005886997010956367, + "loss": 0.8359, + "step": 8558 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005886140410924505, + "loss": 0.8906, + "step": 8559 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005885283784039678, + "loss": 0.8516, + "step": 8560 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005884427130327843, + "loss": 0.8789, + "step": 8561 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005883570449814962, + "loss": 0.793, + "step": 8562 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005882713742526995, + "loss": 0.7852, + "step": 8563 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005881857008489901, + "loss": 0.7461, + "step": 8564 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005881000247729643, + "loss": 0.8516, + "step": 8565 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005880143460272184, + "loss": 0.9531, + "step": 8566 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005879286646143486, + "loss": 0.7266, + "step": 8567 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005878429805369516, + "loss": 0.8711, + "step": 8568 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005877572937976235, + "loss": 0.918, + "step": 8569 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005876716043989615, + "loss": 0.8633, + "step": 8570 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005875859123435615, + "loss": 0.7852, + "step": 8571 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005875002176340209, + "loss": 0.9844, + "step": 8572 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005874145202729361, + "loss": 0.8594, + "step": 8573 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005873288202629045, + "loss": 0.7891, + "step": 8574 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005872431176065226, + "loss": 0.8516, + "step": 8575 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005871574123063876, + "loss": 0.8672, + "step": 8576 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005870717043650967, + "loss": 0.9688, + "step": 8577 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005869859937852473, + "loss": 0.8086, + "step": 8578 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005869002805694364, + "loss": 0.8945, + "step": 8579 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005868145647202616, + "loss": 0.8555, + "step": 8580 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005867288462403205, + "loss": 0.8906, + "step": 8581 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005866431251322102, + "loss": 0.8203, + "step": 8582 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005865574013985287, + "loss": 0.8398, + "step": 8583 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005864716750418737, + "loss": 0.9336, + "step": 8584 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005863859460648427, + "loss": 0.9258, + "step": 8585 + }, + { + "epoch": 0.46, + "learning_rate": 0.000586300214470034, + "loss": 0.8672, + "step": 8586 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005862144802600452, + "loss": 0.8594, + "step": 8587 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005861287434374746, + "loss": 0.9258, + "step": 8588 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005860430040049198, + "loss": 0.8828, + "step": 8589 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005859572619649797, + "loss": 0.875, + "step": 8590 + }, + { + "epoch": 0.46, + "learning_rate": 0.000585871517320252, + "loss": 0.8203, + "step": 8591 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005857857700733354, + "loss": 0.8047, + "step": 8592 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005857000202268281, + "loss": 0.9414, + "step": 8593 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005856142677833287, + "loss": 0.8125, + "step": 8594 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005855285127454357, + "loss": 0.9727, + "step": 8595 + }, + { + "epoch": 0.46, + "learning_rate": 0.000585442755115748, + "loss": 0.9102, + "step": 8596 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005853569948968639, + "loss": 0.8242, + "step": 8597 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005852712320913826, + "loss": 0.8398, + "step": 8598 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005851854667019029, + "loss": 0.9219, + "step": 8599 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005850996987310236, + "loss": 0.7773, + "step": 8600 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005850139281813442, + "loss": 0.793, + "step": 8601 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005849281550554631, + "loss": 0.8398, + "step": 8602 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005848423793559804, + "loss": 0.9062, + "step": 8603 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005847566010854946, + "loss": 0.8672, + "step": 8604 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005846708202466054, + "loss": 0.793, + "step": 8605 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005845850368419123, + "loss": 0.8789, + "step": 8606 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005844992508740146, + "loss": 0.8242, + "step": 8607 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005844134623455122, + "loss": 0.832, + "step": 8608 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005843276712590047, + "loss": 0.8516, + "step": 8609 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005842418776170914, + "loss": 0.9219, + "step": 8610 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005841560814223728, + "loss": 0.7383, + "step": 8611 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005840702826774483, + "loss": 0.8359, + "step": 8612 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005839844813849182, + "loss": 0.9219, + "step": 8613 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005838986775473825, + "loss": 0.8203, + "step": 8614 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005838128711674411, + "loss": 0.8867, + "step": 8615 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005837270622476946, + "loss": 0.7891, + "step": 8616 + }, + { + "epoch": 0.46, + "learning_rate": 0.000583641250790743, + "loss": 0.8555, + "step": 8617 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005835554367991867, + "loss": 0.8477, + "step": 8618 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005834696202756262, + "loss": 0.9258, + "step": 8619 + }, + { + "epoch": 0.46, + "learning_rate": 0.000583383801222662, + "loss": 0.8359, + "step": 8620 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005832979796428949, + "loss": 0.8672, + "step": 8621 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005832121555389251, + "loss": 0.8789, + "step": 8622 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005831263289133538, + "loss": 0.8516, + "step": 8623 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005830404997687819, + "loss": 0.8516, + "step": 8624 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005829546681078097, + "loss": 0.8984, + "step": 8625 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005828688339330388, + "loss": 0.8945, + "step": 8626 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005827829972470701, + "loss": 0.8906, + "step": 8627 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005826971580525044, + "loss": 0.875, + "step": 8628 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005826113163519435, + "loss": 0.9219, + "step": 8629 + }, + { + "epoch": 0.46, + "learning_rate": 0.000582525472147988, + "loss": 0.8086, + "step": 8630 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005824396254432398, + "loss": 0.8438, + "step": 8631 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005823537762403001, + "loss": 0.8242, + "step": 8632 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005822679245417704, + "loss": 0.8555, + "step": 8633 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005821820703502525, + "loss": 0.8906, + "step": 8634 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005820962136683477, + "loss": 0.9297, + "step": 8635 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005820103544986584, + "loss": 0.8477, + "step": 8636 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005819244928437855, + "loss": 0.8438, + "step": 8637 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005818386287063316, + "loss": 0.8711, + "step": 8638 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005817527620888982, + "loss": 0.8516, + "step": 8639 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005816668929940879, + "loss": 0.8516, + "step": 8640 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005815810214245023, + "loss": 0.8281, + "step": 8641 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005814951473827438, + "loss": 0.8242, + "step": 8642 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005814092708714145, + "loss": 0.8555, + "step": 8643 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005813233918931172, + "loss": 0.9141, + "step": 8644 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005812375104504536, + "loss": 0.7656, + "step": 8645 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005811516265460268, + "loss": 0.9648, + "step": 8646 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005810657401824392, + "loss": 0.7734, + "step": 8647 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005809798513622933, + "loss": 0.8438, + "step": 8648 + }, + { + "epoch": 0.46, + "learning_rate": 0.000580893960088192, + "loss": 0.8594, + "step": 8649 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005808080663627378, + "loss": 0.7578, + "step": 8650 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005807221701885339, + "loss": 0.8867, + "step": 8651 + }, + { + "epoch": 0.47, + "learning_rate": 0.000580636271568183, + "loss": 0.7852, + "step": 8652 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005805503705042882, + "loss": 0.8281, + "step": 8653 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005804644669994527, + "loss": 0.8008, + "step": 8654 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005803785610562794, + "loss": 0.918, + "step": 8655 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005802926526773717, + "loss": 0.8711, + "step": 8656 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005802067418653329, + "loss": 0.8398, + "step": 8657 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005801208286227664, + "loss": 0.9062, + "step": 8658 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005800349129522756, + "loss": 0.8477, + "step": 8659 + }, + { + "epoch": 0.47, + "learning_rate": 0.000579948994856464, + "loss": 0.8008, + "step": 8660 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005798630743379351, + "loss": 0.9492, + "step": 8661 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005797771513992931, + "loss": 0.9375, + "step": 8662 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005796912260431409, + "loss": 0.8633, + "step": 8663 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005796052982720831, + "loss": 0.8594, + "step": 8664 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005795193680887231, + "loss": 0.9102, + "step": 8665 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005794334354956652, + "loss": 0.7969, + "step": 8666 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005793475004955132, + "loss": 0.8555, + "step": 8667 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005792615630908713, + "loss": 0.8203, + "step": 8668 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005791756232843436, + "loss": 0.8828, + "step": 8669 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005790896810785347, + "loss": 0.9102, + "step": 8670 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005790037364760484, + "loss": 0.8203, + "step": 8671 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005789177894794894, + "loss": 0.9102, + "step": 8672 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005788318400914621, + "loss": 0.8711, + "step": 8673 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005787458883145713, + "loss": 0.8164, + "step": 8674 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005786599341514214, + "loss": 0.8281, + "step": 8675 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005785739776046168, + "loss": 0.8438, + "step": 8676 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005784880186767631, + "loss": 0.9336, + "step": 8677 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005784020573704642, + "loss": 0.7617, + "step": 8678 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005783160936883255, + "loss": 0.8789, + "step": 8679 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005782301276329521, + "loss": 0.9102, + "step": 8680 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005781441592069487, + "loss": 0.9023, + "step": 8681 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005780581884129206, + "loss": 0.8398, + "step": 8682 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005779722152534729, + "loss": 0.8633, + "step": 8683 + }, + { + "epoch": 0.47, + "learning_rate": 0.000577886239731211, + "loss": 0.8867, + "step": 8684 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005778002618487403, + "loss": 0.8047, + "step": 8685 + }, + { + "epoch": 0.47, + "learning_rate": 0.000577714281608666, + "loss": 0.8047, + "step": 8686 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005776282990135938, + "loss": 0.8477, + "step": 8687 + }, + { + "epoch": 0.47, + "learning_rate": 0.000577542314066129, + "loss": 0.832, + "step": 8688 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005774563267688775, + "loss": 0.8789, + "step": 8689 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005773703371244448, + "loss": 0.8164, + "step": 8690 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005772843451354368, + "loss": 0.8477, + "step": 8691 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005771983508044593, + "loss": 0.8477, + "step": 8692 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005771123541341181, + "loss": 0.8984, + "step": 8693 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005770263551270192, + "loss": 0.8359, + "step": 8694 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005769403537857689, + "loss": 0.7578, + "step": 8695 + }, + { + "epoch": 0.47, + "learning_rate": 0.000576854350112973, + "loss": 0.8477, + "step": 8696 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005767683441112381, + "loss": 0.8477, + "step": 8697 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005766823357831698, + "loss": 0.8672, + "step": 8698 + }, + { + "epoch": 0.47, + "learning_rate": 0.000576596325131375, + "loss": 0.8711, + "step": 8699 + }, + { + "epoch": 0.47, + "learning_rate": 0.00057651031215846, + "loss": 0.9375, + "step": 8700 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005764242968670314, + "loss": 0.7422, + "step": 8701 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005763382792596953, + "loss": 0.7734, + "step": 8702 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005762522593390587, + "loss": 0.9336, + "step": 8703 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005761662371077281, + "loss": 0.8398, + "step": 8704 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005760802125683105, + "loss": 0.9336, + "step": 8705 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005759941857234123, + "loss": 0.7773, + "step": 8706 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005759081565756407, + "loss": 0.7695, + "step": 8707 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005758221251276026, + "loss": 0.8555, + "step": 8708 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005757360913819051, + "loss": 0.8633, + "step": 8709 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005756500553411553, + "loss": 0.8555, + "step": 8710 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005755640170079602, + "loss": 0.8516, + "step": 8711 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005754779763849272, + "loss": 0.8828, + "step": 8712 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005753919334746637, + "loss": 0.9062, + "step": 8713 + }, + { + "epoch": 0.47, + "learning_rate": 0.000575305888279777, + "loss": 0.8906, + "step": 8714 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005752198408028745, + "loss": 0.8047, + "step": 8715 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005751337910465636, + "loss": 0.8359, + "step": 8716 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005750477390134521, + "loss": 0.8633, + "step": 8717 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005749616847061476, + "loss": 0.7773, + "step": 8718 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005748756281272578, + "loss": 0.918, + "step": 8719 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005747895692793906, + "loss": 0.8398, + "step": 8720 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005747035081651536, + "loss": 0.8008, + "step": 8721 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005746174447871551, + "loss": 0.875, + "step": 8722 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005745313791480028, + "loss": 0.9102, + "step": 8723 + }, + { + "epoch": 0.47, + "learning_rate": 0.000574445311250305, + "loss": 0.9141, + "step": 8724 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005743592410966696, + "loss": 0.9023, + "step": 8725 + }, + { + "epoch": 0.47, + "learning_rate": 0.000574273168689705, + "loss": 0.875, + "step": 8726 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005741870940320195, + "loss": 0.8672, + "step": 8727 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005741010171262213, + "loss": 0.8828, + "step": 8728 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005740149379749187, + "loss": 0.8203, + "step": 8729 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005739288565807207, + "loss": 0.8906, + "step": 8730 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005738427729462352, + "loss": 0.8711, + "step": 8731 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005737566870740713, + "loss": 0.8555, + "step": 8732 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005736705989668374, + "loss": 0.7773, + "step": 8733 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005735845086271424, + "loss": 0.8672, + "step": 8734 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005734984160575951, + "loss": 0.8711, + "step": 8735 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005734123212608043, + "loss": 0.9688, + "step": 8736 + }, + { + "epoch": 0.47, + "learning_rate": 0.000573326224239379, + "loss": 0.8789, + "step": 8737 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005732401249959284, + "loss": 0.8633, + "step": 8738 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005731540235330612, + "loss": 0.8281, + "step": 8739 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005730679198533871, + "loss": 0.8555, + "step": 8740 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005729818139595148, + "loss": 0.8672, + "step": 8741 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005728957058540538, + "loss": 0.8008, + "step": 8742 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005728095955396136, + "loss": 0.8711, + "step": 8743 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005727234830188033, + "loss": 0.8945, + "step": 8744 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005726373682942328, + "loss": 0.9023, + "step": 8745 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005725512513685112, + "loss": 0.8086, + "step": 8746 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005724651322442484, + "loss": 0.9102, + "step": 8747 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005723790109240542, + "loss": 1.0, + "step": 8748 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005722928874105381, + "loss": 0.8242, + "step": 8749 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005722067617063101, + "loss": 0.9297, + "step": 8750 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005721206338139802, + "loss": 0.8555, + "step": 8751 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005720345037361578, + "loss": 0.8477, + "step": 8752 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005719483714754538, + "loss": 0.8594, + "step": 8753 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005718622370344774, + "loss": 0.8711, + "step": 8754 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005717761004158392, + "loss": 0.8477, + "step": 8755 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005716899616221495, + "loss": 0.8359, + "step": 8756 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005716038206560186, + "loss": 0.9336, + "step": 8757 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005715176775200567, + "loss": 0.8281, + "step": 8758 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005714315322168742, + "loss": 0.8594, + "step": 8759 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005713453847490813, + "loss": 0.8477, + "step": 8760 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005712592351192894, + "loss": 0.9023, + "step": 8761 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005711730833301083, + "loss": 0.8711, + "step": 8762 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005710869293841492, + "loss": 0.8359, + "step": 8763 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005710007732840224, + "loss": 0.9023, + "step": 8764 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005709146150323391, + "loss": 0.9375, + "step": 8765 + }, + { + "epoch": 0.47, + "learning_rate": 0.00057082845463171, + "loss": 0.8789, + "step": 8766 + }, + { + "epoch": 0.47, + "learning_rate": 0.000570742292084746, + "loss": 0.7656, + "step": 8767 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005706561273940583, + "loss": 0.8398, + "step": 8768 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005705699605622577, + "loss": 0.8281, + "step": 8769 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005704837915919554, + "loss": 0.9688, + "step": 8770 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005703976204857631, + "loss": 0.8203, + "step": 8771 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005703114472462911, + "loss": 0.8242, + "step": 8772 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005702252718761517, + "loss": 0.8438, + "step": 8773 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005701390943779558, + "loss": 0.8633, + "step": 8774 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005700529147543149, + "loss": 0.8164, + "step": 8775 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005699667330078406, + "loss": 0.7773, + "step": 8776 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005698805491411445, + "loss": 0.832, + "step": 8777 + }, + { + "epoch": 0.47, + "learning_rate": 0.000569794363156838, + "loss": 0.8008, + "step": 8778 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005697081750575334, + "loss": 0.8594, + "step": 8779 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005696219848458417, + "loss": 0.8398, + "step": 8780 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005695357925243756, + "loss": 0.8984, + "step": 8781 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005694495980957463, + "loss": 0.832, + "step": 8782 + }, + { + "epoch": 0.47, + "learning_rate": 0.000569363401562566, + "loss": 0.8594, + "step": 8783 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005692772029274469, + "loss": 0.9023, + "step": 8784 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005691910021930009, + "loss": 0.8906, + "step": 8785 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005691047993618405, + "loss": 0.8516, + "step": 8786 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005690185944365775, + "loss": 0.7969, + "step": 8787 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005689323874198244, + "loss": 0.8906, + "step": 8788 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005688461783141935, + "loss": 0.7461, + "step": 8789 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005687599671222972, + "loss": 0.8242, + "step": 8790 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005686737538467482, + "loss": 0.7969, + "step": 8791 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005685875384901588, + "loss": 0.9219, + "step": 8792 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005685013210551415, + "loss": 0.875, + "step": 8793 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005684151015443095, + "loss": 0.8906, + "step": 8794 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005683288799602749, + "loss": 0.918, + "step": 8795 + }, + { + "epoch": 0.47, + "learning_rate": 0.000568242656305651, + "loss": 0.8711, + "step": 8796 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005681564305830503, + "loss": 0.8164, + "step": 8797 + }, + { + "epoch": 0.47, + "learning_rate": 0.000568070202795086, + "loss": 0.8945, + "step": 8798 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005679839729443709, + "loss": 0.8164, + "step": 8799 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005678977410335181, + "loss": 0.793, + "step": 8800 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005678115070651407, + "loss": 0.8789, + "step": 8801 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005677252710418519, + "loss": 0.8945, + "step": 8802 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005676390329662648, + "loss": 0.9023, + "step": 8803 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005675527928409932, + "loss": 0.8906, + "step": 8804 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005674665506686498, + "loss": 0.8281, + "step": 8805 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005673803064518482, + "loss": 0.9219, + "step": 8806 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005672940601932022, + "loss": 0.8359, + "step": 8807 + }, + { + "epoch": 0.47, + "learning_rate": 0.000567207811895325, + "loss": 0.8828, + "step": 8808 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005671215615608304, + "loss": 0.8867, + "step": 8809 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005670353091923319, + "loss": 0.8906, + "step": 8810 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005669490547924436, + "loss": 0.9297, + "step": 8811 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005668627983637787, + "loss": 0.9258, + "step": 8812 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005667765399089514, + "loss": 0.9844, + "step": 8813 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005666902794305758, + "loss": 0.9141, + "step": 8814 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005666040169312655, + "loss": 0.8711, + "step": 8815 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005665177524136348, + "loss": 0.9531, + "step": 8816 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005664314858802977, + "loss": 0.7656, + "step": 8817 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005663452173338683, + "loss": 0.9023, + "step": 8818 + }, + { + "epoch": 0.47, + "learning_rate": 0.000566258946776961, + "loss": 0.9297, + "step": 8819 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005661726742121898, + "loss": 0.8828, + "step": 8820 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005660863996421693, + "loss": 0.8789, + "step": 8821 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005660001230695139, + "loss": 0.8555, + "step": 8822 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005659138444968378, + "loss": 0.8984, + "step": 8823 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005658275639267559, + "loss": 0.8555, + "step": 8824 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005657412813618824, + "loss": 0.9141, + "step": 8825 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005656549968048322, + "loss": 0.8984, + "step": 8826 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005655687102582199, + "loss": 0.8867, + "step": 8827 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005654824217246603, + "loss": 0.8242, + "step": 8828 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005653961312067683, + "loss": 0.9219, + "step": 8829 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005653098387071586, + "loss": 0.793, + "step": 8830 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005652235442284464, + "loss": 0.6914, + "step": 8831 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005651372477732464, + "loss": 0.8594, + "step": 8832 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005650509493441739, + "loss": 0.8672, + "step": 8833 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005649646489438439, + "loss": 0.7969, + "step": 8834 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005648783465748718, + "loss": 0.8398, + "step": 8835 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005647920422398725, + "loss": 0.9375, + "step": 8836 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005647057359414617, + "loss": 0.9375, + "step": 8837 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005646194276822542, + "loss": 0.8242, + "step": 8838 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005645331174648662, + "loss": 0.8477, + "step": 8839 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005644468052919125, + "loss": 0.8789, + "step": 8840 + }, + { + "epoch": 0.48, + "learning_rate": 0.000564360491166009, + "loss": 0.8086, + "step": 8841 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005642741750897712, + "loss": 0.875, + "step": 8842 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005641878570658148, + "loss": 0.875, + "step": 8843 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005641015370967554, + "loss": 0.9688, + "step": 8844 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005640152151852089, + "loss": 0.9023, + "step": 8845 + }, + { + "epoch": 0.48, + "learning_rate": 0.000563928891333791, + "loss": 0.8398, + "step": 8846 + }, + { + "epoch": 0.48, + "learning_rate": 0.000563842565545118, + "loss": 0.7852, + "step": 8847 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005637562378218052, + "loss": 0.8398, + "step": 8848 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005636699081664692, + "loss": 0.8125, + "step": 8849 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005635835765817256, + "loss": 0.9102, + "step": 8850 + }, + { + "epoch": 0.48, + "learning_rate": 0.000563497243070191, + "loss": 0.8359, + "step": 8851 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005634109076344813, + "loss": 0.793, + "step": 8852 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005633245702772127, + "loss": 0.9102, + "step": 8853 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005632382310010017, + "loss": 0.918, + "step": 8854 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005631518898084647, + "loss": 0.8242, + "step": 8855 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005630655467022177, + "loss": 0.7734, + "step": 8856 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005629792016848777, + "loss": 0.9062, + "step": 8857 + }, + { + "epoch": 0.48, + "learning_rate": 0.000562892854759061, + "loss": 0.7734, + "step": 8858 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005628065059273841, + "loss": 0.8828, + "step": 8859 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005627201551924639, + "loss": 0.9688, + "step": 8860 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005626338025569169, + "loss": 0.8203, + "step": 8861 + }, + { + "epoch": 0.48, + "learning_rate": 0.00056254744802336, + "loss": 0.8672, + "step": 8862 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005624610915944098, + "loss": 0.7695, + "step": 8863 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005623747332726835, + "loss": 0.8047, + "step": 8864 + }, + { + "epoch": 0.48, + "learning_rate": 0.000562288373060798, + "loss": 0.8281, + "step": 8865 + }, + { + "epoch": 0.48, + "learning_rate": 0.00056220201096137, + "loss": 0.875, + "step": 8866 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005621156469770167, + "loss": 0.8359, + "step": 8867 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005620292811103555, + "loss": 0.8281, + "step": 8868 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005619429133640031, + "loss": 0.8945, + "step": 8869 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005618565437405772, + "loss": 0.9023, + "step": 8870 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005617701722426946, + "loss": 0.8477, + "step": 8871 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005616837988729731, + "loss": 0.8125, + "step": 8872 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005615974236340296, + "loss": 0.8516, + "step": 8873 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005615110465284819, + "loss": 0.8164, + "step": 8874 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005614246675589475, + "loss": 0.832, + "step": 8875 + }, + { + "epoch": 0.48, + "learning_rate": 0.000561338286728044, + "loss": 0.8672, + "step": 8876 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005612519040383888, + "loss": 0.8359, + "step": 8877 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005611655194925997, + "loss": 0.9922, + "step": 8878 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005610791330932945, + "loss": 0.8242, + "step": 8879 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005609927448430909, + "loss": 0.8086, + "step": 8880 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005609063547446069, + "loss": 0.8242, + "step": 8881 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005608199628004601, + "loss": 0.8203, + "step": 8882 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005607335690132687, + "loss": 0.875, + "step": 8883 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005606471733856507, + "loss": 0.8828, + "step": 8884 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005605607759202241, + "loss": 0.9023, + "step": 8885 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005604743766196071, + "loss": 0.8164, + "step": 8886 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005603879754864177, + "loss": 0.8438, + "step": 8887 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005603015725232746, + "loss": 0.9609, + "step": 8888 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005602151677327955, + "loss": 0.8281, + "step": 8889 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005601287611175992, + "loss": 0.8047, + "step": 8890 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005600423526803037, + "loss": 0.8164, + "step": 8891 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005599559424235279, + "loss": 0.8633, + "step": 8892 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005598695303498899, + "loss": 0.9219, + "step": 8893 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005597831164620087, + "loss": 0.8672, + "step": 8894 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005596967007625025, + "loss": 0.8789, + "step": 8895 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005596102832539902, + "loss": 0.8438, + "step": 8896 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005595238639390907, + "loss": 0.8633, + "step": 8897 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005594374428204223, + "loss": 0.8594, + "step": 8898 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005593510199006043, + "loss": 0.7227, + "step": 8899 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005592645951822554, + "loss": 0.8438, + "step": 8900 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005591781686679947, + "loss": 0.8359, + "step": 8901 + }, + { + "epoch": 0.48, + "learning_rate": 0.000559091740360441, + "loss": 0.8008, + "step": 8902 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005590053102622136, + "loss": 0.793, + "step": 8903 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005589188783759313, + "loss": 0.793, + "step": 8904 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005588324447042135, + "loss": 0.7812, + "step": 8905 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005587460092496794, + "loss": 0.9141, + "step": 8906 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005586595720149483, + "loss": 0.8906, + "step": 8907 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005585731330026394, + "loss": 0.7539, + "step": 8908 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005584866922153722, + "loss": 0.9492, + "step": 8909 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005584002496557661, + "loss": 0.8125, + "step": 8910 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005583138053264406, + "loss": 0.8125, + "step": 8911 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005582273592300152, + "loss": 0.8359, + "step": 8912 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005581409113691097, + "loss": 0.8789, + "step": 8913 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005580544617463434, + "loss": 0.875, + "step": 8914 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005579680103643364, + "loss": 0.8438, + "step": 8915 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005578815572257083, + "loss": 0.7891, + "step": 8916 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005577951023330786, + "loss": 0.9766, + "step": 8917 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005577086456890676, + "loss": 0.9102, + "step": 8918 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005576221872962952, + "loss": 0.9453, + "step": 8919 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005575357271573811, + "loss": 0.8125, + "step": 8920 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005574492652749457, + "loss": 0.8242, + "step": 8921 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005573628016516084, + "loss": 0.8242, + "step": 8922 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005572763362899903, + "loss": 0.8711, + "step": 8923 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005571898691927108, + "loss": 0.8867, + "step": 8924 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005571034003623904, + "loss": 0.8359, + "step": 8925 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005570169298016494, + "loss": 0.8047, + "step": 8926 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005569304575131082, + "loss": 0.9297, + "step": 8927 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005568439834993871, + "loss": 0.8516, + "step": 8928 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005567575077631065, + "loss": 0.9062, + "step": 8929 + }, + { + "epoch": 0.48, + "learning_rate": 0.000556671030306887, + "loss": 0.8555, + "step": 8930 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005565845511333494, + "loss": 0.7383, + "step": 8931 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005564980702451137, + "loss": 0.8398, + "step": 8932 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005564115876448009, + "loss": 0.8633, + "step": 8933 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005563251033350318, + "loss": 0.7656, + "step": 8934 + }, + { + "epoch": 0.48, + "learning_rate": 0.000556238617318427, + "loss": 0.7188, + "step": 8935 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005561521295976076, + "loss": 0.9023, + "step": 8936 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005560656401751939, + "loss": 0.8516, + "step": 8937 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005559791490538073, + "loss": 0.8164, + "step": 8938 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005558926562360685, + "loss": 0.8555, + "step": 8939 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005558061617245987, + "loss": 0.8633, + "step": 8940 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005557196655220189, + "loss": 0.8008, + "step": 8941 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005556331676309502, + "loss": 0.8047, + "step": 8942 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005555466680540138, + "loss": 0.8203, + "step": 8943 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005554601667938308, + "loss": 0.7891, + "step": 8944 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005553736638530225, + "loss": 0.8242, + "step": 8945 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005552871592342106, + "loss": 0.9961, + "step": 8946 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005552006529400159, + "loss": 0.8828, + "step": 8947 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005551141449730602, + "loss": 0.7969, + "step": 8948 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005550276353359649, + "loss": 0.875, + "step": 8949 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005549411240313515, + "loss": 0.9023, + "step": 8950 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005548546110618415, + "loss": 0.8398, + "step": 8951 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005547680964300565, + "loss": 0.8398, + "step": 8952 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005546815801386185, + "loss": 0.8125, + "step": 8953 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005545950621901489, + "loss": 0.8906, + "step": 8954 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005545085425872695, + "loss": 0.8359, + "step": 8955 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005544220213326023, + "loss": 0.8086, + "step": 8956 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005543354984287689, + "loss": 0.8281, + "step": 8957 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005542489738783917, + "loss": 0.8555, + "step": 8958 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005541624476840924, + "loss": 0.8711, + "step": 8959 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005540759198484928, + "loss": 0.8867, + "step": 8960 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005539893903742153, + "loss": 0.8516, + "step": 8961 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005539028592638819, + "loss": 0.9023, + "step": 8962 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005538163265201148, + "loss": 0.7617, + "step": 8963 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005537297921455364, + "loss": 0.8984, + "step": 8964 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005536432561427686, + "loss": 0.8945, + "step": 8965 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005535567185144339, + "loss": 0.9297, + "step": 8966 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005534701792631549, + "loss": 0.7852, + "step": 8967 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005533836383915537, + "loss": 0.8633, + "step": 8968 + }, + { + "epoch": 0.48, + "learning_rate": 0.000553297095902253, + "loss": 0.8633, + "step": 8969 + }, + { + "epoch": 0.48, + "learning_rate": 0.000553210551797875, + "loss": 0.9297, + "step": 8970 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005531240060810428, + "loss": 0.7969, + "step": 8971 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005530374587543785, + "loss": 0.7969, + "step": 8972 + }, + { + "epoch": 0.48, + "learning_rate": 0.000552950909820505, + "loss": 0.8555, + "step": 8973 + }, + { + "epoch": 0.48, + "learning_rate": 0.000552864359282045, + "loss": 0.7461, + "step": 8974 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005527778071416214, + "loss": 0.8828, + "step": 8975 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005526912534018568, + "loss": 0.832, + "step": 8976 + }, + { + "epoch": 0.48, + "learning_rate": 0.000552604698065374, + "loss": 0.8516, + "step": 8977 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005525181411347961, + "loss": 0.8008, + "step": 8978 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005524315826127463, + "loss": 0.8281, + "step": 8979 + }, + { + "epoch": 0.48, + "learning_rate": 0.000552345022501847, + "loss": 0.8828, + "step": 8980 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005522584608047218, + "loss": 0.8438, + "step": 8981 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005521718975239934, + "loss": 0.7773, + "step": 8982 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005520853326622852, + "loss": 0.7852, + "step": 8983 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005519987662222204, + "loss": 0.7852, + "step": 8984 + }, + { + "epoch": 0.48, + "learning_rate": 0.000551912198206422, + "loss": 0.9805, + "step": 8985 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005518256286175137, + "loss": 0.8789, + "step": 8986 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005517390574581184, + "loss": 0.918, + "step": 8987 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005516524847308597, + "loss": 0.9023, + "step": 8988 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005515659104383614, + "loss": 0.7695, + "step": 8989 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005514793345832462, + "loss": 0.8125, + "step": 8990 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005513927571681382, + "loss": 0.9062, + "step": 8991 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005513061781956609, + "loss": 0.8398, + "step": 8992 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005512195976684379, + "loss": 0.8633, + "step": 8993 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005511330155890928, + "loss": 0.7891, + "step": 8994 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005510464319602493, + "loss": 0.9727, + "step": 8995 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005509598467845312, + "loss": 0.9023, + "step": 8996 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005508732600645627, + "loss": 0.8008, + "step": 8997 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005507866718029668, + "loss": 0.8477, + "step": 8998 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005507000820023682, + "loss": 0.9102, + "step": 8999 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005506134906653906, + "loss": 0.8438, + "step": 9000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005505268977946579, + "loss": 0.918, + "step": 9001 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005504403033927943, + "loss": 0.8242, + "step": 9002 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005503537074624238, + "loss": 0.8438, + "step": 9003 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005502671100061704, + "loss": 0.9453, + "step": 9004 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005501805110266586, + "loss": 0.9492, + "step": 9005 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005500939105265123, + "loss": 0.8516, + "step": 9006 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005500073085083563, + "loss": 0.9023, + "step": 9007 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005499207049748144, + "loss": 0.8438, + "step": 9008 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005498340999285112, + "loss": 0.8828, + "step": 9009 + }, + { + "epoch": 0.48, + "learning_rate": 0.000549747493372071, + "loss": 0.8477, + "step": 9010 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005496608853081183, + "loss": 0.7656, + "step": 9011 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005495742757392779, + "loss": 0.8438, + "step": 9012 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005494876646681738, + "loss": 0.8398, + "step": 9013 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005494010520974311, + "loss": 0.7266, + "step": 9014 + }, + { + "epoch": 0.48, + "learning_rate": 0.000549314438029674, + "loss": 0.7969, + "step": 9015 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005492278224675278, + "loss": 0.8789, + "step": 9016 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005491412054136166, + "loss": 0.8984, + "step": 9017 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005490545868705656, + "loss": 0.8477, + "step": 9018 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005489679668409994, + "loss": 0.8516, + "step": 9019 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005488813453275431, + "loss": 0.8789, + "step": 9020 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005487947223328213, + "loss": 0.8164, + "step": 9021 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005487080978594593, + "loss": 0.8398, + "step": 9022 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005486214719100818, + "loss": 0.8945, + "step": 9023 + }, + { + "epoch": 0.49, + "learning_rate": 0.000548534844487314, + "loss": 0.8164, + "step": 9024 + }, + { + "epoch": 0.49, + "learning_rate": 0.000548448215593781, + "loss": 0.8828, + "step": 9025 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005483615852321079, + "loss": 0.8398, + "step": 9026 + }, + { + "epoch": 0.49, + "learning_rate": 0.00054827495340492, + "loss": 0.8906, + "step": 9027 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005481883201148423, + "loss": 0.8125, + "step": 9028 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005481016853645002, + "loss": 0.8984, + "step": 9029 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005480150491565194, + "loss": 0.8086, + "step": 9030 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005479284114935244, + "loss": 0.8477, + "step": 9031 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005478417723781415, + "loss": 0.9258, + "step": 9032 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005477551318129954, + "loss": 0.8984, + "step": 9033 + }, + { + "epoch": 0.49, + "learning_rate": 0.000547668489800712, + "loss": 0.918, + "step": 9034 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005475818463439169, + "loss": 0.8516, + "step": 9035 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005474952014452355, + "loss": 0.9102, + "step": 9036 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005474085551072936, + "loss": 0.8594, + "step": 9037 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005473219073327167, + "loss": 0.8867, + "step": 9038 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005472352581241304, + "loss": 0.9648, + "step": 9039 + }, + { + "epoch": 0.49, + "learning_rate": 0.000547148607484161, + "loss": 0.8164, + "step": 9040 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005470619554154335, + "loss": 0.8242, + "step": 9041 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005469753019205744, + "loss": 0.8086, + "step": 9042 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005468886470022092, + "loss": 0.8867, + "step": 9043 + }, + { + "epoch": 0.49, + "learning_rate": 0.000546801990662964, + "loss": 0.8516, + "step": 9044 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005467153329054648, + "loss": 0.8711, + "step": 9045 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005466286737323375, + "loss": 0.9258, + "step": 9046 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005465420131462083, + "loss": 0.9062, + "step": 9047 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005464553511497032, + "loss": 0.8555, + "step": 9048 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005463686877454485, + "loss": 0.9453, + "step": 9049 + }, + { + "epoch": 0.49, + "learning_rate": 0.00054628202293607, + "loss": 0.7578, + "step": 9050 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005461953567241943, + "loss": 0.8086, + "step": 9051 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005461086891124476, + "loss": 0.8164, + "step": 9052 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005460220201034561, + "loss": 0.9609, + "step": 9053 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005459353496998461, + "loss": 0.8633, + "step": 9054 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005458486779042443, + "loss": 0.75, + "step": 9055 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005457620047192768, + "loss": 0.9102, + "step": 9056 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005456753301475704, + "loss": 0.8672, + "step": 9057 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005455886541917514, + "loss": 0.9062, + "step": 9058 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005455019768544464, + "loss": 0.7969, + "step": 9059 + }, + { + "epoch": 0.49, + "learning_rate": 0.000545415298138282, + "loss": 0.7734, + "step": 9060 + }, + { + "epoch": 0.49, + "learning_rate": 0.000545328618045885, + "loss": 0.8359, + "step": 9061 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005452419365798818, + "loss": 0.8516, + "step": 9062 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005451552537428995, + "loss": 0.9023, + "step": 9063 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005450685695375646, + "loss": 0.7383, + "step": 9064 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005449818839665042, + "loss": 0.8359, + "step": 9065 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005448951970323445, + "loss": 0.9023, + "step": 9066 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005448085087377132, + "loss": 0.9219, + "step": 9067 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005447218190852369, + "loss": 0.8516, + "step": 9068 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005446351280775424, + "loss": 0.8828, + "step": 9069 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005445484357172571, + "loss": 0.9141, + "step": 9070 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005444617420070077, + "loss": 0.8672, + "step": 9071 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005443750469494215, + "loss": 0.8398, + "step": 9072 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005442883505471258, + "loss": 0.9102, + "step": 9073 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005442016528027473, + "loss": 0.8359, + "step": 9074 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005441149537189137, + "loss": 0.8633, + "step": 9075 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005440282532982522, + "loss": 0.8359, + "step": 9076 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005439415515433898, + "loss": 0.8008, + "step": 9077 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005438548484569541, + "loss": 0.7461, + "step": 9078 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005437681440415725, + "loss": 0.8516, + "step": 9079 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005436814382998723, + "loss": 0.8359, + "step": 9080 + }, + { + "epoch": 0.49, + "learning_rate": 0.000543594731234481, + "loss": 0.8203, + "step": 9081 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005435080228480262, + "loss": 0.8398, + "step": 9082 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005434213131431353, + "loss": 0.9492, + "step": 9083 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005433346021224361, + "loss": 0.8359, + "step": 9084 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005432478897885563, + "loss": 0.8281, + "step": 9085 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005431611761441231, + "loss": 0.8477, + "step": 9086 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005430744611917644, + "loss": 0.9531, + "step": 9087 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005429877449341083, + "loss": 0.8906, + "step": 9088 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005429010273737821, + "loss": 0.8516, + "step": 9089 + }, + { + "epoch": 0.49, + "learning_rate": 0.000542814308513414, + "loss": 0.8242, + "step": 9090 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005427275883556316, + "loss": 0.9258, + "step": 9091 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005426408669030631, + "loss": 0.9648, + "step": 9092 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005425541441583362, + "loss": 0.9102, + "step": 9093 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005424674201240788, + "loss": 0.8086, + "step": 9094 + }, + { + "epoch": 0.49, + "learning_rate": 0.000542380694802919, + "loss": 0.8516, + "step": 9095 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005422939681974853, + "loss": 0.875, + "step": 9096 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005422072403104051, + "loss": 0.8516, + "step": 9097 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005421205111443073, + "loss": 0.8008, + "step": 9098 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005420337807018192, + "loss": 0.8164, + "step": 9099 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005419470489855697, + "loss": 0.832, + "step": 9100 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005418603159981867, + "loss": 0.8438, + "step": 9101 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005417735817422987, + "loss": 0.9297, + "step": 9102 + }, + { + "epoch": 0.49, + "learning_rate": 0.000541686846220534, + "loss": 0.8672, + "step": 9103 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005416001094355209, + "loss": 0.832, + "step": 9104 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005415133713898878, + "loss": 0.8125, + "step": 9105 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005414266320862633, + "loss": 0.8047, + "step": 9106 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005413398915272756, + "loss": 0.8672, + "step": 9107 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005412531497155534, + "loss": 0.8945, + "step": 9108 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005411664066537253, + "loss": 0.8828, + "step": 9109 + }, + { + "epoch": 0.49, + "learning_rate": 0.00054107966234442, + "loss": 0.8008, + "step": 9110 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005409929167902659, + "loss": 0.7461, + "step": 9111 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005409061699938916, + "loss": 0.8945, + "step": 9112 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005408194219579261, + "loss": 0.793, + "step": 9113 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005407326726849981, + "loss": 0.9883, + "step": 9114 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005406459221777363, + "loss": 0.8594, + "step": 9115 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005405591704387695, + "loss": 0.8789, + "step": 9116 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005404724174707265, + "loss": 0.875, + "step": 9117 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005403856632762363, + "loss": 0.8906, + "step": 9118 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005402989078579281, + "loss": 0.8945, + "step": 9119 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005402121512184303, + "loss": 0.8594, + "step": 9120 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005401253933603726, + "loss": 0.7383, + "step": 9121 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005400386342863835, + "loss": 0.8789, + "step": 9122 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005399518739990922, + "loss": 0.793, + "step": 9123 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005398651125011279, + "loss": 0.9805, + "step": 9124 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005397783497951197, + "loss": 0.8281, + "step": 9125 + }, + { + "epoch": 0.49, + "learning_rate": 0.000539691585883697, + "loss": 0.8203, + "step": 9126 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005396048207694888, + "loss": 0.8281, + "step": 9127 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005395180544551243, + "loss": 0.7656, + "step": 9128 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005394312869432332, + "loss": 0.8984, + "step": 9129 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005393445182364444, + "loss": 0.793, + "step": 9130 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005392577483373875, + "loss": 0.7656, + "step": 9131 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005391709772486917, + "loss": 0.8789, + "step": 9132 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005390842049729869, + "loss": 0.8008, + "step": 9133 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005389974315129023, + "loss": 0.8906, + "step": 9134 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005389106568710672, + "loss": 0.8242, + "step": 9135 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005388238810501116, + "loss": 0.8516, + "step": 9136 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005387371040526648, + "loss": 0.8789, + "step": 9137 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005386503258813563, + "loss": 0.8555, + "step": 9138 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005385635465388162, + "loss": 0.8672, + "step": 9139 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005384767660276739, + "loss": 0.7852, + "step": 9140 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005383899843505592, + "loss": 0.8828, + "step": 9141 + }, + { + "epoch": 0.49, + "learning_rate": 0.000538303201510102, + "loss": 0.8633, + "step": 9142 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005382164175089318, + "loss": 0.8125, + "step": 9143 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005381296323496787, + "loss": 0.9102, + "step": 9144 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005380428460349724, + "loss": 0.8711, + "step": 9145 + }, + { + "epoch": 0.49, + "learning_rate": 0.000537956058567443, + "loss": 0.832, + "step": 9146 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005378692699497203, + "loss": 0.9609, + "step": 9147 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005377824801844343, + "loss": 0.7656, + "step": 9148 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005376956892742152, + "loss": 0.9297, + "step": 9149 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005376088972216926, + "loss": 0.8555, + "step": 9150 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005375221040294971, + "loss": 0.8867, + "step": 9151 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005374353097002586, + "loss": 0.8711, + "step": 9152 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005373485142366072, + "loss": 0.9062, + "step": 9153 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005372617176411732, + "loss": 0.7539, + "step": 9154 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005371749199165866, + "loss": 0.9258, + "step": 9155 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005370881210654778, + "loss": 0.8828, + "step": 9156 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005370013210904772, + "loss": 0.8555, + "step": 9157 + }, + { + "epoch": 0.49, + "learning_rate": 0.000536914519994215, + "loss": 0.8359, + "step": 9158 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005368277177793217, + "loss": 0.7734, + "step": 9159 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005367409144484275, + "loss": 0.8906, + "step": 9160 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005366541100041629, + "loss": 0.875, + "step": 9161 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005365673044491583, + "loss": 0.7812, + "step": 9162 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005364804977860444, + "loss": 0.8047, + "step": 9163 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005363936900174515, + "loss": 0.7266, + "step": 9164 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005363068811460103, + "loss": 0.8711, + "step": 9165 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005362200711743513, + "loss": 0.9062, + "step": 9166 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005361332601051051, + "loss": 0.8867, + "step": 9167 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005360464479409026, + "loss": 0.918, + "step": 9168 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005359596346843741, + "loss": 0.918, + "step": 9169 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005358728203381508, + "loss": 0.8242, + "step": 9170 + }, + { + "epoch": 0.49, + "learning_rate": 0.000535786004904863, + "loss": 0.8359, + "step": 9171 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005356991883871419, + "loss": 0.9062, + "step": 9172 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005356123707876178, + "loss": 0.8164, + "step": 9173 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005355255521089221, + "loss": 0.9102, + "step": 9174 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005354387323536855, + "loss": 0.7852, + "step": 9175 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005353519115245389, + "loss": 0.8867, + "step": 9176 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005352650896241131, + "loss": 0.8359, + "step": 9177 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005351782666550394, + "loss": 0.8711, + "step": 9178 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005350914426199487, + "loss": 0.9375, + "step": 9179 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005350046175214721, + "loss": 0.9648, + "step": 9180 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005349177913622403, + "loss": 0.8477, + "step": 9181 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005348309641448849, + "loss": 0.8516, + "step": 9182 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005347441358720368, + "loss": 0.8516, + "step": 9183 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005346573065463273, + "loss": 0.8711, + "step": 9184 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005345704761703876, + "loss": 0.8594, + "step": 9185 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005344836447468489, + "loss": 0.8828, + "step": 9186 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005343968122783425, + "loss": 0.875, + "step": 9187 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005343099787674996, + "loss": 0.8789, + "step": 9188 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005342231442169518, + "loss": 0.793, + "step": 9189 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005341363086293303, + "loss": 0.8398, + "step": 9190 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005340494720072664, + "loss": 0.8984, + "step": 9191 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005339626343533917, + "loss": 0.8828, + "step": 9192 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005338757956703376, + "loss": 0.8203, + "step": 9193 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005337889559607356, + "loss": 0.8984, + "step": 9194 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005337021152272174, + "loss": 0.8398, + "step": 9195 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005336152734724141, + "loss": 0.832, + "step": 9196 + }, + { + "epoch": 0.49, + "learning_rate": 0.000533528430698958, + "loss": 0.8359, + "step": 9197 + }, + { + "epoch": 0.49, + "learning_rate": 0.00053344158690948, + "loss": 0.8164, + "step": 9198 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005333547421066122, + "loss": 0.9141, + "step": 9199 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005332678962929861, + "loss": 0.8398, + "step": 9200 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005331810494712335, + "loss": 0.8047, + "step": 9201 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005330942016439861, + "loss": 0.8789, + "step": 9202 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005330073528138757, + "loss": 0.832, + "step": 9203 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005329205029835339, + "loss": 0.8906, + "step": 9204 + }, + { + "epoch": 0.49, + "learning_rate": 0.000532833652155593, + "loss": 0.832, + "step": 9205 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005327468003326844, + "loss": 0.7852, + "step": 9206 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005326599475174403, + "loss": 0.7422, + "step": 9207 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005325730937124924, + "loss": 0.8047, + "step": 9208 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005324862389204729, + "loss": 0.9141, + "step": 9209 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005323993831440136, + "loss": 0.8086, + "step": 9210 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005323125263857466, + "loss": 0.8008, + "step": 9211 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005322256686483038, + "loss": 0.8828, + "step": 9212 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005321388099343175, + "loss": 0.8281, + "step": 9213 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005320519502464194, + "loss": 0.9219, + "step": 9214 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005319650895872424, + "loss": 0.8438, + "step": 9215 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005318782279594178, + "loss": 0.8242, + "step": 9216 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005317913653655783, + "loss": 0.8164, + "step": 9217 + }, + { + "epoch": 0.5, + "learning_rate": 0.000531704501808356, + "loss": 0.8398, + "step": 9218 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005316176372903832, + "loss": 0.8555, + "step": 9219 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005315307718142921, + "loss": 0.8828, + "step": 9220 + }, + { + "epoch": 0.5, + "learning_rate": 0.000531443905382715, + "loss": 0.8438, + "step": 9221 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005313570379982843, + "loss": 0.8438, + "step": 9222 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005312701696636323, + "loss": 0.8438, + "step": 9223 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005311833003813915, + "loss": 0.8906, + "step": 9224 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005310964301541944, + "loss": 0.8477, + "step": 9225 + }, + { + "epoch": 0.5, + "learning_rate": 0.000531009558984673, + "loss": 0.8906, + "step": 9226 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005309226868754605, + "loss": 0.8945, + "step": 9227 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005308358138291887, + "loss": 0.918, + "step": 9228 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005307489398484906, + "loss": 0.8242, + "step": 9229 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005306620649359986, + "loss": 0.8477, + "step": 9230 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005305751890943453, + "loss": 0.8594, + "step": 9231 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005304883123261633, + "loss": 0.9609, + "step": 9232 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005304014346340853, + "loss": 0.8516, + "step": 9233 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005303145560207439, + "loss": 0.8789, + "step": 9234 + }, + { + "epoch": 0.5, + "learning_rate": 0.000530227676488772, + "loss": 0.8633, + "step": 9235 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005301407960408022, + "loss": 0.793, + "step": 9236 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005300539146794672, + "loss": 0.7734, + "step": 9237 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005299670324074, + "loss": 0.9961, + "step": 9238 + }, + { + "epoch": 0.5, + "learning_rate": 0.000529880149227233, + "loss": 0.9023, + "step": 9239 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005297932651415997, + "loss": 0.8477, + "step": 9240 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005297063801531323, + "loss": 0.8828, + "step": 9241 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005296194942644642, + "loss": 0.7695, + "step": 9242 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005295326074782281, + "loss": 0.8906, + "step": 9243 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005294457197970569, + "loss": 0.8633, + "step": 9244 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005293588312235836, + "loss": 0.8477, + "step": 9245 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005292719417604414, + "loss": 0.8672, + "step": 9246 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005291850514102631, + "loss": 0.8594, + "step": 9247 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005290981601756822, + "loss": 0.8984, + "step": 9248 + }, + { + "epoch": 0.5, + "learning_rate": 0.000529011268059331, + "loss": 0.8555, + "step": 9249 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005289243750638434, + "loss": 0.8945, + "step": 9250 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005288374811918522, + "loss": 0.8125, + "step": 9251 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005287505864459905, + "loss": 0.8828, + "step": 9252 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005286636908288917, + "loss": 0.9336, + "step": 9253 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005285767943431888, + "loss": 0.7812, + "step": 9254 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005284898969915151, + "loss": 0.8203, + "step": 9255 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005284029987765041, + "loss": 0.8906, + "step": 9256 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005283160997007888, + "loss": 0.918, + "step": 9257 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005282291997670028, + "loss": 0.8477, + "step": 9258 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005281422989777792, + "loss": 0.7969, + "step": 9259 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005280553973357516, + "loss": 0.8672, + "step": 9260 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005279684948435533, + "loss": 0.8789, + "step": 9261 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005278815915038176, + "loss": 0.8281, + "step": 9262 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005277946873191782, + "loss": 0.9297, + "step": 9263 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005277077822922685, + "loss": 0.8594, + "step": 9264 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005276208764257217, + "loss": 0.9062, + "step": 9265 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005275339697221721, + "loss": 0.8828, + "step": 9266 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005274470621842524, + "loss": 0.8438, + "step": 9267 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005273601538145966, + "loss": 0.9492, + "step": 9268 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005272732446158385, + "loss": 0.707, + "step": 9269 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005271863345906113, + "loss": 0.8594, + "step": 9270 + }, + { + "epoch": 0.5, + "learning_rate": 0.000527099423741549, + "loss": 0.9141, + "step": 9271 + }, + { + "epoch": 0.5, + "learning_rate": 0.000527012512071285, + "loss": 0.9766, + "step": 9272 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005269255995824533, + "loss": 0.8281, + "step": 9273 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005268386862776873, + "loss": 0.875, + "step": 9274 + }, + { + "epoch": 0.5, + "learning_rate": 0.000526751772159621, + "loss": 0.832, + "step": 9275 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005266648572308882, + "loss": 0.7422, + "step": 9276 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005265779414941227, + "loss": 0.8086, + "step": 9277 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005264910249519581, + "loss": 0.8984, + "step": 9278 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005264041076070285, + "loss": 0.9141, + "step": 9279 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005263171894619676, + "loss": 0.8633, + "step": 9280 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005262302705194096, + "loss": 0.8555, + "step": 9281 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005261433507819882, + "loss": 0.832, + "step": 9282 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005260564302523373, + "loss": 0.7852, + "step": 9283 + }, + { + "epoch": 0.5, + "learning_rate": 0.000525969508933091, + "loss": 0.9219, + "step": 9284 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005258825868268834, + "loss": 0.8867, + "step": 9285 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005257956639363484, + "loss": 0.8594, + "step": 9286 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005257087402641198, + "loss": 0.8945, + "step": 9287 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005256218158128322, + "loss": 0.832, + "step": 9288 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005255348905851192, + "loss": 0.9453, + "step": 9289 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005254479645836152, + "loss": 0.9062, + "step": 9290 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005253610378109545, + "loss": 0.9375, + "step": 9291 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005252741102697707, + "loss": 0.9102, + "step": 9292 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005251871819626986, + "loss": 0.8398, + "step": 9293 + }, + { + "epoch": 0.5, + "learning_rate": 0.000525100252892372, + "loss": 0.9062, + "step": 9294 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005250133230614253, + "loss": 0.8633, + "step": 9295 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005249263924724929, + "loss": 0.8477, + "step": 9296 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005248394611282088, + "loss": 0.8594, + "step": 9297 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005247525290312073, + "loss": 0.9375, + "step": 9298 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005246655961841231, + "loss": 0.7891, + "step": 9299 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005245786625895902, + "loss": 0.8633, + "step": 9300 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005244917282502432, + "loss": 1.0, + "step": 9301 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005244047931687163, + "loss": 0.75, + "step": 9302 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005243178573476439, + "loss": 0.8164, + "step": 9303 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005242309207896608, + "loss": 0.918, + "step": 9304 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005241439834974008, + "loss": 0.9805, + "step": 9305 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005240570454734992, + "loss": 1.0078, + "step": 9306 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005239701067205898, + "loss": 0.875, + "step": 9307 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005238831672413077, + "loss": 0.8984, + "step": 9308 + }, + { + "epoch": 0.5, + "learning_rate": 0.000523796227038287, + "loss": 0.8516, + "step": 9309 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005237092861141626, + "loss": 0.7969, + "step": 9310 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005236223444715688, + "loss": 0.8789, + "step": 9311 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005235354021131404, + "loss": 0.8398, + "step": 9312 + }, + { + "epoch": 0.5, + "learning_rate": 0.000523448459041512, + "loss": 0.8281, + "step": 9313 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005233615152593184, + "loss": 0.8945, + "step": 9314 + }, + { + "epoch": 0.5, + "learning_rate": 0.000523274570769194, + "loss": 0.793, + "step": 9315 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005231876255737736, + "loss": 0.8555, + "step": 9316 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005231006796756922, + "loss": 0.8594, + "step": 9317 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005230137330775841, + "loss": 0.9336, + "step": 9318 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005229267857820845, + "loss": 0.9258, + "step": 9319 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005228398377918279, + "loss": 0.9219, + "step": 9320 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005227528891094492, + "loss": 0.9102, + "step": 9321 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005226659397375832, + "loss": 0.8711, + "step": 9322 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005225789896788647, + "loss": 0.8672, + "step": 9323 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005224920389359288, + "loss": 0.918, + "step": 9324 + }, + { + "epoch": 0.5, + "learning_rate": 0.00052240508751141, + "loss": 0.8828, + "step": 9325 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005223181354079436, + "loss": 0.8867, + "step": 9326 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005222311826281644, + "loss": 0.7852, + "step": 9327 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005221442291747074, + "loss": 0.9453, + "step": 9328 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005220572750502074, + "loss": 0.832, + "step": 9329 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005219703202572995, + "loss": 0.8984, + "step": 9330 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005218833647986187, + "loss": 0.8086, + "step": 9331 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005217964086768003, + "loss": 0.8555, + "step": 9332 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005217094518944788, + "loss": 0.7656, + "step": 9333 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005216224944542899, + "loss": 0.9297, + "step": 9334 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005215355363588682, + "loss": 0.8984, + "step": 9335 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005214485776108491, + "loss": 0.8906, + "step": 9336 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005213616182128675, + "loss": 0.8828, + "step": 9337 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005212746581675589, + "loss": 0.8047, + "step": 9338 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005211876974775582, + "loss": 0.8281, + "step": 9339 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005211007361455007, + "loss": 0.7461, + "step": 9340 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005210137741740214, + "loss": 0.7891, + "step": 9341 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005209268115657558, + "loss": 0.8906, + "step": 9342 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005208398483233392, + "loss": 0.8711, + "step": 9343 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005207528844494066, + "loss": 0.7969, + "step": 9344 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005206659199465935, + "loss": 0.9531, + "step": 9345 + }, + { + "epoch": 0.5, + "learning_rate": 0.000520578954817535, + "loss": 0.8594, + "step": 9346 + }, + { + "epoch": 0.5, + "learning_rate": 0.000520491989064867, + "loss": 0.8672, + "step": 9347 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005204050226912239, + "loss": 0.8438, + "step": 9348 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005203180556992419, + "loss": 0.8516, + "step": 9349 + }, + { + "epoch": 0.5, + "learning_rate": 0.000520231088091556, + "loss": 0.9023, + "step": 9350 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005201441198708016, + "loss": 0.8711, + "step": 9351 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005200571510396143, + "loss": 0.875, + "step": 9352 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005199701816006294, + "loss": 0.8398, + "step": 9353 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005198832115564824, + "loss": 1.0, + "step": 9354 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005197962409098087, + "loss": 0.9531, + "step": 9355 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005197092696632438, + "loss": 0.8359, + "step": 9356 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005196222978194236, + "loss": 0.8711, + "step": 9357 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005195353253809831, + "loss": 0.9336, + "step": 9358 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005194483523505581, + "loss": 0.8906, + "step": 9359 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005193613787307841, + "loss": 0.918, + "step": 9360 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005192744045242969, + "loss": 0.793, + "step": 9361 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005191874297337318, + "loss": 0.8125, + "step": 9362 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005191004543617246, + "loss": 0.9102, + "step": 9363 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005190134784109107, + "loss": 0.875, + "step": 9364 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005189265018839263, + "loss": 0.8789, + "step": 9365 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005188395247834065, + "loss": 0.8438, + "step": 9366 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005187525471119873, + "loss": 0.8477, + "step": 9367 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005186655688723042, + "loss": 0.8438, + "step": 9368 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005185785900669931, + "loss": 0.8984, + "step": 9369 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005184916106986898, + "loss": 0.9492, + "step": 9370 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005184046307700298, + "loss": 0.9727, + "step": 9371 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005183176502836491, + "loss": 0.7344, + "step": 9372 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005182306692421834, + "loss": 0.8672, + "step": 9373 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005181436876482686, + "loss": 0.8555, + "step": 9374 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005180567055045405, + "loss": 0.8789, + "step": 9375 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005179697228136348, + "loss": 0.8711, + "step": 9376 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005178827395781874, + "loss": 0.8789, + "step": 9377 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005177957558008344, + "loss": 0.8906, + "step": 9378 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005177087714842114, + "loss": 0.9258, + "step": 9379 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005176217866309547, + "loss": 0.8164, + "step": 9380 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005175348012436998, + "loss": 0.8047, + "step": 9381 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005174478153250828, + "loss": 0.8984, + "step": 9382 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005173608288777398, + "loss": 0.8359, + "step": 9383 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005172738419043066, + "loss": 0.9453, + "step": 9384 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005171868544074193, + "loss": 0.7852, + "step": 9385 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005170998663897137, + "loss": 0.8242, + "step": 9386 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005170128778538263, + "loss": 0.9141, + "step": 9387 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005169258888023926, + "loss": 0.9062, + "step": 9388 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005168388992380488, + "loss": 0.7266, + "step": 9389 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005167519091634311, + "loss": 0.7773, + "step": 9390 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005166649185811754, + "loss": 0.7656, + "step": 9391 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005165779274939182, + "loss": 0.9453, + "step": 9392 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005164909359042952, + "loss": 0.793, + "step": 9393 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005164039438149427, + "loss": 0.8984, + "step": 9394 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005163169512284967, + "loss": 0.7969, + "step": 9395 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005162299581475937, + "loss": 0.8828, + "step": 9396 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005161429645748694, + "loss": 1.0703, + "step": 9397 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005160559705129605, + "loss": 0.9023, + "step": 9398 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005159689759645028, + "loss": 0.9297, + "step": 9399 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005158819809321328, + "loss": 0.9219, + "step": 9400 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005157949854184865, + "loss": 0.8789, + "step": 9401 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005157079894262003, + "loss": 0.8594, + "step": 9402 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005156209929579103, + "loss": 0.8945, + "step": 9403 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005155339960162531, + "loss": 0.8438, + "step": 9404 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005154469986038646, + "loss": 0.8398, + "step": 9405 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005153600007233814, + "loss": 0.9609, + "step": 9406 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005152730023774397, + "loss": 0.8828, + "step": 9407 + }, + { + "epoch": 0.51, + "learning_rate": 0.000515186003568676, + "loss": 0.8203, + "step": 9408 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005150990042997262, + "loss": 0.7695, + "step": 9409 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005150120045732271, + "loss": 0.9023, + "step": 9410 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005149250043918151, + "loss": 0.8438, + "step": 9411 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005148380037581264, + "loss": 0.8789, + "step": 9412 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005147510026747975, + "loss": 0.8398, + "step": 9413 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005146640011444646, + "loss": 1.0391, + "step": 9414 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005145769991697643, + "loss": 0.8789, + "step": 9415 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005144899967533331, + "loss": 0.8516, + "step": 9416 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005144029938978074, + "loss": 0.8945, + "step": 9417 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005143159906058237, + "loss": 0.8711, + "step": 9418 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005142289868800184, + "loss": 0.8359, + "step": 9419 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005141419827230281, + "loss": 0.8906, + "step": 9420 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005140549781374892, + "loss": 0.8633, + "step": 9421 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005139679731260383, + "loss": 0.9062, + "step": 9422 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005138809676913123, + "loss": 0.9414, + "step": 9423 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005137939618359468, + "loss": 0.8555, + "step": 9424 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005137069555625793, + "loss": 0.9102, + "step": 9425 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005136199488738459, + "loss": 0.9102, + "step": 9426 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005135329417723834, + "loss": 0.9141, + "step": 9427 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005134459342608283, + "loss": 0.8984, + "step": 9428 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005133589263418172, + "loss": 0.8633, + "step": 9429 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005132719180179865, + "loss": 0.7656, + "step": 9430 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005131849092919736, + "loss": 0.8672, + "step": 9431 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005130979001664142, + "loss": 0.8828, + "step": 9432 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005130108906439457, + "loss": 0.9062, + "step": 9433 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005129238807272041, + "loss": 0.8867, + "step": 9434 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005128368704188269, + "loss": 0.8789, + "step": 9435 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005127498597214501, + "loss": 0.9023, + "step": 9436 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005126628486377108, + "loss": 0.7969, + "step": 9437 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005125758371702455, + "loss": 0.9414, + "step": 9438 + }, + { + "epoch": 0.51, + "learning_rate": 0.000512488825321691, + "loss": 0.8047, + "step": 9439 + }, + { + "epoch": 0.51, + "learning_rate": 0.000512401813094684, + "loss": 0.9375, + "step": 9440 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005123148004918616, + "loss": 0.8359, + "step": 9441 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005122277875158601, + "loss": 0.9023, + "step": 9442 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005121407741693164, + "loss": 0.8203, + "step": 9443 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005120537604548676, + "loss": 0.8672, + "step": 9444 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005119667463751501, + "loss": 0.9023, + "step": 9445 + }, + { + "epoch": 0.51, + "learning_rate": 0.000511879731932801, + "loss": 0.8867, + "step": 9446 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005117927171304569, + "loss": 0.8516, + "step": 9447 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005117057019707547, + "loss": 0.875, + "step": 9448 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005116186864563314, + "loss": 0.7773, + "step": 9449 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005115316705898235, + "loss": 0.8477, + "step": 9450 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005114446543738685, + "loss": 0.8047, + "step": 9451 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005113576378111025, + "loss": 0.7734, + "step": 9452 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005112706209041629, + "loss": 0.8984, + "step": 9453 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005111836036556864, + "loss": 0.8516, + "step": 9454 + }, + { + "epoch": 0.51, + "learning_rate": 0.00051109658606831, + "loss": 0.9375, + "step": 9455 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005110095681446707, + "loss": 0.9258, + "step": 9456 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005109225498874052, + "loss": 0.8516, + "step": 9457 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005108355312991506, + "loss": 0.8828, + "step": 9458 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005107485123825438, + "loss": 0.8789, + "step": 9459 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005106614931402218, + "loss": 0.8477, + "step": 9460 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005105744735748216, + "loss": 0.7734, + "step": 9461 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005104874536889801, + "loss": 0.8281, + "step": 9462 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005104004334853342, + "loss": 0.8711, + "step": 9463 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005103134129665211, + "loss": 0.75, + "step": 9464 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005102263921351775, + "loss": 0.8438, + "step": 9465 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005101393709939409, + "loss": 0.8281, + "step": 9466 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005100523495454479, + "loss": 0.8906, + "step": 9467 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005099653277923357, + "loss": 0.8164, + "step": 9468 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005098783057372414, + "loss": 0.8203, + "step": 9469 + }, + { + "epoch": 0.51, + "learning_rate": 0.000509791283382802, + "loss": 0.9141, + "step": 9470 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005097042607316545, + "loss": 0.9258, + "step": 9471 + }, + { + "epoch": 0.51, + "learning_rate": 0.000509617237786436, + "loss": 0.8398, + "step": 9472 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005095302145497835, + "loss": 0.9219, + "step": 9473 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005094431910243344, + "loss": 0.9961, + "step": 9474 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005093561672127255, + "loss": 0.7812, + "step": 9475 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005092691431175941, + "loss": 0.7891, + "step": 9476 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005091821187415771, + "loss": 0.8359, + "step": 9477 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005090950940873118, + "loss": 0.8086, + "step": 9478 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005090080691574352, + "loss": 0.8672, + "step": 9479 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005089210439545846, + "loss": 0.8047, + "step": 9480 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005088340184813971, + "loss": 0.7383, + "step": 9481 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005087469927405097, + "loss": 0.9297, + "step": 9482 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005086599667345597, + "loss": 0.8047, + "step": 9483 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005085729404661844, + "loss": 0.7812, + "step": 9484 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005084859139380206, + "loss": 0.8398, + "step": 9485 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005083988871527059, + "loss": 0.8438, + "step": 9486 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005083118601128773, + "loss": 0.8906, + "step": 9487 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005082248328211721, + "loss": 0.8633, + "step": 9488 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005081378052802274, + "loss": 0.9062, + "step": 9489 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005080507774926804, + "loss": 0.832, + "step": 9490 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005079637494611684, + "loss": 0.8789, + "step": 9491 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005078767211883288, + "loss": 0.8711, + "step": 9492 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005077896926767984, + "loss": 0.8242, + "step": 9493 + }, + { + "epoch": 0.51, + "learning_rate": 0.000507702663929215, + "loss": 0.9219, + "step": 9494 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005076156349482154, + "loss": 0.8359, + "step": 9495 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005075286057364371, + "loss": 0.8711, + "step": 9496 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005074415762965174, + "loss": 0.9062, + "step": 9497 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005073545466310933, + "loss": 0.9297, + "step": 9498 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005072675167428026, + "loss": 0.7539, + "step": 9499 + }, + { + "epoch": 0.51, + "learning_rate": 0.000507180486634282, + "loss": 0.8164, + "step": 9500 + }, + { + "epoch": 0.51, + "learning_rate": 0.000507093456308169, + "loss": 0.8398, + "step": 9501 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005070064257671012, + "loss": 0.8789, + "step": 9502 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005069193950137155, + "loss": 0.7969, + "step": 9503 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005068323640506496, + "loss": 0.8594, + "step": 9504 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005067453328805406, + "loss": 0.9062, + "step": 9505 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005066583015060257, + "loss": 0.9492, + "step": 9506 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005065712699297426, + "loss": 0.8984, + "step": 9507 + }, + { + "epoch": 0.51, + "learning_rate": 0.000506484238154328, + "loss": 0.8203, + "step": 9508 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005063972061824201, + "loss": 0.9062, + "step": 9509 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005063101740166558, + "loss": 0.7969, + "step": 9510 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005062231416596725, + "loss": 0.8516, + "step": 9511 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005061361091141074, + "loss": 0.8672, + "step": 9512 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005060490763825982, + "loss": 0.9336, + "step": 9513 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005059620434677821, + "loss": 0.8125, + "step": 9514 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005058750103722965, + "loss": 0.9648, + "step": 9515 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005057879770987786, + "loss": 0.8438, + "step": 9516 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005057009436498661, + "loss": 0.9414, + "step": 9517 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005056139100281964, + "loss": 0.8789, + "step": 9518 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005055268762364066, + "loss": 0.7852, + "step": 9519 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005054398422771343, + "loss": 0.8711, + "step": 9520 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005053528081530171, + "loss": 0.8398, + "step": 9521 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005052657738666922, + "loss": 0.8164, + "step": 9522 + }, + { + "epoch": 0.51, + "learning_rate": 0.000505178739420797, + "loss": 0.8594, + "step": 9523 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005050917048179687, + "loss": 0.8516, + "step": 9524 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005050046700608455, + "loss": 0.8594, + "step": 9525 + }, + { + "epoch": 0.51, + "learning_rate": 0.000504917635152064, + "loss": 0.9375, + "step": 9526 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005048306000942622, + "loss": 0.7695, + "step": 9527 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005047435648900772, + "loss": 0.7578, + "step": 9528 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005046565295421468, + "loss": 1.0078, + "step": 9529 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005045694940531082, + "loss": 0.8633, + "step": 9530 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005044824584255987, + "loss": 0.8828, + "step": 9531 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005043954226622563, + "loss": 0.8477, + "step": 9532 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005043083867657178, + "loss": 0.8672, + "step": 9533 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005042213507386213, + "loss": 0.9336, + "step": 9534 + }, + { + "epoch": 0.51, + "learning_rate": 0.000504134314583604, + "loss": 0.9648, + "step": 9535 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005040472783033033, + "loss": 0.8867, + "step": 9536 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005039602419003569, + "loss": 0.8438, + "step": 9537 + }, + { + "epoch": 0.51, + "learning_rate": 0.000503873205377402, + "loss": 1.0078, + "step": 9538 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005037861687370763, + "loss": 0.8125, + "step": 9539 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005036991319820174, + "loss": 0.8438, + "step": 9540 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005036120951148623, + "loss": 0.8906, + "step": 9541 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005035250581382492, + "loss": 0.7578, + "step": 9542 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005034380210548152, + "loss": 0.7969, + "step": 9543 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005033509838671978, + "loss": 0.7734, + "step": 9544 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005032639465780345, + "loss": 0.793, + "step": 9545 + }, + { + "epoch": 0.51, + "learning_rate": 0.000503176909189963, + "loss": 0.8672, + "step": 9546 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005030898717056207, + "loss": 0.9414, + "step": 9547 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005030028341276452, + "loss": 0.9492, + "step": 9548 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005029157964586738, + "loss": 0.6992, + "step": 9549 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005028287587013443, + "loss": 0.8203, + "step": 9550 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005027417208582941, + "loss": 0.7188, + "step": 9551 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005026546829321607, + "loss": 0.8438, + "step": 9552 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005025676449255818, + "loss": 0.8047, + "step": 9553 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005024806068411948, + "loss": 0.9141, + "step": 9554 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005023935686816373, + "loss": 0.8828, + "step": 9555 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005023065304495468, + "loss": 0.7422, + "step": 9556 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005022194921475606, + "loss": 0.8438, + "step": 9557 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005021324537783169, + "loss": 0.7891, + "step": 9558 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005020454153444525, + "loss": 0.8555, + "step": 9559 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005019583768486055, + "loss": 0.8164, + "step": 9560 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005018713382934131, + "loss": 0.7227, + "step": 9561 + }, + { + "epoch": 0.51, + "learning_rate": 0.000501784299681513, + "loss": 0.9141, + "step": 9562 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005016972610155429, + "loss": 0.8477, + "step": 9563 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005016102222981402, + "loss": 0.8359, + "step": 9564 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005015231835319425, + "loss": 0.8789, + "step": 9565 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005014361447195871, + "loss": 0.8281, + "step": 9566 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005013491058637121, + "loss": 0.8516, + "step": 9567 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005012620669669545, + "loss": 0.9375, + "step": 9568 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005011750280319524, + "loss": 0.8516, + "step": 9569 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005010879890613429, + "loss": 0.7852, + "step": 9570 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005010009500577638, + "loss": 0.8008, + "step": 9571 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005009139110238526, + "loss": 0.9844, + "step": 9572 + }, + { + "epoch": 0.51, + "learning_rate": 0.000500826871962247, + "loss": 0.875, + "step": 9573 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005007398328755842, + "loss": 0.8477, + "step": 9574 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005006527937665024, + "loss": 0.8672, + "step": 9575 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005005657546376385, + "loss": 0.9258, + "step": 9576 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005004787154916305, + "loss": 0.875, + "step": 9577 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005003916763311159, + "loss": 0.8516, + "step": 9578 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005003046371587322, + "loss": 0.8828, + "step": 9579 + }, + { + "epoch": 0.51, + "learning_rate": 0.000500217597977117, + "loss": 0.8867, + "step": 9580 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005001305587889078, + "loss": 0.8008, + "step": 9581 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005000435195967421, + "loss": 0.9531, + "step": 9582 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004999564804032578, + "loss": 0.8398, + "step": 9583 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004998694412110923, + "loss": 0.8438, + "step": 9584 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004997824020228831, + "loss": 0.8398, + "step": 9585 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004996953628412681, + "loss": 0.8164, + "step": 9586 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004996083236688841, + "loss": 0.8125, + "step": 9587 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004995212845083695, + "loss": 0.8672, + "step": 9588 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004994342453623616, + "loss": 0.8281, + "step": 9589 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004993472062334978, + "loss": 0.9414, + "step": 9590 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004992601671244157, + "loss": 0.8906, + "step": 9591 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004991731280377531, + "loss": 0.8203, + "step": 9592 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004990860889761476, + "loss": 0.793, + "step": 9593 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004989990499422364, + "loss": 0.8984, + "step": 9594 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004989120109386572, + "loss": 0.8438, + "step": 9595 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004988249719680477, + "loss": 0.9102, + "step": 9596 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004987379330330455, + "loss": 0.9453, + "step": 9597 + }, + { + "epoch": 0.52, + "learning_rate": 0.000498650894136288, + "loss": 0.875, + "step": 9598 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004985638552804128, + "loss": 0.7461, + "step": 9599 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004984768164680576, + "loss": 0.7891, + "step": 9600 + }, + { + "epoch": 0.52, + "learning_rate": 0.00049838977770186, + "loss": 0.9453, + "step": 9601 + }, + { + "epoch": 0.52, + "learning_rate": 0.000498302738984457, + "loss": 0.8164, + "step": 9602 + }, + { + "epoch": 0.52, + "learning_rate": 0.000498215700318487, + "loss": 0.9141, + "step": 9603 + }, + { + "epoch": 0.52, + "learning_rate": 0.000498128661706587, + "loss": 0.8945, + "step": 9604 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004980416231513947, + "loss": 0.9258, + "step": 9605 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004979545846555475, + "loss": 0.8633, + "step": 9606 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004978675462216833, + "loss": 0.8828, + "step": 9607 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004977805078524394, + "loss": 0.7617, + "step": 9608 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004976934695504534, + "loss": 0.7812, + "step": 9609 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004976064313183628, + "loss": 0.8594, + "step": 9610 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004975193931588052, + "loss": 0.8789, + "step": 9611 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004974323550744182, + "loss": 0.7969, + "step": 9612 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004973453170678393, + "loss": 0.9648, + "step": 9613 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004972582791417058, + "loss": 0.9492, + "step": 9614 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004971712412986557, + "loss": 0.8672, + "step": 9615 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004970842035413262, + "loss": 0.8516, + "step": 9616 + }, + { + "epoch": 0.52, + "learning_rate": 0.000496997165872355, + "loss": 0.9609, + "step": 9617 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004969101282943793, + "loss": 0.8633, + "step": 9618 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004968230908100371, + "loss": 0.8086, + "step": 9619 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004967360534219655, + "loss": 0.7852, + "step": 9620 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004966490161328024, + "loss": 0.7969, + "step": 9621 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004965619789451849, + "loss": 0.793, + "step": 9622 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004964749418617509, + "loss": 0.8438, + "step": 9623 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004963879048851377, + "loss": 0.7773, + "step": 9624 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004963008680179828, + "loss": 0.9219, + "step": 9625 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004962138312629236, + "loss": 0.832, + "step": 9626 + }, + { + "epoch": 0.52, + "learning_rate": 0.000496126794622598, + "loss": 0.7852, + "step": 9627 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004960397580996432, + "loss": 0.8359, + "step": 9628 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004959527216966969, + "loss": 0.9062, + "step": 9629 + }, + { + "epoch": 0.52, + "learning_rate": 0.000495865685416396, + "loss": 0.8242, + "step": 9630 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004957786492613787, + "loss": 0.8086, + "step": 9631 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004956916132342822, + "loss": 0.7969, + "step": 9632 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004956045773377439, + "loss": 0.7344, + "step": 9633 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004955175415744012, + "loss": 0.9258, + "step": 9634 + }, + { + "epoch": 0.52, + "learning_rate": 0.000495430505946892, + "loss": 0.8398, + "step": 9635 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004953434704578533, + "loss": 0.9258, + "step": 9636 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004952564351099229, + "loss": 0.8086, + "step": 9637 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004951693999057378, + "loss": 0.9609, + "step": 9638 + }, + { + "epoch": 0.52, + "learning_rate": 0.000495082364847936, + "loss": 0.7539, + "step": 9639 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004949953299391547, + "loss": 0.8164, + "step": 9640 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004949082951820312, + "loss": 0.8203, + "step": 9641 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004948212605792032, + "loss": 0.8711, + "step": 9642 + }, + { + "epoch": 0.52, + "learning_rate": 0.000494734226133308, + "loss": 0.957, + "step": 9643 + }, + { + "epoch": 0.52, + "learning_rate": 0.000494647191846983, + "loss": 0.8125, + "step": 9644 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004945601577228656, + "loss": 0.918, + "step": 9645 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004944731237635935, + "loss": 0.7812, + "step": 9646 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004943860899718037, + "loss": 0.8125, + "step": 9647 + }, + { + "epoch": 0.52, + "learning_rate": 0.000494299056350134, + "loss": 0.8438, + "step": 9648 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004942120229012215, + "loss": 0.8008, + "step": 9649 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004941249896277037, + "loss": 0.8477, + "step": 9650 + }, + { + "epoch": 0.52, + "learning_rate": 0.000494037956532218, + "loss": 0.8672, + "step": 9651 + }, + { + "epoch": 0.52, + "learning_rate": 0.000493950923617402, + "loss": 0.8945, + "step": 9652 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004938638908858926, + "loss": 0.8516, + "step": 9653 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004937768583403277, + "loss": 0.7305, + "step": 9654 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004936898259833443, + "loss": 0.8945, + "step": 9655 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004936027938175799, + "loss": 0.9141, + "step": 9656 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004935157618456719, + "loss": 0.7734, + "step": 9657 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004934287300702576, + "loss": 0.8164, + "step": 9658 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004933416984939745, + "loss": 0.8359, + "step": 9659 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004932546671194597, + "loss": 0.8477, + "step": 9660 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004931676359493504, + "loss": 0.8906, + "step": 9661 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004930806049862845, + "loss": 0.9258, + "step": 9662 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004929935742328989, + "loss": 0.8984, + "step": 9663 + }, + { + "epoch": 0.52, + "learning_rate": 0.000492906543691831, + "loss": 0.9336, + "step": 9664 + }, + { + "epoch": 0.52, + "learning_rate": 0.000492819513365718, + "loss": 0.8828, + "step": 9665 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004927324832571976, + "loss": 0.8984, + "step": 9666 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004926454533689068, + "loss": 0.8086, + "step": 9667 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004925584237034828, + "loss": 0.9609, + "step": 9668 + }, + { + "epoch": 0.52, + "learning_rate": 0.000492471394263563, + "loss": 0.8828, + "step": 9669 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004923843650517846, + "loss": 0.9492, + "step": 9670 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004922973360707852, + "loss": 0.8906, + "step": 9671 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004922103073232017, + "loss": 0.8242, + "step": 9672 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004921232788116713, + "loss": 0.8867, + "step": 9673 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004920362505388317, + "loss": 0.832, + "step": 9674 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004919492225073197, + "loss": 0.8906, + "step": 9675 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004918621947197728, + "loss": 0.9141, + "step": 9676 + }, + { + "epoch": 0.52, + "learning_rate": 0.000491775167178828, + "loss": 0.8867, + "step": 9677 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004916881398871228, + "loss": 0.8516, + "step": 9678 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004916011128472942, + "loss": 0.7695, + "step": 9679 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004915140860619793, + "loss": 0.75, + "step": 9680 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004914270595338157, + "loss": 0.8398, + "step": 9681 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004913400332654404, + "loss": 0.9336, + "step": 9682 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004912530072594904, + "loss": 0.8594, + "step": 9683 + }, + { + "epoch": 0.52, + "learning_rate": 0.000491165981518603, + "loss": 0.9453, + "step": 9684 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004910789560454155, + "loss": 0.8555, + "step": 9685 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004909919308425648, + "loss": 0.8281, + "step": 9686 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004909049059126884, + "loss": 0.8438, + "step": 9687 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004908178812584228, + "loss": 0.8867, + "step": 9688 + }, + { + "epoch": 0.52, + "learning_rate": 0.000490730856882406, + "loss": 0.8906, + "step": 9689 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004906438327872745, + "loss": 0.7578, + "step": 9690 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004905568089756657, + "loss": 0.7266, + "step": 9691 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004904697854502165, + "loss": 0.9336, + "step": 9692 + }, + { + "epoch": 0.52, + "learning_rate": 0.000490382762213564, + "loss": 0.8672, + "step": 9693 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004902957392683456, + "loss": 0.8008, + "step": 9694 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004902087166171982, + "loss": 0.8828, + "step": 9695 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004901216942627585, + "loss": 0.8711, + "step": 9696 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004900346722076643, + "loss": 0.8906, + "step": 9697 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004899476504545522, + "loss": 0.8945, + "step": 9698 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004898606290060592, + "loss": 0.8164, + "step": 9699 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004897736078648225, + "loss": 0.7773, + "step": 9700 + }, + { + "epoch": 0.52, + "learning_rate": 0.000489686587033479, + "loss": 0.875, + "step": 9701 + }, + { + "epoch": 0.52, + "learning_rate": 0.000489599566514666, + "loss": 0.8047, + "step": 9702 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004895125463110202, + "loss": 0.8086, + "step": 9703 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004894255264251784, + "loss": 0.8203, + "step": 9704 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004893385068597782, + "loss": 0.8359, + "step": 9705 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004892514876174563, + "loss": 0.7305, + "step": 9706 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004891644687008494, + "loss": 0.7969, + "step": 9707 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004890774501125949, + "loss": 0.75, + "step": 9708 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004889904318553293, + "loss": 0.7852, + "step": 9709 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004889034139316901, + "loss": 0.8984, + "step": 9710 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004888163963443137, + "loss": 0.8438, + "step": 9711 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004887293790958372, + "loss": 0.8047, + "step": 9712 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004886423621888976, + "loss": 0.8555, + "step": 9713 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004885553456261318, + "loss": 0.9258, + "step": 9714 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004884683294101765, + "loss": 0.8242, + "step": 9715 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004883813135436688, + "loss": 0.8672, + "step": 9716 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004882942980292454, + "loss": 0.875, + "step": 9717 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004882072828695434, + "loss": 0.9414, + "step": 9718 + }, + { + "epoch": 0.52, + "learning_rate": 0.000488120268067199, + "loss": 0.7891, + "step": 9719 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048803325362485, + "loss": 0.7656, + "step": 9720 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004879462395451325, + "loss": 0.7891, + "step": 9721 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048785922583068357, + "loss": 0.8203, + "step": 9722 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004877722124841399, + "loss": 0.8359, + "step": 9723 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048768519950813843, + "loss": 0.8125, + "step": 9724 + }, + { + "epoch": 0.52, + "learning_rate": 0.000487598186905316, + "loss": 0.8672, + "step": 9725 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048751117467830913, + "loss": 0.8477, + "step": 9726 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004874241628297546, + "loss": 0.7812, + "step": 9727 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004873371513622893, + "loss": 0.9023, + "step": 9728 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048725014027854995, + "loss": 0.8398, + "step": 9729 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048716312958117325, + "loss": 0.8477, + "step": 9730 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004870761192727957, + "loss": 0.7266, + "step": 9731 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004869891093560544, + "loss": 0.8047, + "step": 9732 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048690209983358584, + "loss": 0.875, + "step": 9733 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048681509070802663, + "loss": 0.7891, + "step": 9734 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004867280819820134, + "loss": 0.875, + "step": 9735 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048664107365818297, + "loss": 0.8242, + "step": 9736 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004865540657391718, + "loss": 0.8906, + "step": 9737 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004864670582276168, + "loss": 0.793, + "step": 9738 + }, + { + "epoch": 0.52, + "learning_rate": 0.000486380051126154, + "loss": 0.8398, + "step": 9739 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048629304443742087, + "loss": 0.8281, + "step": 9740 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048620603816405323, + "loss": 0.8242, + "step": 9741 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048611903230868803, + "loss": 0.8906, + "step": 9742 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004860320268739616, + "loss": 0.75, + "step": 9743 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048594502186251077, + "loss": 0.8516, + "step": 9744 + }, + { + "epoch": 0.52, + "learning_rate": 0.000485858017276972, + "loss": 0.9297, + "step": 9745 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004857710131199818, + "loss": 0.8398, + "step": 9746 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004856840093941763, + "loss": 0.8086, + "step": 9747 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004855970061021927, + "loss": 0.8594, + "step": 9748 + }, + { + "epoch": 0.52, + "learning_rate": 0.000485510003246667, + "loss": 0.9023, + "step": 9749 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004854230008302357, + "loss": 0.793, + "step": 9750 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004853359988555355, + "loss": 0.8555, + "step": 9751 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048524899732520265, + "loss": 0.7578, + "step": 9752 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004851619962418738, + "loss": 0.8594, + "step": 9753 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048507499560818487, + "loss": 0.7461, + "step": 9754 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048498799542677284, + "loss": 0.8867, + "step": 9755 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048490099570027383, + "loss": 0.8906, + "step": 9756 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048481399643132423, + "loss": 0.9219, + "step": 9757 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048472699762256035, + "loss": 0.8281, + "step": 9758 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004846399992766187, + "loss": 0.9141, + "step": 9759 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048455300139613547, + "loss": 0.8867, + "step": 9760 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048446600398374716, + "loss": 0.832, + "step": 9761 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048437900704208963, + "loss": 0.793, + "step": 9762 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004842920105737999, + "loss": 0.7695, + "step": 9763 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048420501458151365, + "loss": 0.8398, + "step": 9764 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004841180190678674, + "loss": 0.8516, + "step": 9765 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004840310240354972, + "loss": 0.8867, + "step": 9766 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004839440294870396, + "loss": 0.7539, + "step": 9767 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048385703542513063, + "loss": 0.8438, + "step": 9768 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048377004185240653, + "loss": 0.8164, + "step": 9769 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004836830487715033, + "loss": 0.7656, + "step": 9770 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004835960561850574, + "loss": 0.8008, + "step": 9771 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004835090640957049, + "loss": 0.8438, + "step": 9772 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048342207250608193, + "loss": 0.8633, + "step": 9773 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004833350814188245, + "loss": 0.9297, + "step": 9774 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048324809083656893, + "loss": 0.9258, + "step": 9775 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004831611007619514, + "loss": 0.8008, + "step": 9776 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004830741111976077, + "loss": 0.7305, + "step": 9777 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004829871221461739, + "loss": 0.9297, + "step": 9778 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004829001336102863, + "loss": 0.8984, + "step": 9779 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004828131455925808, + "loss": 0.8164, + "step": 9780 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004827261580956936, + "loss": 0.7852, + "step": 9781 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048263917112226017, + "loss": 0.8672, + "step": 9782 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048255218467491726, + "loss": 0.9297, + "step": 9783 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048246519875630033, + "loss": 0.9219, + "step": 9784 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048237821336904546, + "loss": 0.8164, + "step": 9785 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004822912285157886, + "loss": 0.7969, + "step": 9786 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048220424419916563, + "loss": 0.9023, + "step": 9787 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048211726042181265, + "loss": 0.8828, + "step": 9788 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048203027718636527, + "loss": 0.8594, + "step": 9789 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048194329449545957, + "loss": 0.8438, + "step": 9790 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048185631235173157, + "loss": 0.8711, + "step": 9791 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048176933075781675, + "loss": 0.9336, + "step": 9792 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004816823497163509, + "loss": 0.8086, + "step": 9793 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048159536922997025, + "loss": 0.8359, + "step": 9794 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048150838930131033, + "loss": 0.7969, + "step": 9795 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048142140993300697, + "loss": 0.8008, + "step": 9796 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004813344311276958, + "loss": 0.8711, + "step": 9797 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004812474528880128, + "loss": 0.8086, + "step": 9798 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004811604752165937, + "loss": 0.832, + "step": 9799 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048107349811607393, + "loss": 0.8203, + "step": 9800 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004809865215890892, + "loss": 0.8633, + "step": 9801 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048089954563827547, + "loss": 0.8477, + "step": 9802 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048081257026626824, + "loss": 0.8398, + "step": 9803 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048072559547570335, + "loss": 0.8867, + "step": 9804 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048063862126921573, + "loss": 0.8359, + "step": 9805 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048055164764944196, + "loss": 0.8789, + "step": 9806 + }, + { + "epoch": 0.53, + "learning_rate": 0.000480464674619017, + "loss": 0.8516, + "step": 9807 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004803777021805766, + "loss": 0.8164, + "step": 9808 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004802907303367561, + "loss": 0.8477, + "step": 9809 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004802037590901913, + "loss": 0.8203, + "step": 9810 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004801167884435177, + "loss": 0.793, + "step": 9811 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004800298183993708, + "loss": 0.8672, + "step": 9812 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004799428489603857, + "loss": 0.8711, + "step": 9813 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047985588012919853, + "loss": 0.8047, + "step": 9814 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047976891190844415, + "loss": 0.9062, + "step": 9815 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004796819443007582, + "loss": 0.793, + "step": 9816 + }, + { + "epoch": 0.53, + "learning_rate": 0.000479594977308776, + "loss": 0.8398, + "step": 9817 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004795080109351332, + "loss": 0.9258, + "step": 9818 + }, + { + "epoch": 0.53, + "learning_rate": 0.000479421045182465, + "loss": 0.8086, + "step": 9819 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004793340800534066, + "loss": 0.8242, + "step": 9820 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047924711555059343, + "loss": 0.8711, + "step": 9821 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004791601516766609, + "loss": 0.8945, + "step": 9822 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004790731884342442, + "loss": 0.8047, + "step": 9823 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047898622582597867, + "loss": 0.8555, + "step": 9824 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047889926385449947, + "loss": 0.7266, + "step": 9825 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004788123025224419, + "loss": 0.8594, + "step": 9826 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047872534183244125, + "loss": 0.9336, + "step": 9827 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004786383817871324, + "loss": 0.9062, + "step": 9828 + }, + { + "epoch": 0.53, + "learning_rate": 0.000478551422389151, + "loss": 0.8164, + "step": 9829 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004784644636411319, + "loss": 0.9102, + "step": 9830 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047837750554571027, + "loss": 0.7305, + "step": 9831 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047829054810552107, + "loss": 0.9375, + "step": 9832 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004782035913231998, + "loss": 0.832, + "step": 9833 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047811663520138135, + "loss": 0.8359, + "step": 9834 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047802967974270064, + "loss": 0.8477, + "step": 9835 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047794272494979266, + "loss": 0.8047, + "step": 9836 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047785577082529277, + "loss": 0.9141, + "step": 9837 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004777688173718357, + "loss": 0.8359, + "step": 9838 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047768186459205646, + "loss": 0.7773, + "step": 9839 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047759491248858997, + "loss": 0.8203, + "step": 9840 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004775079610640713, + "loss": 0.8945, + "step": 9841 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047742101032113545, + "loss": 0.8242, + "step": 9842 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047733406026241707, + "loss": 0.8438, + "step": 9843 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004772471108905509, + "loss": 0.7344, + "step": 9844 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004771601622081722, + "loss": 0.9414, + "step": 9845 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004770732142179156, + "loss": 0.875, + "step": 9846 + }, + { + "epoch": 0.53, + "learning_rate": 0.000476986266922416, + "loss": 0.8594, + "step": 9847 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047689932032430785, + "loss": 0.9336, + "step": 9848 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047681237442622637, + "loss": 0.8984, + "step": 9849 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004767254292308061, + "loss": 0.8672, + "step": 9850 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047663848474068177, + "loss": 0.8281, + "step": 9851 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047655154095848794, + "loss": 0.8164, + "step": 9852 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047646459788685966, + "loss": 0.8477, + "step": 9853 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004763776555284313, + "loss": 0.8398, + "step": 9854 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004762907138858377, + "loss": 0.8633, + "step": 9855 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004762037729617129, + "loss": 0.8672, + "step": 9856 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047611683275869247, + "loss": 0.8867, + "step": 9857 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047602989327941023, + "loss": 0.875, + "step": 9858 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047594295452650095, + "loss": 0.8633, + "step": 9859 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047585601650259907, + "loss": 0.8281, + "step": 9860 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047576907921033937, + "loss": 0.8047, + "step": 9861 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004756821426523562, + "loss": 0.875, + "step": 9862 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004755952068312837, + "loss": 0.8125, + "step": 9863 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047550827174975695, + "loss": 0.8516, + "step": 9864 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004754213374104099, + "loss": 0.8711, + "step": 9865 + }, + { + "epoch": 0.53, + "learning_rate": 0.000475334403815877, + "loss": 0.8594, + "step": 9866 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004752474709687926, + "loss": 0.8867, + "step": 9867 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004751605388717913, + "loss": 0.707, + "step": 9868 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004750736075275072, + "loss": 0.8516, + "step": 9869 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047498667693857485, + "loss": 0.9102, + "step": 9870 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047489974710762797, + "loss": 0.918, + "step": 9871 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047481281803730155, + "loss": 0.9492, + "step": 9872 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047472588973022933, + "loss": 0.9141, + "step": 9873 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004746389621890457, + "loss": 0.8203, + "step": 9874 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047455203541638477, + "loss": 0.8242, + "step": 9875 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004744651094148808, + "loss": 0.7812, + "step": 9876 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004743781841871679, + "loss": 0.7617, + "step": 9877 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004742912597358803, + "loss": 0.8164, + "step": 9878 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047420433606365167, + "loss": 0.8672, + "step": 9879 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004741174131731167, + "loss": 0.8125, + "step": 9880 + }, + { + "epoch": 0.53, + "learning_rate": 0.000474030491066909, + "loss": 0.8672, + "step": 9881 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047394356974766273, + "loss": 0.7969, + "step": 9882 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004738566492180118, + "loss": 0.8789, + "step": 9883 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004737697294805904, + "loss": 0.8086, + "step": 9884 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004736828105380324, + "loss": 0.8945, + "step": 9885 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004735958923929717, + "loss": 0.8945, + "step": 9886 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047350897504804196, + "loss": 0.9219, + "step": 9887 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004734220585058774, + "loss": 0.7773, + "step": 9888 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004733351427691119, + "loss": 0.832, + "step": 9889 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047324822784037906, + "loss": 0.8672, + "step": 9890 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004731613137223127, + "loss": 0.8711, + "step": 9891 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004730744004175468, + "loss": 0.8164, + "step": 9892 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047298748792871514, + "loss": 0.7812, + "step": 9893 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004729005762584512, + "loss": 0.9531, + "step": 9894 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047281366540938876, + "loss": 0.8711, + "step": 9895 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004727267553841616, + "loss": 0.7891, + "step": 9896 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047263984618540334, + "loss": 0.9219, + "step": 9897 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047255293781574775, + "loss": 0.875, + "step": 9898 + }, + { + "epoch": 0.53, + "learning_rate": 0.000472466030277828, + "loss": 0.8867, + "step": 9899 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047237912357427824, + "loss": 0.9023, + "step": 9900 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047229221770773175, + "loss": 0.8828, + "step": 9901 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004722053126808219, + "loss": 0.8828, + "step": 9902 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047211840849618246, + "loss": 0.8477, + "step": 9903 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047203150515644687, + "loss": 0.8945, + "step": 9904 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047194460266424855, + "loss": 0.8633, + "step": 9905 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004718577010222208, + "loss": 0.7695, + "step": 9906 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004717708002329972, + "loss": 0.7812, + "step": 9907 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004716839002992113, + "loss": 0.8867, + "step": 9908 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047159700122349605, + "loss": 0.8438, + "step": 9909 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004715101030084849, + "loss": 0.7891, + "step": 9910 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047142320565681135, + "loss": 0.7695, + "step": 9911 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047133630917110846, + "loss": 0.8867, + "step": 9912 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047124941355400975, + "loss": 0.8125, + "step": 9913 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004711625188081478, + "loss": 0.7891, + "step": 9914 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004710756249361567, + "loss": 0.8633, + "step": 9915 + }, + { + "epoch": 0.53, + "learning_rate": 0.000470988731940669, + "loss": 0.7539, + "step": 9916 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004709018398243181, + "loss": 0.8477, + "step": 9917 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004708149485897368, + "loss": 0.918, + "step": 9918 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004707280582395586, + "loss": 0.793, + "step": 9919 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004706411687764164, + "loss": 0.8398, + "step": 9920 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047055428020294335, + "loss": 0.7891, + "step": 9921 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047046739252177194, + "loss": 0.8398, + "step": 9922 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004703805057355359, + "loss": 0.8008, + "step": 9923 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004702936198468677, + "loss": 0.8555, + "step": 9924 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004702067348584005, + "loss": 0.8008, + "step": 9925 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047011985077276687, + "loss": 0.9023, + "step": 9926 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047003296759260007, + "loss": 0.8867, + "step": 9927 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004699460853205329, + "loss": 0.7539, + "step": 9928 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046985920395919794, + "loss": 0.8398, + "step": 9929 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004697723235112281, + "loss": 0.8594, + "step": 9930 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004696854439792561, + "loss": 0.9102, + "step": 9931 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046959856536591484, + "loss": 0.8711, + "step": 9932 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046951168767383686, + "loss": 0.8906, + "step": 9933 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046942481090565477, + "loss": 0.9219, + "step": 9934 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046933793506400144, + "loss": 0.8633, + "step": 9935 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004692510601515096, + "loss": 0.7891, + "step": 9936 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046916418617081124, + "loss": 0.8672, + "step": 9937 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004690773131245397, + "loss": 0.8945, + "step": 9938 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004689904410153269, + "loss": 0.9023, + "step": 9939 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004689035698458058, + "loss": 0.7891, + "step": 9940 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004688166996186085, + "loss": 0.8516, + "step": 9941 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004687298303363677, + "loss": 0.8789, + "step": 9942 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004686429620017159, + "loss": 0.8633, + "step": 9943 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046855609461728516, + "loss": 0.8672, + "step": 9944 + }, + { + "epoch": 0.53, + "learning_rate": 0.000468469228185708, + "loss": 0.8867, + "step": 9945 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046838236270961694, + "loss": 0.8242, + "step": 9946 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046829549819164405, + "loss": 0.9258, + "step": 9947 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004682086346344218, + "loss": 0.793, + "step": 9948 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004681217720405822, + "loss": 0.9062, + "step": 9949 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046803491041275775, + "loss": 0.832, + "step": 9950 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046794804975358064, + "loss": 0.8789, + "step": 9951 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004678611900656828, + "loss": 0.9141, + "step": 9952 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004677743313516963, + "loss": 0.8867, + "step": 9953 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046768747361425355, + "loss": 0.832, + "step": 9954 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004676006168559865, + "loss": 0.875, + "step": 9955 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046751376107952733, + "loss": 0.7969, + "step": 9956 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004674269062875075, + "loss": 0.8828, + "step": 9957 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046734005248255986, + "loss": 0.8242, + "step": 9958 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046725319966731566, + "loss": 0.7734, + "step": 9959 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004671663478444071, + "loss": 0.7656, + "step": 9960 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046707949701646603, + "loss": 0.9062, + "step": 9961 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004669926471861244, + "loss": 0.8047, + "step": 9962 + }, + { + "epoch": 0.54, + "learning_rate": 0.000466905798356014, + "loss": 0.8477, + "step": 9963 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046681895052876673, + "loss": 0.7812, + "step": 9964 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004667321037070139, + "loss": 0.9102, + "step": 9965 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004666452578933879, + "loss": 0.8516, + "step": 9966 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004665584130905201, + "loss": 0.8906, + "step": 9967 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004664715693010422, + "loss": 0.8789, + "step": 9968 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004663847265275858, + "loss": 0.8164, + "step": 9969 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046629788477278267, + "loss": 0.8125, + "step": 9970 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004662110440392644, + "loss": 0.8164, + "step": 9971 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046612420432966256, + "loss": 0.793, + "step": 9972 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004660373656466083, + "loss": 0.918, + "step": 9973 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046595052799273376, + "loss": 0.8438, + "step": 9974 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004658636913706699, + "loss": 0.8945, + "step": 9975 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046577685578304826, + "loss": 0.8359, + "step": 9976 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046569002123250034, + "loss": 0.8555, + "step": 9977 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046560318772165753, + "loss": 0.7266, + "step": 9978 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004655163552531513, + "loss": 0.875, + "step": 9979 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046542952382961234, + "loss": 0.8711, + "step": 9980 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046534269345367273, + "loss": 0.8398, + "step": 9981 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004652558641279633, + "loss": 0.8203, + "step": 9982 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004651690358551153, + "loss": 0.8711, + "step": 9983 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046508220863775976, + "loss": 0.8672, + "step": 9984 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004649953824785281, + "loss": 0.793, + "step": 9985 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004649085573800514, + "loss": 0.8594, + "step": 9986 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004648217333449607, + "loss": 0.875, + "step": 9987 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004647349103758868, + "loss": 0.8672, + "step": 9988 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046464808847546124, + "loss": 0.8008, + "step": 9989 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046456126764631456, + "loss": 0.7578, + "step": 9990 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046447444789107794, + "loss": 0.8516, + "step": 9991 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046438762921238215, + "loss": 0.875, + "step": 9992 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046430081161285826, + "loss": 0.8359, + "step": 9993 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004642139950951372, + "loss": 0.8047, + "step": 9994 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046412717966184947, + "loss": 0.7656, + "step": 9995 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046404036531562593, + "loss": 0.8906, + "step": 9996 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046395355205909757, + "loss": 0.8047, + "step": 9997 + }, + { + "epoch": 0.54, + "learning_rate": 0.000463866739894895, + "loss": 0.8594, + "step": 9998 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004637799288256488, + "loss": 0.9062, + "step": 9999 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046369311885398974, + "loss": 0.8789, + "step": 10000 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004636063099825485, + "loss": 0.8789, + "step": 10001 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004635195022139557, + "loss": 0.8633, + "step": 10002 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046343269555084175, + "loss": 0.7422, + "step": 10003 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046334588999583716, + "loss": 0.9375, + "step": 10004 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004632590855515726, + "loss": 0.7812, + "step": 10005 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004631722822206784, + "loss": 0.8555, + "step": 10006 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004630854800057851, + "loss": 0.8125, + "step": 10007 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046299867890952273, + "loss": 0.8789, + "step": 10008 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046291187893452226, + "loss": 0.8555, + "step": 10009 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046282508008341363, + "loss": 0.8711, + "step": 10010 + }, + { + "epoch": 0.54, + "learning_rate": 0.000462738282358827, + "loss": 0.8555, + "step": 10011 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004626514857633929, + "loss": 0.7734, + "step": 10012 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004625646902997415, + "loss": 0.7891, + "step": 10013 + }, + { + "epoch": 0.54, + "learning_rate": 0.000462477895970503, + "loss": 0.8516, + "step": 10014 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046239110277830733, + "loss": 0.8125, + "step": 10015 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004623043107257849, + "loss": 0.7734, + "step": 10016 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004622175198155658, + "loss": 0.8789, + "step": 10017 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046213073005027983, + "loss": 0.8047, + "step": 10018 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046204394143255705, + "loss": 0.7969, + "step": 10019 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004619571539650276, + "loss": 0.8867, + "step": 10020 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004618703676503214, + "loss": 0.9102, + "step": 10021 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046178358249106846, + "loss": 0.8047, + "step": 10022 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004616967984898981, + "loss": 0.8984, + "step": 10023 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004616100156494409, + "loss": 0.9023, + "step": 10024 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004615232339723262, + "loss": 0.8203, + "step": 10025 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004614364534611839, + "loss": 0.793, + "step": 10026 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046134967411864367, + "loss": 0.8828, + "step": 10027 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046126289594733535, + "loss": 0.7852, + "step": 10028 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046117611894988856, + "loss": 0.8164, + "step": 10029 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046108934312893296, + "loss": 0.8828, + "step": 10030 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046100256848709774, + "loss": 0.957, + "step": 10031 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004609157950270132, + "loss": 0.8438, + "step": 10032 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046082902275130827, + "loss": 0.8008, + "step": 10033 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004607422516626126, + "loss": 0.8984, + "step": 10034 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046065548176355567, + "loss": 0.7148, + "step": 10035 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004605687130567669, + "loss": 0.9023, + "step": 10036 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004604819455448758, + "loss": 0.7109, + "step": 10037 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004603951792305114, + "loss": 0.793, + "step": 10038 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004603084141163031, + "loss": 0.8203, + "step": 10039 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046022165020488025, + "loss": 0.8438, + "step": 10040 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046013488749887215, + "loss": 0.875, + "step": 10041 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004600481260009079, + "loss": 0.9258, + "step": 10042 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045996136571361656, + "loss": 0.7852, + "step": 10043 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045987460663962746, + "loss": 0.8477, + "step": 10044 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004597878487815697, + "loss": 0.8906, + "step": 10045 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045970109214207204, + "loss": 0.8672, + "step": 10046 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004596143367237636, + "loss": 0.8242, + "step": 10047 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045952758252927356, + "loss": 0.7891, + "step": 10048 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045944082956123067, + "loss": 0.832, + "step": 10049 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004593540778222638, + "loss": 0.8359, + "step": 10050 + }, + { + "epoch": 0.54, + "learning_rate": 0.000459267327315002, + "loss": 0.8828, + "step": 10051 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004591805780420739, + "loss": 0.9297, + "step": 10052 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045909383000610854, + "loss": 0.9141, + "step": 10053 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004590070832097342, + "loss": 0.832, + "step": 10054 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004589203376555802, + "loss": 0.8867, + "step": 10055 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045883359334627474, + "loss": 0.8672, + "step": 10056 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004587468502844467, + "loss": 0.7891, + "step": 10057 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045866010847272446, + "loss": 0.7812, + "step": 10058 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045857336791373685, + "loss": 0.9141, + "step": 10059 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045848662861011234, + "loss": 0.8125, + "step": 10060 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045839989056447927, + "loss": 0.8203, + "step": 10061 + }, + { + "epoch": 0.54, + "learning_rate": 0.000458313153779466, + "loss": 0.8945, + "step": 10062 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004582264182577013, + "loss": 0.8789, + "step": 10063 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045813968400181335, + "loss": 0.9219, + "step": 10064 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045805295101443037, + "loss": 0.7812, + "step": 10065 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045796621929818077, + "loss": 0.8984, + "step": 10066 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045787948885569286, + "loss": 0.7734, + "step": 10067 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045779275968959487, + "loss": 0.8867, + "step": 10068 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045770603180251483, + "loss": 0.8398, + "step": 10069 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004576193051970809, + "loss": 0.8164, + "step": 10070 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004575325798759213, + "loss": 0.9453, + "step": 10071 + }, + { + "epoch": 0.54, + "learning_rate": 0.000457445855841664, + "loss": 0.7695, + "step": 10072 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004573591330969372, + "loss": 0.8008, + "step": 10073 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004572724116443683, + "loss": 0.7383, + "step": 10074 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004571856914865861, + "loss": 0.7734, + "step": 10075 + }, + { + "epoch": 0.54, + "learning_rate": 0.000457098972626218, + "loss": 0.7891, + "step": 10076 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004570122550658919, + "loss": 0.7812, + "step": 10077 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004569255388082356, + "loss": 0.8164, + "step": 10078 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045683882385587707, + "loss": 0.8477, + "step": 10079 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004567521102114439, + "loss": 0.8203, + "step": 10080 + }, + { + "epoch": 0.54, + "learning_rate": 0.000456665397877564, + "loss": 0.8438, + "step": 10081 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004565786868568646, + "loss": 0.7812, + "step": 10082 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045649197715197387, + "loss": 0.8438, + "step": 10083 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004564052687655191, + "loss": 0.8164, + "step": 10084 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045631856170012775, + "loss": 0.793, + "step": 10085 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004562318559584276, + "loss": 0.9102, + "step": 10086 + }, + { + "epoch": 0.54, + "learning_rate": 0.000456145151543046, + "loss": 0.8633, + "step": 10087 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004560584484566104, + "loss": 0.9258, + "step": 10088 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004559717467017479, + "loss": 0.8555, + "step": 10089 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045588504628108633, + "loss": 0.8281, + "step": 10090 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045579834719725276, + "loss": 0.8203, + "step": 10091 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045571164945287443, + "loss": 0.9648, + "step": 10092 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004556249530505785, + "loss": 0.8086, + "step": 10093 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045553825799299233, + "loss": 0.957, + "step": 10094 + }, + { + "epoch": 0.54, + "learning_rate": 0.000455451564282743, + "loss": 0.8359, + "step": 10095 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004553648719224577, + "loss": 0.8672, + "step": 10096 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045527818091476314, + "loss": 0.918, + "step": 10097 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004551914912622869, + "loss": 0.8086, + "step": 10098 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004551048029676555, + "loss": 0.8711, + "step": 10099 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004550181160334961, + "loss": 0.8672, + "step": 10100 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004549314304624354, + "loss": 0.75, + "step": 10101 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045484474625710053, + "loss": 0.8047, + "step": 10102 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045475806342011826, + "loss": 0.8047, + "step": 10103 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045467138195411517, + "loss": 0.8594, + "step": 10104 + }, + { + "epoch": 0.54, + "learning_rate": 0.000454584701861718, + "loss": 0.8086, + "step": 10105 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004544980231455537, + "loss": 0.8555, + "step": 10106 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045441134580824873, + "loss": 0.8047, + "step": 10107 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004543246698524297, + "loss": 0.8203, + "step": 10108 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045423799528072315, + "loss": 0.8281, + "step": 10109 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004541513220957557, + "loss": 0.8281, + "step": 10110 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045406465030015396, + "loss": 0.8555, + "step": 10111 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004539779798965441, + "loss": 0.7539, + "step": 10112 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004538913108875525, + "loss": 0.8281, + "step": 10113 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004538046432758057, + "loss": 0.7383, + "step": 10114 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045371797706393005, + "loss": 0.8789, + "step": 10115 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045363131225455177, + "loss": 0.9297, + "step": 10116 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045354464885029674, + "loss": 0.8086, + "step": 10117 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045345798685379176, + "loss": 0.8398, + "step": 10118 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004533713262676625, + "loss": 0.8203, + "step": 10119 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004532846670945353, + "loss": 0.7891, + "step": 10120 + }, + { + "epoch": 0.54, + "learning_rate": 0.000453198009337036, + "loss": 0.832, + "step": 10121 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004531113529977909, + "loss": 0.8633, + "step": 10122 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045302469807942576, + "loss": 0.8477, + "step": 10123 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004529380445845665, + "loss": 0.8203, + "step": 10124 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045285139251583916, + "loss": 0.8945, + "step": 10125 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045276474187586963, + "loss": 0.9531, + "step": 10126 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004526780926672835, + "loss": 0.8711, + "step": 10127 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004525914448927064, + "loss": 0.9766, + "step": 10128 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045250479855476447, + "loss": 0.793, + "step": 10129 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004524181536560831, + "loss": 0.9336, + "step": 10130 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045233151019928797, + "loss": 0.8867, + "step": 10131 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004522448681870046, + "loss": 0.8594, + "step": 10132 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004521582276218587, + "loss": 0.8359, + "step": 10133 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045207158850647566, + "loss": 0.8438, + "step": 10134 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004519849508434809, + "loss": 0.7969, + "step": 10135 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004518983146354997, + "loss": 0.7656, + "step": 10136 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004518116798851577, + "loss": 0.8477, + "step": 10137 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004517250465950801, + "loss": 0.8945, + "step": 10138 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045163841476789226, + "loss": 0.9062, + "step": 10139 + }, + { + "epoch": 0.54, + "learning_rate": 0.000451551784406219, + "loss": 0.832, + "step": 10140 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045146515551268613, + "loss": 0.8477, + "step": 10141 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045137852808991837, + "loss": 0.875, + "step": 10142 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004512919021405409, + "loss": 0.9844, + "step": 10143 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045120527766717873, + "loss": 0.8555, + "step": 10144 + }, + { + "epoch": 0.55, + "learning_rate": 0.000451118654672457, + "loss": 0.9258, + "step": 10145 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004510320331590006, + "loss": 0.7539, + "step": 10146 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045094541312943454, + "loss": 0.9375, + "step": 10147 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004508587945863833, + "loss": 0.9688, + "step": 10148 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045077217753247234, + "loss": 0.7734, + "step": 10149 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045068556197032595, + "loss": 0.8359, + "step": 10150 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045059894790256907, + "loss": 0.8281, + "step": 10151 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045051233533182624, + "loss": 0.9023, + "step": 10152 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045042572426072226, + "loss": 0.7031, + "step": 10153 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004503391146918818, + "loss": 0.8281, + "step": 10154 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004502525066279292, + "loss": 0.9531, + "step": 10155 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045016590007148895, + "loss": 0.8633, + "step": 10156 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045007929502518574, + "loss": 0.8477, + "step": 10157 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044999269149164386, + "loss": 0.7852, + "step": 10158 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044990608947348767, + "loss": 0.8125, + "step": 10159 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004498194889733415, + "loss": 0.9297, + "step": 10160 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004497328899938296, + "loss": 0.8711, + "step": 10161 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004496462925375765, + "loss": 0.8164, + "step": 10162 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044955969660720577, + "loss": 0.8477, + "step": 10163 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004494731022053422, + "loss": 0.832, + "step": 10164 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044938650933460956, + "loss": 0.8906, + "step": 10165 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044929991799763186, + "loss": 0.7578, + "step": 10166 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044921332819703315, + "loss": 0.8789, + "step": 10167 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004491267399354375, + "loss": 0.8516, + "step": 10168 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044904015321546885, + "loss": 0.832, + "step": 10169 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044895356803975085, + "loss": 0.8906, + "step": 10170 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044886698441090736, + "loss": 0.8984, + "step": 10171 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044878040233156224, + "loss": 0.9219, + "step": 10172 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044869382180433916, + "loss": 0.75, + "step": 10173 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004486072428318619, + "loss": 0.9062, + "step": 10174 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004485206654167538, + "loss": 0.8789, + "step": 10175 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044843408956163875, + "loss": 0.8281, + "step": 10176 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004483475152691404, + "loss": 0.8359, + "step": 10177 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044826094254188175, + "loss": 0.8242, + "step": 10178 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044817437138248647, + "loss": 0.8906, + "step": 10179 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044808780179357805, + "loss": 0.9688, + "step": 10180 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044800123377777975, + "loss": 0.793, + "step": 10181 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044791466733771503, + "loss": 0.8906, + "step": 10182 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044782810247600657, + "loss": 0.8594, + "step": 10183 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044774153919527836, + "loss": 0.8516, + "step": 10184 + }, + { + "epoch": 0.55, + "learning_rate": 0.000447654977498153, + "loss": 0.8281, + "step": 10185 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004475684173872539, + "loss": 0.8555, + "step": 10186 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044748185886520374, + "loss": 0.8594, + "step": 10187 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044739530193462596, + "loss": 0.8867, + "step": 10188 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004473087465981433, + "loss": 0.7695, + "step": 10189 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044722219285837884, + "loss": 0.8555, + "step": 10190 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004471356407179549, + "loss": 0.8828, + "step": 10191 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004470490901794951, + "loss": 0.8945, + "step": 10192 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044696254124562164, + "loss": 0.8398, + "step": 10193 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004468759939189574, + "loss": 0.8164, + "step": 10194 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044678944820212494, + "loss": 0.832, + "step": 10195 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004467029040977472, + "loss": 0.8008, + "step": 10196 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044661636160844647, + "loss": 0.7891, + "step": 10197 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004465298207368451, + "loss": 0.8516, + "step": 10198 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004464432814855661, + "loss": 0.918, + "step": 10199 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004463567438572316, + "loss": 0.7695, + "step": 10200 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004462702078544638, + "loss": 0.8125, + "step": 10201 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004461836734798852, + "loss": 0.8477, + "step": 10202 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044609714073611813, + "loss": 0.9102, + "step": 10203 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044601060962578474, + "loss": 0.8594, + "step": 10204 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004459240801515074, + "loss": 0.9023, + "step": 10205 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044583755231590767, + "loss": 0.8594, + "step": 10206 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004457510261216084, + "loss": 0.8516, + "step": 10207 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004456645015712311, + "loss": 0.8984, + "step": 10208 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044557797866739783, + "loss": 0.8477, + "step": 10209 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004454914574127306, + "loss": 0.8945, + "step": 10210 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004454049378098512, + "loss": 0.9375, + "step": 10211 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004453184198613816, + "loss": 0.8164, + "step": 10212 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004452319035699436, + "loss": 0.9062, + "step": 10213 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004451453889381585, + "loss": 0.8633, + "step": 10214 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004450588759686487, + "loss": 0.7539, + "step": 10215 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004449723646640352, + "loss": 0.7461, + "step": 10216 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004448858550269399, + "loss": 0.8438, + "step": 10217 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044479934705998406, + "loss": 0.8594, + "step": 10218 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044471284076578945, + "loss": 0.8125, + "step": 10219 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004446263361469775, + "loss": 0.9023, + "step": 10220 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044453983320616935, + "loss": 0.9102, + "step": 10221 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004444533319459863, + "loss": 0.9102, + "step": 10222 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044436683236904985, + "loss": 0.9648, + "step": 10223 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044428033447798114, + "loss": 0.8555, + "step": 10224 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044419383827540136, + "loss": 0.8438, + "step": 10225 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044410734376393145, + "loss": 0.8477, + "step": 10226 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044402085094619274, + "loss": 0.8555, + "step": 10227 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044393435982480623, + "loss": 0.7109, + "step": 10228 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044384787040239267, + "loss": 0.7773, + "step": 10229 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044376138268157294, + "loss": 0.8789, + "step": 10230 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044367489666496826, + "loss": 0.8516, + "step": 10231 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044358841235519916, + "loss": 0.7734, + "step": 10232 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004435019297548866, + "loss": 0.832, + "step": 10233 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004434154488666507, + "loss": 0.7656, + "step": 10234 + }, + { + "epoch": 0.55, + "learning_rate": 0.000443328969693113, + "loss": 0.8398, + "step": 10235 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004432424922368936, + "loss": 0.7031, + "step": 10236 + }, + { + "epoch": 0.55, + "learning_rate": 0.000443156016500613, + "loss": 0.8906, + "step": 10237 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004430695424868919, + "loss": 0.8672, + "step": 10238 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004429830701983507, + "loss": 0.8516, + "step": 10239 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044289659963760977, + "loss": 0.9297, + "step": 10240 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004428101308072893, + "loss": 0.8438, + "step": 10241 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004427236637100098, + "loss": 0.8242, + "step": 10242 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004426371983483916, + "loss": 0.8203, + "step": 10243 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044255073472505456, + "loss": 0.8164, + "step": 10244 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044246427284261894, + "loss": 0.8828, + "step": 10245 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004423778127037049, + "loss": 0.8555, + "step": 10246 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044229135431093236, + "loss": 0.8438, + "step": 10247 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004422048976669215, + "loss": 0.8398, + "step": 10248 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004421184427742918, + "loss": 0.9102, + "step": 10249 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044203198963566374, + "loss": 0.8477, + "step": 10250 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004419455382536566, + "loss": 0.9492, + "step": 10251 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044185908863089045, + "loss": 0.8594, + "step": 10252 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044177264076998476, + "loss": 0.8984, + "step": 10253 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004416861946735594, + "loss": 0.9141, + "step": 10254 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044159975034423394, + "loss": 0.8594, + "step": 10255 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044151330778462796, + "loss": 0.8867, + "step": 10256 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044142686699736054, + "loss": 0.8867, + "step": 10257 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044134042798505183, + "loss": 0.8594, + "step": 10258 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044125399075032066, + "loss": 0.8867, + "step": 10259 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004411675552957866, + "loss": 0.8555, + "step": 10260 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044108112162406874, + "loss": 0.8242, + "step": 10261 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044099468973778647, + "loss": 0.7109, + "step": 10262 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004409082596395591, + "loss": 0.8242, + "step": 10263 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044082183133200545, + "loss": 0.8555, + "step": 10264 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004407354048177445, + "loss": 0.8594, + "step": 10265 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044064898009939577, + "loss": 0.8867, + "step": 10266 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044056255717957773, + "loss": 0.9141, + "step": 10267 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044047613606090953, + "loss": 0.8633, + "step": 10268 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004403897167460097, + "loss": 0.8633, + "step": 10269 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044030329923749754, + "loss": 0.8711, + "step": 10270 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004402168835379916, + "loss": 0.8867, + "step": 10271 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044013046965011, + "loss": 0.7734, + "step": 10272 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044004405757647224, + "loss": 0.8633, + "step": 10273 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004399576473196963, + "loss": 0.7344, + "step": 10274 + }, + { + "epoch": 0.55, + "learning_rate": 0.000439871238882401, + "loss": 0.7891, + "step": 10275 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004397848322672045, + "loss": 0.9219, + "step": 10276 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004396984274767255, + "loss": 0.8281, + "step": 10277 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004396120245135823, + "loss": 0.7734, + "step": 10278 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043952562338039306, + "loss": 0.8281, + "step": 10279 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004394392240797759, + "loss": 0.9219, + "step": 10280 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004393528266143494, + "loss": 0.8711, + "step": 10281 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043926643098673133, + "loss": 0.8203, + "step": 10282 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043918003719954, + "loss": 0.8203, + "step": 10283 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004390936452553932, + "loss": 0.8711, + "step": 10284 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004390072551569091, + "loss": 0.8906, + "step": 10285 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043892086690670563, + "loss": 0.8594, + "step": 10286 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043883448050740045, + "loss": 0.9336, + "step": 10287 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043874809596161125, + "loss": 0.832, + "step": 10288 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043866171327195617, + "loss": 0.8398, + "step": 10289 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004385753324410525, + "loss": 0.8867, + "step": 10290 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043848895347151827, + "loss": 0.8906, + "step": 10291 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004384025763659704, + "loss": 0.7695, + "step": 10292 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004383162011270271, + "loss": 0.9023, + "step": 10293 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043822982775730557, + "loss": 0.9414, + "step": 10294 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043814345625942303, + "loss": 0.8906, + "step": 10295 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004380570866359969, + "loss": 0.8711, + "step": 10296 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004379707188896446, + "loss": 0.8242, + "step": 10297 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004378843530229832, + "loss": 0.8906, + "step": 10298 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004377979890386302, + "loss": 0.75, + "step": 10299 + }, + { + "epoch": 0.55, + "learning_rate": 0.000437711626939202, + "loss": 0.9766, + "step": 10300 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043762526672731653, + "loss": 0.9414, + "step": 10301 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043753890840559024, + "loss": 0.9062, + "step": 10302 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043745255197664013, + "loss": 0.8672, + "step": 10303 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004373661974430831, + "loss": 0.875, + "step": 10304 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043727984480753615, + "loss": 0.8398, + "step": 10305 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004371934940726159, + "loss": 0.875, + "step": 10306 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004371071452409392, + "loss": 0.8203, + "step": 10307 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004370207983151222, + "loss": 0.7969, + "step": 10308 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043693445329778233, + "loss": 0.9141, + "step": 10309 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043684811019153546, + "loss": 0.9258, + "step": 10310 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004367617689989983, + "loss": 0.875, + "step": 10311 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004366754297227873, + "loss": 0.8672, + "step": 10312 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004365890923655188, + "loss": 0.8242, + "step": 10313 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004365027569298092, + "loss": 0.8789, + "step": 10314 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004364164234182743, + "loss": 0.7461, + "step": 10315 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043633009183353103, + "loss": 0.8945, + "step": 10316 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043624376217819496, + "loss": 0.8281, + "step": 10317 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043615743445488234, + "loss": 0.7969, + "step": 10318 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043607110866620896, + "loss": 0.8633, + "step": 10319 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004359847848147912, + "loss": 0.7578, + "step": 10320 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004358984629032447, + "loss": 0.9453, + "step": 10321 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004358121429341855, + "loss": 0.793, + "step": 10322 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004357258249102288, + "loss": 0.8594, + "step": 10323 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004356395088339911, + "loss": 0.7852, + "step": 10324 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004355531947080876, + "loss": 0.7422, + "step": 10325 + }, + { + "epoch": 0.55, + "learning_rate": 0.000435466882535134, + "loss": 0.8945, + "step": 10326 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004353805723177457, + "loss": 0.9102, + "step": 10327 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043529426405853846, + "loss": 0.8438, + "step": 10328 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004352079577601276, + "loss": 0.7773, + "step": 10329 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004351216534251284, + "loss": 0.9102, + "step": 10330 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004350353510561561, + "loss": 0.8633, + "step": 10331 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043494905065582617, + "loss": 0.9141, + "step": 10332 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004348627522267537, + "loss": 0.8398, + "step": 10333 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004347764557715538, + "loss": 0.7969, + "step": 10334 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004346901612928414, + "loss": 0.8281, + "step": 10335 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004346038687932318, + "loss": 0.8086, + "step": 10336 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004345175782753398, + "loss": 0.875, + "step": 10337 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043443128974178025, + "loss": 0.7773, + "step": 10338 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004343450031951679, + "loss": 0.8242, + "step": 10339 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004342587186381177, + "loss": 0.793, + "step": 10340 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004341724360732442, + "loss": 0.8516, + "step": 10341 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043408615550316235, + "loss": 0.793, + "step": 10342 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043399987693048614, + "loss": 0.8242, + "step": 10343 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004339136003578308, + "loss": 0.8203, + "step": 10344 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043382732578781027, + "loss": 0.8867, + "step": 10345 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043374105322303906, + "loss": 0.8281, + "step": 10346 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043365478266613167, + "loss": 0.8242, + "step": 10347 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004335685141197024, + "loss": 0.8438, + "step": 10348 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004334822475863652, + "loss": 0.9336, + "step": 10349 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004333959830687345, + "loss": 0.8359, + "step": 10350 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004333097205694242, + "loss": 0.9297, + "step": 10351 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043322346009104864, + "loss": 0.8477, + "step": 10352 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004331372016362215, + "loss": 0.8867, + "step": 10353 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043305094520755663, + "loss": 0.8477, + "step": 10354 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004329646908076681, + "loss": 0.918, + "step": 10355 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004328784384391697, + "loss": 0.8008, + "step": 10356 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004327921881046752, + "loss": 0.8516, + "step": 10357 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043270593980679784, + "loss": 0.8359, + "step": 10358 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004326196935481519, + "loss": 0.7383, + "step": 10359 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004325334493313504, + "loss": 0.8672, + "step": 10360 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043244720715900705, + "loss": 0.7734, + "step": 10361 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043236096703373506, + "loss": 0.9141, + "step": 10362 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043227472895814815, + "loss": 0.7891, + "step": 10363 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043218849293485934, + "loss": 0.9336, + "step": 10364 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043210225896648207, + "loss": 0.875, + "step": 10365 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004320160270556291, + "loss": 0.9844, + "step": 10366 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004319297972049141, + "loss": 0.8281, + "step": 10367 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043184356941694974, + "loss": 0.8125, + "step": 10368 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004317573436943491, + "loss": 0.8828, + "step": 10369 + }, + { + "epoch": 0.56, + "learning_rate": 0.000431671120039725, + "loss": 0.8828, + "step": 10370 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004315848984556906, + "loss": 0.7227, + "step": 10371 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004314986789448585, + "loss": 0.8164, + "step": 10372 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004314124615098414, + "loss": 0.8203, + "step": 10373 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004313262461532519, + "loss": 0.8594, + "step": 10374 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043124003287770284, + "loss": 0.8594, + "step": 10375 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004311538216858066, + "loss": 0.8633, + "step": 10376 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043106761258017575, + "loss": 0.8594, + "step": 10377 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043098140556342257, + "loss": 0.9258, + "step": 10378 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043089520063815956, + "loss": 0.8984, + "step": 10379 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004308089978069991, + "loss": 0.7578, + "step": 10380 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004307227970725532, + "loss": 0.793, + "step": 10381 + }, + { + "epoch": 0.56, + "learning_rate": 0.000430636598437434, + "loss": 0.7773, + "step": 10382 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004305504019042539, + "loss": 0.8203, + "step": 10383 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043046420747562466, + "loss": 0.9258, + "step": 10384 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004303780151541582, + "loss": 0.7422, + "step": 10385 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004302918249424668, + "loss": 0.8633, + "step": 10386 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043020563684316194, + "loss": 0.7773, + "step": 10387 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043011945085885575, + "loss": 0.8086, + "step": 10388 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004300332669921594, + "loss": 0.832, + "step": 10389 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042994708524568515, + "loss": 0.8203, + "step": 10390 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004298609056220443, + "loss": 0.7461, + "step": 10391 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004297747281238484, + "loss": 0.8672, + "step": 10392 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042968855275370875, + "loss": 0.8789, + "step": 10393 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004296023795142371, + "loss": 0.7852, + "step": 10394 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042951620840804463, + "loss": 0.8359, + "step": 10395 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042943003943774253, + "loss": 0.8555, + "step": 10396 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004293438726059419, + "loss": 0.8906, + "step": 10397 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004292577079152541, + "loss": 0.9414, + "step": 10398 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042917154536829013, + "loss": 0.8867, + "step": 10399 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042908538496766105, + "loss": 0.8242, + "step": 10400 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004289992267159776, + "loss": 0.832, + "step": 10401 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004289130706158509, + "loss": 0.8594, + "step": 10402 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004288269166698918, + "loss": 0.7891, + "step": 10403 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004287407648807108, + "loss": 0.793, + "step": 10404 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004286546152509185, + "loss": 0.7383, + "step": 10405 + }, + { + "epoch": 0.56, + "learning_rate": 0.000428568467783126, + "loss": 0.8711, + "step": 10406 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042848232247994346, + "loss": 0.7617, + "step": 10407 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042839617934398156, + "loss": 0.8281, + "step": 10408 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004283100383778503, + "loss": 0.8711, + "step": 10409 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004282238995841607, + "loss": 0.7969, + "step": 10410 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004281377629655227, + "loss": 0.8516, + "step": 10411 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042805162852454647, + "loss": 0.8477, + "step": 10412 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004279654962638421, + "loss": 0.8242, + "step": 10413 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042787936618601997, + "loss": 0.7344, + "step": 10414 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042779323829368994, + "loss": 0.9297, + "step": 10415 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042770711258946205, + "loss": 0.8008, + "step": 10416 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004276209890759458, + "loss": 0.7891, + "step": 10417 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042753486775575167, + "loss": 0.9141, + "step": 10418 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042744874863148897, + "loss": 0.9219, + "step": 10419 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042736263170576737, + "loss": 0.8242, + "step": 10420 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004272765169811968, + "loss": 0.8711, + "step": 10421 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004271904044603866, + "loss": 0.9141, + "step": 10422 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004271042941459464, + "loss": 0.8906, + "step": 10423 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042701818604048525, + "loss": 0.7461, + "step": 10424 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004269320801466131, + "loss": 0.8086, + "step": 10425 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042684597646693885, + "loss": 0.8594, + "step": 10426 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004267598750040718, + "loss": 0.7617, + "step": 10427 + }, + { + "epoch": 0.56, + "learning_rate": 0.000426673775760621, + "loss": 0.918, + "step": 10428 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004265876787391958, + "loss": 0.8047, + "step": 10429 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042650158394240504, + "loss": 0.9219, + "step": 10430 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004264154913728578, + "loss": 0.8047, + "step": 10431 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004263294010331626, + "loss": 0.8125, + "step": 10432 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004262433129259289, + "loss": 0.8672, + "step": 10433 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042615722705376493, + "loss": 0.8398, + "step": 10434 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004260711434192795, + "loss": 0.7617, + "step": 10435 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042598506202508123, + "loss": 0.8633, + "step": 10436 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004258989828737788, + "loss": 0.8477, + "step": 10437 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004258129059679807, + "loss": 0.7969, + "step": 10438 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004257268313102951, + "loss": 0.8711, + "step": 10439 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042564075890333045, + "loss": 0.9102, + "step": 10440 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004255546887496951, + "loss": 0.793, + "step": 10441 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004254686208519973, + "loss": 0.8008, + "step": 10442 + }, + { + "epoch": 0.56, + "learning_rate": 0.000425382555212845, + "loss": 0.8281, + "step": 10443 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004252964918348463, + "loss": 0.7227, + "step": 10444 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004252104307206095, + "loss": 0.8594, + "step": 10445 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004251243718727423, + "loss": 0.832, + "step": 10446 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004250383152938526, + "loss": 0.8711, + "step": 10447 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042495226098654794, + "loss": 0.7539, + "step": 10448 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042486620895343646, + "loss": 0.8516, + "step": 10449 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004247801591971256, + "loss": 0.8984, + "step": 10450 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004246941117202232, + "loss": 0.8789, + "step": 10451 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004246080665253362, + "loss": 0.7695, + "step": 10452 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004245220236150728, + "loss": 0.793, + "step": 10453 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042443598299203984, + "loss": 0.9336, + "step": 10454 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004243499446588449, + "loss": 0.8164, + "step": 10455 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004242639086180949, + "loss": 0.8164, + "step": 10456 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042417787487239743, + "loss": 0.8047, + "step": 10457 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004240918434243594, + "loss": 0.8398, + "step": 10458 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042400581427658776, + "loss": 0.8203, + "step": 10459 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004239197874316897, + "loss": 0.7891, + "step": 10460 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042383376289227207, + "loss": 0.8398, + "step": 10461 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042374774066094147, + "loss": 0.8711, + "step": 10462 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004236617207403047, + "loss": 0.8242, + "step": 10463 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004235757031329687, + "loss": 0.8789, + "step": 10464 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042348968784153995, + "loss": 0.8242, + "step": 10465 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042340367486862494, + "loss": 0.8828, + "step": 10466 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004233176642168302, + "loss": 0.8984, + "step": 10467 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042323165588876207, + "loss": 0.8672, + "step": 10468 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004231456498870271, + "loss": 0.9023, + "step": 10469 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042305964621423126, + "loss": 0.8594, + "step": 10470 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004229736448729808, + "loss": 0.8047, + "step": 10471 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042288764586588204, + "loss": 0.918, + "step": 10472 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042280164919554085, + "loss": 0.9531, + "step": 10473 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042271565486456345, + "loss": 0.7812, + "step": 10474 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042262966287555517, + "loss": 0.8438, + "step": 10475 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042254367323112265, + "loss": 0.9062, + "step": 10476 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042245768593387106, + "loss": 0.8242, + "step": 10477 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042237170098640634, + "loss": 0.8828, + "step": 10478 + }, + { + "epoch": 0.56, + "learning_rate": 0.000422285718391334, + "loss": 0.8633, + "step": 10479 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004221997381512598, + "loss": 0.8906, + "step": 10480 + }, + { + "epoch": 0.56, + "learning_rate": 0.000422113760268789, + "loss": 0.8086, + "step": 10481 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042202778474652726, + "loss": 0.7344, + "step": 10482 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042194181158707945, + "loss": 0.8203, + "step": 10483 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004218558407930515, + "loss": 0.8398, + "step": 10484 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004217698723670481, + "loss": 0.8945, + "step": 10485 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004216839063116746, + "loss": 0.875, + "step": 10486 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042159794262953585, + "loss": 0.8086, + "step": 10487 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004215119813232371, + "loss": 0.8867, + "step": 10488 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004214260223953832, + "loss": 0.7734, + "step": 10489 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042134006584857884, + "loss": 0.8711, + "step": 10490 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004212541116854288, + "loss": 0.8047, + "step": 10491 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042116815990853783, + "loss": 0.8945, + "step": 10492 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042108221052051067, + "loss": 0.8359, + "step": 10493 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042099626352395177, + "loss": 0.8047, + "step": 10494 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042091031892146544, + "loss": 0.7578, + "step": 10495 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004208243767156563, + "loss": 0.7969, + "step": 10496 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004207384369091288, + "loss": 0.8398, + "step": 10497 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042065249950448675, + "loss": 0.8672, + "step": 10498 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042056656450433486, + "loss": 0.8672, + "step": 10499 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042048063191127687, + "loss": 0.8125, + "step": 10500 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042039470172791697, + "loss": 0.9453, + "step": 10501 + }, + { + "epoch": 0.56, + "learning_rate": 0.000420308773956859, + "loss": 0.8906, + "step": 10502 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042022284860070705, + "loss": 0.8164, + "step": 10503 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004201369256620649, + "loss": 0.8789, + "step": 10504 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042005100514353616, + "loss": 0.7969, + "step": 10505 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004199650870477245, + "loss": 0.7773, + "step": 10506 + }, + { + "epoch": 0.56, + "learning_rate": 0.00041987917137723365, + "loss": 0.8086, + "step": 10507 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004197932581346672, + "loss": 0.9375, + "step": 10508 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004197073473226284, + "loss": 0.8398, + "step": 10509 + }, + { + "epoch": 0.56, + "learning_rate": 0.00041962143894372063, + "loss": 0.7109, + "step": 10510 + }, + { + "epoch": 0.56, + "learning_rate": 0.00041953553300054736, + "loss": 0.9062, + "step": 10511 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004194496294957119, + "loss": 0.7656, + "step": 10512 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041936372843181716, + "loss": 0.9336, + "step": 10513 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004192778298114662, + "loss": 0.8203, + "step": 10514 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041919193363726225, + "loss": 0.9219, + "step": 10515 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041910603991180816, + "loss": 0.8477, + "step": 10516 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004190201486377069, + "loss": 0.8086, + "step": 10517 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041893425981756084, + "loss": 0.8867, + "step": 10518 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041884837345397324, + "loss": 0.7539, + "step": 10519 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041876248954954645, + "loss": 0.8125, + "step": 10520 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004186766081068831, + "loss": 0.7891, + "step": 10521 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041859072912858546, + "loss": 0.8086, + "step": 10522 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004185048526172563, + "loss": 0.8516, + "step": 10523 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004184189785754978, + "loss": 0.8086, + "step": 10524 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041833310700591233, + "loss": 0.918, + "step": 10525 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041824723791110164, + "loss": 0.8711, + "step": 10526 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041816137129366856, + "loss": 0.8555, + "step": 10527 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041807550715621455, + "loss": 0.7852, + "step": 10528 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004179896455013419, + "loss": 0.7539, + "step": 10529 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041790378633165215, + "loss": 0.8555, + "step": 10530 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004178179296497475, + "loss": 0.8516, + "step": 10531 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004177320754582297, + "loss": 0.8008, + "step": 10532 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004176462237596999, + "loss": 0.7656, + "step": 10533 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004175603745567603, + "loss": 0.8086, + "step": 10534 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041747452785201214, + "loss": 0.793, + "step": 10535 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041738868364805677, + "loss": 0.7656, + "step": 10536 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004173028419474956, + "loss": 0.6914, + "step": 10537 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041721700275293005, + "loss": 0.8555, + "step": 10538 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041713116606696124, + "loss": 1.0156, + "step": 10539 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004170453318921904, + "loss": 0.8398, + "step": 10540 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004169595002312182, + "loss": 0.7109, + "step": 10541 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004168736710866462, + "loss": 0.7188, + "step": 10542 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041678784446107497, + "loss": 0.8164, + "step": 10543 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041670202035710535, + "loss": 0.8906, + "step": 10544 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041661619877733804, + "loss": 0.8086, + "step": 10545 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041653037972437393, + "loss": 0.8633, + "step": 10546 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041644456320081343, + "loss": 0.7891, + "step": 10547 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041635874920925727, + "loss": 0.875, + "step": 10548 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041627293775230547, + "loss": 0.8477, + "step": 10549 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004161871288325589, + "loss": 0.918, + "step": 10550 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041610132245261766, + "loss": 0.8125, + "step": 10551 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004160155186150819, + "loss": 0.7539, + "step": 10552 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004159297173225516, + "loss": 0.8047, + "step": 10553 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041584391857762726, + "loss": 0.8438, + "step": 10554 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041575812238290867, + "loss": 0.8555, + "step": 10555 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004156723287409956, + "loss": 0.8164, + "step": 10556 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041558653765448784, + "loss": 0.8398, + "step": 10557 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004155007491259854, + "loss": 0.8359, + "step": 10558 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004154149631580878, + "loss": 0.7617, + "step": 10559 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041532917975339466, + "loss": 0.8398, + "step": 10560 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041524339891450547, + "loss": 0.8047, + "step": 10561 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004151576206440197, + "loss": 0.8672, + "step": 10562 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041507184494453685, + "loss": 0.7969, + "step": 10563 + }, + { + "epoch": 0.57, + "learning_rate": 0.000414986071818656, + "loss": 0.8711, + "step": 10564 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041490030126897635, + "loss": 0.8555, + "step": 10565 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041481453329809715, + "loss": 0.9258, + "step": 10566 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041472876790861745, + "loss": 0.7734, + "step": 10567 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041464300510313625, + "loss": 0.9062, + "step": 10568 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041455724488425206, + "loss": 0.8281, + "step": 10569 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041447148725456436, + "loss": 0.8516, + "step": 10570 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041438573221667143, + "loss": 0.8438, + "step": 10571 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004142999797731719, + "loss": 0.75, + "step": 10572 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041421422992666466, + "loss": 0.8633, + "step": 10573 + }, + { + "epoch": 0.57, + "learning_rate": 0.000414128482679748, + "loss": 0.8633, + "step": 10574 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004140427380350204, + "loss": 0.7578, + "step": 10575 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004139569959950801, + "loss": 0.8125, + "step": 10576 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004138712565625256, + "loss": 0.7773, + "step": 10577 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004137855197399549, + "loss": 0.8984, + "step": 10578 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004136997855299661, + "loss": 0.9062, + "step": 10579 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041361405393515726, + "loss": 0.7383, + "step": 10580 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004135283249581264, + "loss": 0.875, + "step": 10581 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004134425986014713, + "loss": 0.7891, + "step": 10582 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041335687486778995, + "loss": 0.9453, + "step": 10583 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004132711537596796, + "loss": 0.8906, + "step": 10584 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041318543527973846, + "loss": 0.8164, + "step": 10585 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004130997194305637, + "loss": 0.8125, + "step": 10586 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041301400621475285, + "loss": 0.9102, + "step": 10587 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041292829563490324, + "loss": 0.9297, + "step": 10588 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041284258769361243, + "loss": 0.8398, + "step": 10589 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041275688239347753, + "loss": 0.832, + "step": 10590 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004126711797370958, + "loss": 0.8438, + "step": 10591 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004125854797270637, + "loss": 0.8281, + "step": 10592 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041249978236597917, + "loss": 0.875, + "step": 10593 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041241408765643853, + "loss": 0.7852, + "step": 10594 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041232839560103874, + "loss": 0.8594, + "step": 10595 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004122427062023764, + "loss": 0.8945, + "step": 10596 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041215701946304843, + "loss": 0.7969, + "step": 10597 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004120713353856515, + "loss": 0.8164, + "step": 10598 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041198565397278176, + "loss": 0.8086, + "step": 10599 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041189997522703576, + "loss": 0.8789, + "step": 10600 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041181429915101, + "loss": 0.8164, + "step": 10601 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041172862574730064, + "loss": 0.8438, + "step": 10602 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041164295501850377, + "loss": 0.8867, + "step": 10603 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004115572869672156, + "loss": 0.7539, + "step": 10604 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004114716215960323, + "loss": 0.8672, + "step": 10605 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004113859589075497, + "loss": 0.8438, + "step": 10606 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004113002989043633, + "loss": 0.8555, + "step": 10607 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041121464158906955, + "loss": 0.8672, + "step": 10608 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004111289869642637, + "loss": 0.9258, + "step": 10609 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041104333503254147, + "loss": 0.957, + "step": 10610 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004109576857964983, + "loss": 0.8906, + "step": 10611 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004108720392587298, + "loss": 0.8047, + "step": 10612 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004107863954218314, + "loss": 0.8086, + "step": 10613 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004107007542883982, + "loss": 0.9141, + "step": 10614 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004106151158610253, + "loss": 0.8164, + "step": 10615 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041052948014230815, + "loss": 0.8555, + "step": 10616 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004104438471348417, + "loss": 0.793, + "step": 10617 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041035821684122075, + "loss": 0.9258, + "step": 10618 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004102725892640403, + "loss": 0.9297, + "step": 10619 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004101869644058951, + "loss": 0.7695, + "step": 10620 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004101013422693801, + "loss": 0.9023, + "step": 10621 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004100157228570896, + "loss": 0.8828, + "step": 10622 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040993010617161813, + "loss": 0.8633, + "step": 10623 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004098444922155605, + "loss": 0.8047, + "step": 10624 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040975888099151084, + "loss": 0.918, + "step": 10625 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004096732725020636, + "loss": 0.9219, + "step": 10626 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040958766674981264, + "loss": 0.8359, + "step": 10627 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040950206373735254, + "loss": 0.8281, + "step": 10628 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040941646346727726, + "loss": 0.8047, + "step": 10629 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040933086594218066, + "loss": 0.8789, + "step": 10630 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040924527116465646, + "loss": 0.7188, + "step": 10631 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040915967913729884, + "loss": 0.8945, + "step": 10632 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004090740898627013, + "loss": 0.7812, + "step": 10633 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004089885033434576, + "loss": 0.918, + "step": 10634 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004089029195821609, + "loss": 0.7969, + "step": 10635 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040881733858140533, + "loss": 0.8867, + "step": 10636 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040873176034378377, + "loss": 0.918, + "step": 10637 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004086461848718897, + "loss": 0.7695, + "step": 10638 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004085606121683162, + "loss": 0.8125, + "step": 10639 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004084750422356566, + "loss": 0.832, + "step": 10640 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004083894750765038, + "loss": 0.7891, + "step": 10641 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040830391069345104, + "loss": 0.7578, + "step": 10642 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004082183490890906, + "loss": 0.875, + "step": 10643 + }, + { + "epoch": 0.57, + "learning_rate": 0.000408132790266016, + "loss": 0.8711, + "step": 10644 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040804723422681956, + "loss": 0.9062, + "step": 10645 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004079616809740938, + "loss": 0.8945, + "step": 10646 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004078761305104316, + "loss": 0.7734, + "step": 10647 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040779058283842525, + "loss": 0.8789, + "step": 10648 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040770503796066723, + "loss": 0.8516, + "step": 10649 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004076194958797494, + "loss": 0.7891, + "step": 10650 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004075339565982646, + "loss": 0.9219, + "step": 10651 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004074484201188046, + "loss": 0.7266, + "step": 10652 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004073628864439614, + "loss": 0.875, + "step": 10653 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040727735557632695, + "loss": 0.9766, + "step": 10654 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004071918275184934, + "loss": 0.9219, + "step": 10655 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040710630227305225, + "loss": 0.8164, + "step": 10656 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040702077984259536, + "loss": 0.8516, + "step": 10657 + }, + { + "epoch": 0.57, + "learning_rate": 0.000406935260229714, + "loss": 0.8086, + "step": 10658 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004068497434370002, + "loss": 0.8203, + "step": 10659 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004067642294670452, + "loss": 0.8125, + "step": 10660 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040667871832244016, + "loss": 0.9062, + "step": 10661 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004065932100057764, + "loss": 0.8047, + "step": 10662 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004065077045196454, + "loss": 0.9375, + "step": 10663 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004064222018666381, + "loss": 0.957, + "step": 10664 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040633670204934534, + "loss": 0.9062, + "step": 10665 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004062512050703581, + "loss": 0.7422, + "step": 10666 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004061657109322674, + "loss": 0.9375, + "step": 10667 + }, + { + "epoch": 0.57, + "learning_rate": 0.000406080219637664, + "loss": 0.8828, + "step": 10668 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004059947311891383, + "loss": 0.8125, + "step": 10669 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040590924558928093, + "loss": 0.8203, + "step": 10670 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004058237628406827, + "loss": 0.8047, + "step": 10671 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040573828294593377, + "loss": 0.8711, + "step": 10672 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004056528059076245, + "loss": 0.7969, + "step": 10673 + }, + { + "epoch": 0.57, + "learning_rate": 0.000405567331728345, + "loss": 0.8789, + "step": 10674 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040548186041068564, + "loss": 0.8438, + "step": 10675 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004053963919572364, + "loss": 0.8242, + "step": 10676 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004053109263705873, + "loss": 0.8594, + "step": 10677 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040522546365332786, + "loss": 0.8047, + "step": 10678 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004051400038080485, + "loss": 0.8125, + "step": 10679 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040505454683733856, + "loss": 0.7891, + "step": 10680 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040496909274378756, + "loss": 0.7852, + "step": 10681 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040488364152998536, + "loss": 0.9297, + "step": 10682 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004047981931985213, + "loss": 0.8438, + "step": 10683 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004047127477519846, + "loss": 0.7539, + "step": 10684 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040462730519296466, + "loss": 0.8281, + "step": 10685 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004045418655240507, + "loss": 0.7656, + "step": 10686 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040445642874783187, + "loss": 0.7539, + "step": 10687 + }, + { + "epoch": 0.57, + "learning_rate": 0.000404370994866897, + "loss": 0.7695, + "step": 10688 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040428556388383497, + "loss": 0.8711, + "step": 10689 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040420013580123485, + "loss": 0.8438, + "step": 10690 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040411471062168537, + "loss": 0.7734, + "step": 10691 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004040292883477752, + "loss": 0.8477, + "step": 10692 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040394386898209253, + "loss": 0.7891, + "step": 10693 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040385845252722646, + "loss": 0.7656, + "step": 10694 + }, + { + "epoch": 0.57, + "learning_rate": 0.000403773038985765, + "loss": 0.8984, + "step": 10695 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004036876283602965, + "loss": 0.7852, + "step": 10696 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004036022206534092, + "loss": 0.8281, + "step": 10697 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004035168158676913, + "loss": 0.8789, + "step": 10698 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004034314140057308, + "loss": 0.7969, + "step": 10699 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004033460150701158, + "loss": 0.7812, + "step": 10700 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004032606190634337, + "loss": 0.9297, + "step": 10701 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040317522598827294, + "loss": 0.8711, + "step": 10702 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004030898358472208, + "loss": 0.8281, + "step": 10703 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040300444864286493, + "loss": 0.8438, + "step": 10704 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004029190643777928, + "loss": 0.8047, + "step": 10705 + }, + { + "epoch": 0.58, + "learning_rate": 0.000402833683054592, + "loss": 0.832, + "step": 10706 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004027483046758499, + "loss": 0.8555, + "step": 10707 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040266292924415355, + "loss": 0.918, + "step": 10708 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040257755676209004, + "loss": 0.8242, + "step": 10709 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040249218723224664, + "loss": 0.8398, + "step": 10710 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004024068206572103, + "loss": 0.8828, + "step": 10711 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040232145703956784, + "loss": 0.9062, + "step": 10712 + }, + { + "epoch": 0.58, + "learning_rate": 0.000402236096381906, + "loss": 0.8633, + "step": 10713 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004021507386868117, + "loss": 0.957, + "step": 10714 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040206538395687146, + "loss": 0.8555, + "step": 10715 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040198003219467176, + "loss": 0.8438, + "step": 10716 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040189468340279895, + "loss": 0.793, + "step": 10717 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040180933758383956, + "loss": 0.793, + "step": 10718 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004017239947403798, + "loss": 0.8633, + "step": 10719 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040163865487500574, + "loss": 0.9453, + "step": 10720 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004015533179903036, + "loss": 0.9336, + "step": 10721 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004014679840888593, + "loss": 0.9023, + "step": 10722 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040138265317325885, + "loss": 0.8359, + "step": 10723 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040129732524608766, + "loss": 0.8398, + "step": 10724 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040121200030993206, + "loss": 0.9297, + "step": 10725 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004011266783673772, + "loss": 0.8281, + "step": 10726 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004010413594210088, + "loss": 0.8555, + "step": 10727 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040095604347341215, + "loss": 0.7383, + "step": 10728 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040087073052717283, + "loss": 0.793, + "step": 10729 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040078542058487603, + "loss": 0.7695, + "step": 10730 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040070011364910687, + "loss": 0.8633, + "step": 10731 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040061480972245037, + "loss": 0.8438, + "step": 10732 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040052950880749164, + "loss": 0.8711, + "step": 10733 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004004442109068155, + "loss": 0.8008, + "step": 10734 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040035891602300687, + "loss": 0.9023, + "step": 10735 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040027362415865026, + "loss": 0.8594, + "step": 10736 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040018833531633047, + "loss": 0.8242, + "step": 10737 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004001030494986322, + "loss": 0.8281, + "step": 10738 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004000177667081395, + "loss": 0.8164, + "step": 10739 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003999324869474368, + "loss": 0.8867, + "step": 10740 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003998472102191085, + "loss": 0.8398, + "step": 10741 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003997619365257388, + "loss": 0.8516, + "step": 10742 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003996766658699117, + "loss": 0.8242, + "step": 10743 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003995913982542109, + "loss": 0.8906, + "step": 10744 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039950613368122087, + "loss": 0.8477, + "step": 10745 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039942087215352496, + "loss": 0.8164, + "step": 10746 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003993356136737071, + "loss": 0.8906, + "step": 10747 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039925035824435055, + "loss": 0.8594, + "step": 10748 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003991651058680393, + "loss": 0.8633, + "step": 10749 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003990798565473566, + "loss": 0.7969, + "step": 10750 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003989946102848858, + "loss": 0.8438, + "step": 10751 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003989093670832097, + "loss": 0.8359, + "step": 10752 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039882412694491225, + "loss": 0.6953, + "step": 10753 + }, + { + "epoch": 0.58, + "learning_rate": 0.000398738889872576, + "loss": 0.9297, + "step": 10754 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003986536558687839, + "loss": 0.8594, + "step": 10755 + }, + { + "epoch": 0.58, + "learning_rate": 0.000398568424936119, + "loss": 0.918, + "step": 10756 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003984831970771641, + "loss": 0.7891, + "step": 10757 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039839797229450173, + "loss": 0.8516, + "step": 10758 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039831275059071437, + "loss": 0.6992, + "step": 10759 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003982275319683849, + "loss": 0.8242, + "step": 10760 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039814231643009554, + "loss": 0.8984, + "step": 10761 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039805710397842846, + "loss": 0.7734, + "step": 10762 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039797189461596594, + "loss": 0.9023, + "step": 10763 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039788668834529024, + "loss": 0.7969, + "step": 10764 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003978014851689833, + "loss": 0.8945, + "step": 10765 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003977162850896272, + "loss": 0.7656, + "step": 10766 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003976310881098033, + "loss": 0.7852, + "step": 10767 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039754589423209404, + "loss": 0.8359, + "step": 10768 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039746070345908057, + "loss": 0.793, + "step": 10769 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003973755157933446, + "loss": 0.8711, + "step": 10770 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003972903312374676, + "loss": 0.8711, + "step": 10771 + }, + { + "epoch": 0.58, + "learning_rate": 0.000397205149794031, + "loss": 0.7578, + "step": 10772 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039711997146561604, + "loss": 0.7773, + "step": 10773 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039703479625480395, + "loss": 0.7812, + "step": 10774 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039694962416417554, + "loss": 0.8281, + "step": 10775 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003968644551963121, + "loss": 0.8281, + "step": 10776 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003967792893537945, + "loss": 0.8477, + "step": 10777 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039669412663920347, + "loss": 0.8008, + "step": 10778 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003966089670551197, + "loss": 0.8867, + "step": 10779 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039652381060412383, + "loss": 0.8516, + "step": 10780 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039643865728879657, + "loss": 0.7773, + "step": 10781 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039635350711171813, + "loss": 0.8633, + "step": 10782 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003962683600754687, + "loss": 0.9141, + "step": 10783 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003961832161826288, + "loss": 0.707, + "step": 10784 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003960980754357785, + "loss": 0.7344, + "step": 10785 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039601293783749793, + "loss": 0.9492, + "step": 10786 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003959278033903666, + "loss": 0.8594, + "step": 10787 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003958426720969649, + "loss": 0.8281, + "step": 10788 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003957575439598724, + "loss": 0.875, + "step": 10789 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003956724189816687, + "loss": 0.832, + "step": 10790 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003955872971649333, + "loss": 0.8281, + "step": 10791 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039550217851224583, + "loss": 0.8828, + "step": 10792 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039541706302618563, + "loss": 0.8984, + "step": 10793 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039533195070933186, + "loss": 0.8828, + "step": 10794 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039524684156426394, + "loss": 0.8828, + "step": 10795 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039516173559356083, + "loss": 0.8555, + "step": 10796 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003950766327998015, + "loss": 0.7812, + "step": 10797 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003949915331855647, + "loss": 0.8672, + "step": 10798 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003949064367534295, + "loss": 0.8242, + "step": 10799 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039482134350597443, + "loss": 0.918, + "step": 10800 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003947362534457781, + "loss": 0.7852, + "step": 10801 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003946511665754191, + "loss": 0.9297, + "step": 10802 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039456608289747583, + "loss": 0.832, + "step": 10803 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039448100241452666, + "loss": 0.7734, + "step": 10804 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003943959251291497, + "loss": 0.7617, + "step": 10805 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039431085104392287, + "loss": 0.9062, + "step": 10806 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003942257801614246, + "loss": 0.75, + "step": 10807 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039414071248423265, + "loss": 0.8438, + "step": 10808 + }, + { + "epoch": 0.58, + "learning_rate": 0.000394055648014925, + "loss": 0.7305, + "step": 10809 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003939705867560788, + "loss": 0.875, + "step": 10810 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039388552871027246, + "loss": 0.7734, + "step": 10811 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039380047388008305, + "loss": 0.7383, + "step": 10812 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039371542226808817, + "loss": 0.8516, + "step": 10813 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039363037387686497, + "loss": 0.9375, + "step": 10814 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039354532870899094, + "loss": 0.7812, + "step": 10815 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003934602867670432, + "loss": 0.793, + "step": 10816 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039337524805359883, + "loss": 0.9336, + "step": 10817 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003932902125712343, + "loss": 0.8203, + "step": 10818 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039320518032252724, + "loss": 0.7812, + "step": 10819 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039312015131005385, + "loss": 0.7969, + "step": 10820 + }, + { + "epoch": 0.58, + "learning_rate": 0.000393035125536391, + "loss": 0.9141, + "step": 10821 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039295010300411504, + "loss": 0.8945, + "step": 10822 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039286508371580274, + "loss": 0.8906, + "step": 10823 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039278006767403046, + "loss": 0.75, + "step": 10824 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003926950548813742, + "loss": 0.7891, + "step": 10825 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039261004534041013, + "loss": 0.8164, + "step": 10826 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039252503905371454, + "loss": 0.8359, + "step": 10827 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039244003602386334, + "loss": 0.8359, + "step": 10828 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003923550362534323, + "loss": 0.7305, + "step": 10829 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003922700397449972, + "loss": 0.8828, + "step": 10830 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003921850465011339, + "loss": 0.7656, + "step": 10831 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039210005652441795, + "loss": 0.8086, + "step": 10832 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003920150698174243, + "loss": 0.8008, + "step": 10833 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003919300863827291, + "loss": 0.9492, + "step": 10834 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039184510622290725, + "loss": 0.8945, + "step": 10835 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003917601293405339, + "loss": 0.8047, + "step": 10836 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039167515573818403, + "loss": 0.8828, + "step": 10837 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003915901854184329, + "loss": 0.7695, + "step": 10838 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003915052183838554, + "loss": 0.7969, + "step": 10839 + }, + { + "epoch": 0.58, + "learning_rate": 0.000391420254637026, + "loss": 0.832, + "step": 10840 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003913352941805195, + "loss": 0.8398, + "step": 10841 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003912503370169106, + "loss": 0.8984, + "step": 10842 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039116538314877373, + "loss": 0.7578, + "step": 10843 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039108043257868324, + "loss": 0.8398, + "step": 10844 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003909954853092134, + "loss": 0.8906, + "step": 10845 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039091054134293837, + "loss": 0.8594, + "step": 10846 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003908256006824324, + "loss": 0.8359, + "step": 10847 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039074066333026934, + "loss": 0.8516, + "step": 10848 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039065572928902295, + "loss": 0.875, + "step": 10849 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003905707985612672, + "loss": 0.8438, + "step": 10850 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039048587114957575, + "loss": 0.8594, + "step": 10851 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003904009470565222, + "loss": 0.7773, + "step": 10852 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039031602628467965, + "loss": 0.9531, + "step": 10853 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003902311088366221, + "loss": 0.7188, + "step": 10854 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003901461947149225, + "loss": 0.8711, + "step": 10855 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039006128392215394, + "loss": 0.8203, + "step": 10856 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003899763764608896, + "loss": 0.9023, + "step": 10857 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003898914723337025, + "loss": 0.8516, + "step": 10858 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038980657154316547, + "loss": 0.8828, + "step": 10859 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038972167409185145, + "loss": 0.7383, + "step": 10860 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003896367799823326, + "loss": 0.7656, + "step": 10861 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003895518892171821, + "loss": 0.8398, + "step": 10862 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003894670017989721, + "loss": 0.7812, + "step": 10863 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038938211773027504, + "loss": 0.8672, + "step": 10864 + }, + { + "epoch": 0.58, + "learning_rate": 0.000389297237013663, + "loss": 0.8477, + "step": 10865 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038921235965170853, + "loss": 0.9102, + "step": 10866 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038912748564698353, + "loss": 0.7656, + "step": 10867 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003890426150020596, + "loss": 0.8789, + "step": 10868 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003889577477195092, + "loss": 0.7695, + "step": 10869 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038887288380190375, + "loss": 0.8398, + "step": 10870 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003887880232518149, + "loss": 0.8125, + "step": 10871 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003887031660718142, + "loss": 0.918, + "step": 10872 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003886183122644732, + "loss": 0.8008, + "step": 10873 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003885334618323632, + "loss": 0.8438, + "step": 10874 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003884486147780556, + "loss": 0.8672, + "step": 10875 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038836377110412106, + "loss": 0.8047, + "step": 10876 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003882789308131313, + "loss": 0.8125, + "step": 10877 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003881940939076568, + "loss": 0.8672, + "step": 10878 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003881092603902685, + "loss": 0.7266, + "step": 10879 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003880244302635371, + "loss": 0.9336, + "step": 10880 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003879396035300333, + "loss": 0.9102, + "step": 10881 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038785478019232766, + "loss": 0.793, + "step": 10882 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003877699602529907, + "loss": 0.8477, + "step": 10883 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038768514371459223, + "loss": 0.9023, + "step": 10884 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003876003305797032, + "loss": 0.8086, + "step": 10885 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003875155208508932, + "loss": 0.75, + "step": 10886 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003874307145307324, + "loss": 0.8672, + "step": 10887 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003873459116217907, + "loss": 0.8672, + "step": 10888 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038726111212663796, + "loss": 0.8242, + "step": 10889 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038717631604784396, + "loss": 0.8242, + "step": 10890 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003870915233879781, + "loss": 0.8672, + "step": 10891 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003870067341496098, + "loss": 0.9492, + "step": 10892 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003869219483353088, + "loss": 0.7344, + "step": 10893 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003868371659476442, + "loss": 0.7891, + "step": 10894 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038675238698918514, + "loss": 0.8008, + "step": 10895 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003866676114625007, + "loss": 0.8359, + "step": 10896 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038658283937015997, + "loss": 0.8398, + "step": 10897 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038649807071473185, + "loss": 0.8047, + "step": 10898 + }, + { + "epoch": 0.59, + "learning_rate": 0.000386413305498785, + "loss": 0.8477, + "step": 10899 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038632854372488794, + "loss": 0.832, + "step": 10900 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003862437853956095, + "loss": 0.7969, + "step": 10901 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038615903051351805, + "loss": 0.793, + "step": 10902 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038607427908118197, + "loss": 0.8516, + "step": 10903 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038598953110116917, + "loss": 0.7969, + "step": 10904 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038590478657604845, + "loss": 0.8203, + "step": 10905 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038582004550838736, + "loss": 0.9023, + "step": 10906 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038573530790075384, + "loss": 0.6953, + "step": 10907 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038565057375571587, + "loss": 0.9414, + "step": 10908 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003855658430758412, + "loss": 0.7969, + "step": 10909 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038548111586369734, + "loss": 0.832, + "step": 10910 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038539639212185173, + "loss": 0.8633, + "step": 10911 + }, + { + "epoch": 0.59, + "learning_rate": 0.000385311671852872, + "loss": 0.8164, + "step": 10912 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003852269550593255, + "loss": 0.7578, + "step": 10913 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003851422417437791, + "loss": 0.8828, + "step": 10914 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003850575319088, + "loss": 0.8516, + "step": 10915 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038497282555695534, + "loss": 0.7188, + "step": 10916 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003848881226908119, + "loss": 0.8789, + "step": 10917 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003848034233129366, + "loss": 0.7891, + "step": 10918 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038471872742589565, + "loss": 0.8008, + "step": 10919 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003846340350322562, + "loss": 0.8008, + "step": 10920 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038454934613458435, + "loss": 0.7969, + "step": 10921 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038446466073544655, + "loss": 0.8203, + "step": 10922 + }, + { + "epoch": 0.59, + "learning_rate": 0.000384379978837409, + "loss": 0.8125, + "step": 10923 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003842953004430379, + "loss": 0.8164, + "step": 10924 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038421062555489934, + "loss": 0.8203, + "step": 10925 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038412595417555923, + "loss": 0.8867, + "step": 10926 + }, + { + "epoch": 0.59, + "learning_rate": 0.000384041286307583, + "loss": 0.8711, + "step": 10927 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038395662195353703, + "loss": 0.8828, + "step": 10928 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038387196111598653, + "loss": 0.8398, + "step": 10929 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038378730379749705, + "loss": 0.8789, + "step": 10930 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038370265000063386, + "loss": 0.8477, + "step": 10931 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003836179997279625, + "loss": 0.9141, + "step": 10932 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003835333529820482, + "loss": 0.8047, + "step": 10933 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003834487097654557, + "loss": 0.8086, + "step": 10934 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003833640700807501, + "loss": 0.832, + "step": 10935 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038327943393049634, + "loss": 0.8203, + "step": 10936 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003831948013172592, + "loss": 0.9688, + "step": 10937 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003831101722436032, + "loss": 0.8555, + "step": 10938 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038302554671209286, + "loss": 0.75, + "step": 10939 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038294092472529283, + "loss": 0.8047, + "step": 10940 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003828563062857674, + "loss": 0.8125, + "step": 10941 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003827716913960804, + "loss": 0.8516, + "step": 10942 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038268708005879644, + "loss": 0.8398, + "step": 10943 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038260247227647927, + "loss": 0.8906, + "step": 10944 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038251786805169283, + "loss": 0.7539, + "step": 10945 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003824332673870007, + "loss": 0.8555, + "step": 10946 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003823486702849669, + "loss": 0.8789, + "step": 10947 + }, + { + "epoch": 0.59, + "learning_rate": 0.000382264076748155, + "loss": 0.7617, + "step": 10948 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038217948677912825, + "loss": 0.9375, + "step": 10949 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038209490038044986, + "loss": 0.8047, + "step": 10950 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003820103175546834, + "loss": 0.8086, + "step": 10951 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038192573830439193, + "loss": 0.8086, + "step": 10952 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038184116263213845, + "loss": 0.875, + "step": 10953 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038175659054048573, + "loss": 0.8359, + "step": 10954 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038167202203199683, + "loss": 0.8281, + "step": 10955 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003815874571092345, + "loss": 0.9375, + "step": 10956 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038150289577476104, + "loss": 0.7539, + "step": 10957 + }, + { + "epoch": 0.59, + "learning_rate": 0.000381418338031139, + "loss": 0.8633, + "step": 10958 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038133378388093095, + "loss": 0.8164, + "step": 10959 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038124923332669895, + "loss": 0.9219, + "step": 10960 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003811646863710053, + "loss": 0.7734, + "step": 10961 + }, + { + "epoch": 0.59, + "learning_rate": 0.000381080143016412, + "loss": 0.9297, + "step": 10962 + }, + { + "epoch": 0.59, + "learning_rate": 0.000380995603265481, + "loss": 0.75, + "step": 10963 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003809110671207742, + "loss": 0.8008, + "step": 10964 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038082653458485323, + "loss": 0.8359, + "step": 10965 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003807420056602796, + "loss": 0.832, + "step": 10966 + }, + { + "epoch": 0.59, + "learning_rate": 0.000380657480349615, + "loss": 0.8125, + "step": 10967 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003805729586554208, + "loss": 0.8711, + "step": 10968 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003804884405802583, + "loss": 0.8398, + "step": 10969 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003804039261266884, + "loss": 1.0, + "step": 10970 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038031941529727265, + "loss": 0.7617, + "step": 10971 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038023490809457167, + "loss": 0.9453, + "step": 10972 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038015040452114634, + "loss": 0.8125, + "step": 10973 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003800659045795574, + "loss": 0.9023, + "step": 10974 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037998140827236556, + "loss": 0.7539, + "step": 10975 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037989691560213123, + "loss": 0.8086, + "step": 10976 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037981242657141506, + "loss": 0.8789, + "step": 10977 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003797279411827768, + "loss": 0.7852, + "step": 10978 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037964345943877737, + "loss": 0.832, + "step": 10979 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003795589813419763, + "loss": 0.8086, + "step": 10980 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037947450689493367, + "loss": 0.8398, + "step": 10981 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003793900361002094, + "loss": 0.8672, + "step": 10982 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037930556896036333, + "loss": 0.9102, + "step": 10983 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037922110547795504, + "loss": 0.8086, + "step": 10984 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037913664565554365, + "loss": 0.8008, + "step": 10985 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037905218949568927, + "loss": 0.8555, + "step": 10986 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003789677370009507, + "loss": 0.832, + "step": 10987 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037888328817388726, + "loss": 0.7617, + "step": 10988 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003787988430170579, + "loss": 0.9102, + "step": 10989 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003787144015330219, + "loss": 0.832, + "step": 10990 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037862996372433786, + "loss": 0.6602, + "step": 10991 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037854552959356474, + "loss": 0.7773, + "step": 10992 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003784610991432608, + "loss": 0.8516, + "step": 10993 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037837667237598495, + "loss": 0.8281, + "step": 10994 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003782922492942954, + "loss": 0.8555, + "step": 10995 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003782078299007505, + "loss": 0.8594, + "step": 10996 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003781234141979083, + "loss": 0.8828, + "step": 10997 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037803900218832704, + "loss": 0.7969, + "step": 10998 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003779545938745648, + "loss": 0.8242, + "step": 10999 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037787018925917917, + "loss": 0.7812, + "step": 11000 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003777857883447278, + "loss": 0.8164, + "step": 11001 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003777013911337687, + "loss": 0.8203, + "step": 11002 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003776169976288591, + "loss": 0.7617, + "step": 11003 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003775326078325565, + "loss": 0.8125, + "step": 11004 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037744822174741807, + "loss": 0.8516, + "step": 11005 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003773638393760012, + "loss": 0.8828, + "step": 11006 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037727946072086283, + "loss": 0.7539, + "step": 11007 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003771950857845599, + "loss": 0.8242, + "step": 11008 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037711071456964917, + "loss": 0.8398, + "step": 11009 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037702634707868755, + "loss": 0.8477, + "step": 11010 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003769419833142315, + "loss": 0.8945, + "step": 11011 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037685762327883775, + "loss": 0.7578, + "step": 11012 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003767732669750622, + "loss": 0.9297, + "step": 11013 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003766889144054617, + "loss": 0.7305, + "step": 11014 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037660456557259207, + "loss": 0.8164, + "step": 11015 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003765202204790094, + "loss": 0.9453, + "step": 11016 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003764358791272696, + "loss": 0.832, + "step": 11017 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003763515415199287, + "loss": 0.8945, + "step": 11018 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003762672076595422, + "loss": 0.8711, + "step": 11019 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037618287754866565, + "loss": 0.7812, + "step": 11020 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003760985511898547, + "loss": 0.793, + "step": 11021 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037601422858566476, + "loss": 0.8711, + "step": 11022 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037592990973865084, + "loss": 0.8125, + "step": 11023 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037584559465136816, + "loss": 0.8008, + "step": 11024 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003757612833263718, + "loss": 0.7344, + "step": 11025 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003756769757662167, + "loss": 0.8438, + "step": 11026 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037559267197345773, + "loss": 0.8086, + "step": 11027 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037550837195064914, + "loss": 0.8125, + "step": 11028 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037542407570034607, + "loss": 0.8359, + "step": 11029 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003753397832251027, + "loss": 0.8516, + "step": 11030 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037525549452747334, + "loss": 0.7969, + "step": 11031 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037517120961001217, + "loss": 0.8828, + "step": 11032 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003750869284752735, + "loss": 0.8672, + "step": 11033 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037500265112581113, + "loss": 0.7969, + "step": 11034 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003749183775641792, + "loss": 0.832, + "step": 11035 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037483410779293094, + "loss": 0.8398, + "step": 11036 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037474984181462074, + "loss": 0.8672, + "step": 11037 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003746655796318016, + "loss": 0.9844, + "step": 11038 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037458132124702716, + "loss": 0.8438, + "step": 11039 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003744970666628504, + "loss": 0.7891, + "step": 11040 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037441281588182506, + "loss": 0.8164, + "step": 11041 + }, + { + "epoch": 0.59, + "learning_rate": 0.000374328568906504, + "loss": 0.9414, + "step": 11042 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003742443257394399, + "loss": 0.7891, + "step": 11043 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037416008638318577, + "loss": 0.9062, + "step": 11044 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003740758508402945, + "loss": 0.8047, + "step": 11045 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037399161911331856, + "loss": 0.8828, + "step": 11046 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037390739120481055, + "loss": 0.8594, + "step": 11047 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037382316711732255, + "loss": 0.8828, + "step": 11048 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037373894685340724, + "loss": 0.8867, + "step": 11049 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003736547304156167, + "loss": 0.8945, + "step": 11050 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037357051780650274, + "loss": 0.8086, + "step": 11051 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003734863090286172, + "loss": 0.8477, + "step": 11052 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003734021040845123, + "loss": 0.8672, + "step": 11053 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003733179029767394, + "loss": 0.7422, + "step": 11054 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003732337057078501, + "loss": 0.875, + "step": 11055 + }, + { + "epoch": 0.59, + "learning_rate": 0.000373149512280396, + "loss": 0.7969, + "step": 11056 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003730653226969284, + "loss": 0.7539, + "step": 11057 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003729811369599985, + "loss": 0.8438, + "step": 11058 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037289695507215716, + "loss": 0.9102, + "step": 11059 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037281277703595585, + "loss": 0.7773, + "step": 11060 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003727286028539451, + "loss": 0.8281, + "step": 11061 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037264443252867574, + "loss": 0.8281, + "step": 11062 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003725602660626983, + "loss": 0.8555, + "step": 11063 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037247610345856355, + "loss": 0.9375, + "step": 11064 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037239194471882185, + "loss": 0.9062, + "step": 11065 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003723077898460233, + "loss": 0.8047, + "step": 11066 + }, + { + "epoch": 0.59, + "learning_rate": 0.000372223638842718, + "loss": 0.8086, + "step": 11067 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037213949171145634, + "loss": 0.8203, + "step": 11068 + }, + { + "epoch": 0.59, + "learning_rate": 0.000372055348454788, + "loss": 0.8008, + "step": 11069 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037197120907526293, + "loss": 0.6797, + "step": 11070 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003718870735754307, + "loss": 0.8594, + "step": 11071 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037180294195784095, + "loss": 0.8594, + "step": 11072 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003717188142250434, + "loss": 0.8438, + "step": 11073 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037163469037958697, + "loss": 0.8203, + "step": 11074 + }, + { + "epoch": 0.6, + "learning_rate": 0.000371550570424021, + "loss": 0.793, + "step": 11075 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003714664543608948, + "loss": 0.8281, + "step": 11076 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003713823421927572, + "loss": 0.9531, + "step": 11077 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003712982339221573, + "loss": 0.7461, + "step": 11078 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003712141295516433, + "loss": 0.8242, + "step": 11079 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037113002908376454, + "loss": 1.0234, + "step": 11080 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003710459325210691, + "loss": 0.8008, + "step": 11081 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003709618398661055, + "loss": 0.9258, + "step": 11082 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003708777511214219, + "loss": 0.8633, + "step": 11083 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037079366628956666, + "loss": 0.7969, + "step": 11084 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037070958537308783, + "loss": 0.8516, + "step": 11085 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037062550837453336, + "loss": 0.7969, + "step": 11086 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003705414352964506, + "loss": 0.8477, + "step": 11087 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037045736614138786, + "loss": 0.8242, + "step": 11088 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037037330091189237, + "loss": 0.7656, + "step": 11089 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037028923961051165, + "loss": 0.8594, + "step": 11090 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037020518223979305, + "loss": 0.8594, + "step": 11091 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003701211288022838, + "loss": 0.8867, + "step": 11092 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003700370793005311, + "loss": 0.8984, + "step": 11093 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036995303373708145, + "loss": 0.8008, + "step": 11094 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036986899211448245, + "loss": 0.832, + "step": 11095 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003697849544352803, + "loss": 0.832, + "step": 11096 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003697009207020217, + "loss": 0.8164, + "step": 11097 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036961689091725313, + "loss": 0.8242, + "step": 11098 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036953286508352125, + "loss": 0.9023, + "step": 11099 + }, + { + "epoch": 0.6, + "learning_rate": 0.000369448843203372, + "loss": 0.8945, + "step": 11100 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036936482527935176, + "loss": 0.8633, + "step": 11101 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036928081131400615, + "loss": 0.8867, + "step": 11102 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036919680130988164, + "loss": 0.832, + "step": 11103 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003691127952695237, + "loss": 0.8672, + "step": 11104 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036902879319547786, + "loss": 0.7695, + "step": 11105 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003689447950902898, + "loss": 0.9414, + "step": 11106 + }, + { + "epoch": 0.6, + "learning_rate": 0.000368860800956505, + "loss": 0.7617, + "step": 11107 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003687768107966688, + "loss": 0.7734, + "step": 11108 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003686928246133262, + "loss": 0.8672, + "step": 11109 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036860884240902213, + "loss": 0.8086, + "step": 11110 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036852486418630196, + "loss": 0.918, + "step": 11111 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003684408899477102, + "loss": 0.8477, + "step": 11112 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036835691969579166, + "loss": 0.8203, + "step": 11113 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003682729534330907, + "loss": 0.8047, + "step": 11114 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003681889911621522, + "loss": 0.8711, + "step": 11115 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036810503288552023, + "loss": 0.8125, + "step": 11116 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036802107860573895, + "loss": 0.8906, + "step": 11117 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036793712832535244, + "loss": 0.8203, + "step": 11118 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036785318204690475, + "loss": 0.8086, + "step": 11119 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003677692397729398, + "loss": 0.8867, + "step": 11120 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036768530150600135, + "loss": 0.7812, + "step": 11121 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003676013672486325, + "loss": 0.832, + "step": 11122 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003675174370033775, + "loss": 0.8008, + "step": 11123 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003674335107727792, + "loss": 0.8359, + "step": 11124 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003673495885593811, + "loss": 0.7734, + "step": 11125 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003672656703657259, + "loss": 0.7656, + "step": 11126 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036718175619435714, + "loss": 0.8281, + "step": 11127 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036709784604781735, + "loss": 0.8828, + "step": 11128 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003670139399286494, + "loss": 0.8359, + "step": 11129 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036693003783939596, + "loss": 0.7539, + "step": 11130 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003668461397825995, + "loss": 0.7969, + "step": 11131 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003667622457608024, + "loss": 0.9102, + "step": 11132 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036667835577654673, + "loss": 0.8594, + "step": 11133 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036659446983237497, + "loss": 0.8281, + "step": 11134 + }, + { + "epoch": 0.6, + "learning_rate": 0.000366510587930829, + "loss": 0.832, + "step": 11135 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003664267100744506, + "loss": 0.8438, + "step": 11136 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003663428362657817, + "loss": 0.9648, + "step": 11137 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003662589665073639, + "loss": 0.8125, + "step": 11138 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003661751008017388, + "loss": 0.8711, + "step": 11139 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003660912391514477, + "loss": 0.7969, + "step": 11140 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003660073815590318, + "loss": 0.8125, + "step": 11141 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003659235280270325, + "loss": 0.8125, + "step": 11142 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036583967855799074, + "loss": 0.8203, + "step": 11143 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036575583315444745, + "loss": 0.7539, + "step": 11144 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003656719918189431, + "loss": 0.8398, + "step": 11145 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036558815455401897, + "loss": 0.7578, + "step": 11146 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003655043213622151, + "loss": 0.7461, + "step": 11147 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003654204922460721, + "loss": 0.8164, + "step": 11148 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003653366672081302, + "loss": 0.8047, + "step": 11149 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003652528462509297, + "loss": 0.8047, + "step": 11150 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003651690293770106, + "loss": 0.8164, + "step": 11151 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003650852165889129, + "loss": 0.8125, + "step": 11152 + }, + { + "epoch": 0.6, + "learning_rate": 0.000365001407889176, + "loss": 0.7852, + "step": 11153 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036491760328034016, + "loss": 0.8672, + "step": 11154 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003648338027649446, + "loss": 0.8242, + "step": 11155 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003647500063455288, + "loss": 0.8867, + "step": 11156 + }, + { + "epoch": 0.6, + "learning_rate": 0.000364666214024632, + "loss": 0.7695, + "step": 11157 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003645824258047935, + "loss": 0.8359, + "step": 11158 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003644986416885525, + "loss": 0.8008, + "step": 11159 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036441486167844764, + "loss": 0.7812, + "step": 11160 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036433108577701767, + "loss": 0.8789, + "step": 11161 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036424731398680156, + "loss": 0.7656, + "step": 11162 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036416354631033776, + "loss": 0.75, + "step": 11163 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003640797827501646, + "loss": 0.8672, + "step": 11164 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036399602330882064, + "loss": 0.8008, + "step": 11165 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036391226798884393, + "loss": 0.8828, + "step": 11166 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003638285167927726, + "loss": 0.8906, + "step": 11167 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003637447697231442, + "loss": 0.8438, + "step": 11168 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003636610267824971, + "loss": 0.8164, + "step": 11169 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036357728797336875, + "loss": 0.8359, + "step": 11170 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036349355329829666, + "loss": 0.7773, + "step": 11171 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036340982275981815, + "loss": 0.8633, + "step": 11172 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003633260963604709, + "loss": 0.8359, + "step": 11173 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003632423741027918, + "loss": 0.8359, + "step": 11174 + }, + { + "epoch": 0.6, + "learning_rate": 0.000363158655989318, + "loss": 0.7305, + "step": 11175 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003630749420225863, + "loss": 0.8828, + "step": 11176 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036299123220513376, + "loss": 0.7773, + "step": 11177 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003629075265394969, + "loss": 0.8125, + "step": 11178 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003628238250282123, + "loss": 0.8984, + "step": 11179 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036274012767381625, + "loss": 0.9102, + "step": 11180 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003626564344788454, + "loss": 0.8867, + "step": 11181 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036257274544583583, + "loss": 0.8516, + "step": 11182 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036248906057732334, + "loss": 0.8945, + "step": 11183 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036240537987584394, + "loss": 0.9805, + "step": 11184 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003623217033439335, + "loss": 0.8047, + "step": 11185 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003622380309841279, + "loss": 0.8125, + "step": 11186 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003621543627989625, + "loss": 0.875, + "step": 11187 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036207069879097233, + "loss": 0.7227, + "step": 11188 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003619870389626934, + "loss": 0.8906, + "step": 11189 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036190338331666043, + "loss": 0.8359, + "step": 11190 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003618197318554086, + "loss": 0.8984, + "step": 11191 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036173608458147267, + "loss": 0.7539, + "step": 11192 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036165244149738765, + "loss": 0.8125, + "step": 11193 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036156880260568807, + "loss": 0.8164, + "step": 11194 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003614851679089086, + "loss": 0.918, + "step": 11195 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036140153740958327, + "loss": 0.793, + "step": 11196 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003613179111102469, + "loss": 0.8008, + "step": 11197 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003612342890134333, + "loss": 0.9883, + "step": 11198 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036115067112167654, + "loss": 0.918, + "step": 11199 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036106705743751044, + "loss": 0.8242, + "step": 11200 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036098344796346894, + "loss": 0.8359, + "step": 11201 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036089984270208577, + "loss": 0.8438, + "step": 11202 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036081624165589394, + "loss": 0.8945, + "step": 11203 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003607326448274275, + "loss": 0.8594, + "step": 11204 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003606490522192193, + "loss": 0.8125, + "step": 11205 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003605654638338025, + "loss": 0.8359, + "step": 11206 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003604818796737101, + "loss": 0.9375, + "step": 11207 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036039829974147513, + "loss": 0.9336, + "step": 11208 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036031472403963026, + "loss": 0.8711, + "step": 11209 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036023115257070816, + "loss": 0.8477, + "step": 11210 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036014758533724103, + "loss": 0.7539, + "step": 11211 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003600640223417617, + "loss": 0.793, + "step": 11212 + }, + { + "epoch": 0.6, + "learning_rate": 0.000359980463586802, + "loss": 0.8789, + "step": 11213 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035989690907489426, + "loss": 0.8516, + "step": 11214 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035981335880857026, + "loss": 0.8906, + "step": 11215 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003597298127903621, + "loss": 0.875, + "step": 11216 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035964627102280135, + "loss": 0.7695, + "step": 11217 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003595627335084197, + "loss": 0.9062, + "step": 11218 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003594792002497482, + "loss": 0.8594, + "step": 11219 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003593956712493189, + "loss": 0.8125, + "step": 11220 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035931214650966256, + "loss": 0.832, + "step": 11221 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035922862603331027, + "loss": 0.75, + "step": 11222 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035914510982279285, + "loss": 0.7773, + "step": 11223 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003590615978806414, + "loss": 0.8672, + "step": 11224 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035897809020938665, + "loss": 0.8789, + "step": 11225 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003588945868115589, + "loss": 0.7695, + "step": 11226 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003588110876896885, + "loss": 0.7734, + "step": 11227 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035872759284630605, + "loss": 0.9062, + "step": 11228 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003586441022839416, + "loss": 0.8789, + "step": 11229 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035856061600512523, + "loss": 0.8164, + "step": 11230 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003584771340123867, + "loss": 0.8398, + "step": 11231 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035839365630825593, + "loss": 0.832, + "step": 11232 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003583101828952626, + "loss": 0.8359, + "step": 11233 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003582267137759361, + "loss": 0.9414, + "step": 11234 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003581432489528057, + "loss": 0.7891, + "step": 11235 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035805978842840105, + "loss": 0.8672, + "step": 11236 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035797633220525097, + "loss": 0.875, + "step": 11237 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003578928802858846, + "loss": 0.8438, + "step": 11238 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035780943267283056, + "loss": 0.8008, + "step": 11239 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035772598936861805, + "loss": 0.7969, + "step": 11240 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003576425503757753, + "loss": 0.9023, + "step": 11241 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003575591156968308, + "loss": 0.8906, + "step": 11242 + }, + { + "epoch": 0.6, + "learning_rate": 0.000357475685334313, + "loss": 0.8203, + "step": 11243 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003573922592907502, + "loss": 0.8711, + "step": 11244 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003573088375686703, + "loss": 0.7812, + "step": 11245 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035722542017060135, + "loss": 0.7969, + "step": 11246 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003571420070990712, + "loss": 0.7305, + "step": 11247 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003570585983566076, + "loss": 0.8203, + "step": 11248 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003569751939457379, + "loss": 0.9336, + "step": 11249 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035689179386898945, + "loss": 0.8555, + "step": 11250 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003568083981288899, + "loss": 0.8477, + "step": 11251 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003567250067279663, + "loss": 0.8633, + "step": 11252 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035664161966874567, + "loss": 0.8281, + "step": 11253 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035655823695375456, + "loss": 0.7891, + "step": 11254 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003564748585855203, + "loss": 0.7734, + "step": 11255 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003563914845665693, + "loss": 0.8164, + "step": 11256 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003563081148994279, + "loss": 0.8203, + "step": 11257 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035622474958662263, + "loss": 0.8359, + "step": 11258 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003561413886306798, + "loss": 0.9102, + "step": 11259 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035605803203412543, + "loss": 0.832, + "step": 11260 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035597467979948564, + "loss": 0.8398, + "step": 11261 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035589133192928587, + "loss": 0.8398, + "step": 11262 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035580798842605235, + "loss": 0.8555, + "step": 11263 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035572464929231037, + "loss": 0.8047, + "step": 11264 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035564131453058535, + "loss": 0.9062, + "step": 11265 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003555579841434027, + "loss": 0.8047, + "step": 11266 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003554746581332876, + "loss": 0.9492, + "step": 11267 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035539133650276525, + "loss": 0.8203, + "step": 11268 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003553080192543603, + "loss": 0.793, + "step": 11269 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035522470639059756, + "loss": 0.9141, + "step": 11270 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003551413979140019, + "loss": 0.8125, + "step": 11271 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003550580938270976, + "loss": 0.8945, + "step": 11272 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003549747941324093, + "loss": 0.8906, + "step": 11273 + }, + { + "epoch": 0.61, + "learning_rate": 0.000354891498832461, + "loss": 0.7695, + "step": 11274 + }, + { + "epoch": 0.61, + "learning_rate": 0.000354808207929777, + "loss": 0.7188, + "step": 11275 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035472492142688136, + "loss": 0.8242, + "step": 11276 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035464163932629746, + "loss": 0.8555, + "step": 11277 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035455836163054966, + "loss": 0.9258, + "step": 11278 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003544750883421612, + "loss": 0.832, + "step": 11279 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035439181946365553, + "loss": 0.8008, + "step": 11280 + }, + { + "epoch": 0.61, + "learning_rate": 0.000354308554997556, + "loss": 0.8945, + "step": 11281 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035422529494638584, + "loss": 0.7344, + "step": 11282 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003541420393126682, + "loss": 0.8711, + "step": 11283 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035405878809892583, + "loss": 0.8164, + "step": 11284 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035397554130768143, + "loss": 0.8594, + "step": 11285 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003538922989414579, + "loss": 0.707, + "step": 11286 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035380906100277766, + "loss": 0.832, + "step": 11287 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035372582749416303, + "loss": 0.793, + "step": 11288 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035364259841813627, + "loss": 0.8281, + "step": 11289 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035355937377721966, + "loss": 0.793, + "step": 11290 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003534761535739351, + "loss": 0.8555, + "step": 11291 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003533929378108043, + "loss": 0.8008, + "step": 11292 + }, + { + "epoch": 0.61, + "learning_rate": 0.000353309726490349, + "loss": 0.793, + "step": 11293 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003532265196150909, + "loss": 0.9023, + "step": 11294 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035314331718755133, + "loss": 0.9219, + "step": 11295 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035306011921025173, + "loss": 0.8672, + "step": 11296 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035297692568571305, + "loss": 0.8008, + "step": 11297 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035289373661645654, + "loss": 0.8438, + "step": 11298 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035281055200500313, + "loss": 0.7695, + "step": 11299 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035272737185387343, + "loss": 0.8711, + "step": 11300 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035264419616558794, + "loss": 0.7695, + "step": 11301 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035256102494266755, + "loss": 0.8867, + "step": 11302 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035247785818763236, + "loss": 0.8203, + "step": 11303 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035239469590300277, + "loss": 0.8242, + "step": 11304 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035231153809129847, + "loss": 0.7969, + "step": 11305 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035222838475503995, + "loss": 0.7656, + "step": 11306 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035214523589674663, + "loss": 0.8477, + "step": 11307 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003520620915189384, + "loss": 0.8633, + "step": 11308 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003519789516241346, + "loss": 0.9258, + "step": 11309 + }, + { + "epoch": 0.61, + "learning_rate": 0.000351895816214855, + "loss": 0.8945, + "step": 11310 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003518126852936185, + "loss": 0.8008, + "step": 11311 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035172955886294464, + "loss": 0.8086, + "step": 11312 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003516464369253518, + "loss": 0.8555, + "step": 11313 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035156331948335963, + "loss": 0.793, + "step": 11314 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035148020653948624, + "loss": 0.7148, + "step": 11315 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003513970980962504, + "loss": 0.8438, + "step": 11316 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035131399415617076, + "loss": 0.8008, + "step": 11317 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003512308947217655, + "loss": 0.7969, + "step": 11318 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035114779979555287, + "loss": 0.7812, + "step": 11319 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003510647093800506, + "loss": 0.6875, + "step": 11320 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003509816234777771, + "loss": 0.7812, + "step": 11321 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003508985420912498, + "loss": 0.7812, + "step": 11322 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003508154652229865, + "loss": 0.8203, + "step": 11323 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035073239287550457, + "loss": 0.8242, + "step": 11324 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003506493250513215, + "loss": 0.8789, + "step": 11325 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035056626175295445, + "loss": 0.8633, + "step": 11326 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003504832029829207, + "loss": 0.8047, + "step": 11327 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035040014874373673, + "loss": 0.8164, + "step": 11328 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035031709903791985, + "loss": 0.8281, + "step": 11329 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035023405386798656, + "loss": 0.7891, + "step": 11330 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035015101323645337, + "loss": 0.7656, + "step": 11331 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035006797714583654, + "loss": 0.7539, + "step": 11332 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034998494559865267, + "loss": 0.7773, + "step": 11333 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003499019185974178, + "loss": 0.8906, + "step": 11334 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034981889614464775, + "loss": 0.8672, + "step": 11335 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034973587824285837, + "loss": 0.7734, + "step": 11336 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003496528648945655, + "loss": 0.8594, + "step": 11337 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034956985610228475, + "loss": 0.7617, + "step": 11338 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003494868518685315, + "loss": 0.8438, + "step": 11339 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003494038521958209, + "loss": 0.8438, + "step": 11340 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034932085708666837, + "loss": 0.7812, + "step": 11341 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003492378665435889, + "loss": 0.8633, + "step": 11342 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034915488056909725, + "loss": 0.8633, + "step": 11343 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034907189916570815, + "loss": 0.7812, + "step": 11344 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034898892233593625, + "loss": 0.9609, + "step": 11345 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003489059500822962, + "loss": 0.8672, + "step": 11346 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034882298240730214, + "loss": 0.793, + "step": 11347 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003487400193134681, + "loss": 0.8242, + "step": 11348 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003486570608033086, + "loss": 0.8867, + "step": 11349 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003485741068793372, + "loss": 0.875, + "step": 11350 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003484911575440676, + "loss": 0.9102, + "step": 11351 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003484082128000138, + "loss": 0.7266, + "step": 11352 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034832527264968904, + "loss": 0.8828, + "step": 11353 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003482423370956067, + "loss": 0.832, + "step": 11354 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034815940614028006, + "loss": 0.8242, + "step": 11355 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003480764797862222, + "loss": 0.8008, + "step": 11356 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003479935580359462, + "loss": 0.8086, + "step": 11357 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034791064089196455, + "loss": 0.8164, + "step": 11358 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034782772835679, + "loss": 0.8672, + "step": 11359 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003477448204329352, + "loss": 0.7422, + "step": 11360 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034766191712291253, + "loss": 0.8008, + "step": 11361 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034757901842923425, + "loss": 0.7812, + "step": 11362 + }, + { + "epoch": 0.61, + "learning_rate": 0.000347496124354412, + "loss": 0.8047, + "step": 11363 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003474132349009586, + "loss": 0.7969, + "step": 11364 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034733035007138515, + "loss": 0.9023, + "step": 11365 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003472474698682036, + "loss": 0.9141, + "step": 11366 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003471645942939254, + "loss": 0.7891, + "step": 11367 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034708172335106216, + "loss": 0.8516, + "step": 11368 + }, + { + "epoch": 0.61, + "learning_rate": 0.000346998857042125, + "loss": 0.8125, + "step": 11369 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003469159953696251, + "loss": 0.8359, + "step": 11370 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003468331383360731, + "loss": 0.8906, + "step": 11371 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003467502859439805, + "loss": 0.8672, + "step": 11372 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003466674381958575, + "loss": 0.8672, + "step": 11373 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003465845950942148, + "loss": 0.8164, + "step": 11374 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003465017566415627, + "loss": 0.8281, + "step": 11375 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034641892284041165, + "loss": 0.6953, + "step": 11376 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003463360936932718, + "loss": 0.8555, + "step": 11377 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003462532692026531, + "loss": 0.8203, + "step": 11378 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034617044937106507, + "loss": 0.8359, + "step": 11379 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034608763420101806, + "loss": 0.7695, + "step": 11380 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034600482369502117, + "loss": 0.8711, + "step": 11381 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034592201785558395, + "loss": 0.8047, + "step": 11382 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003458392166852157, + "loss": 0.8242, + "step": 11383 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003457564201864256, + "loss": 0.832, + "step": 11384 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003456736283617228, + "loss": 0.8242, + "step": 11385 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034559084121361586, + "loss": 0.9023, + "step": 11386 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034550805874461346, + "loss": 0.7188, + "step": 11387 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034542528095722445, + "loss": 0.9102, + "step": 11388 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034534250785395724, + "loss": 0.75, + "step": 11389 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034525973943732, + "loss": 0.8594, + "step": 11390 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034517697570982103, + "loss": 0.9062, + "step": 11391 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003450942166739682, + "loss": 0.8281, + "step": 11392 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034501146233226963, + "loss": 0.7812, + "step": 11393 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034492871268723246, + "loss": 0.7617, + "step": 11394 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034484596774136505, + "loss": 0.8711, + "step": 11395 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034476322749717436, + "loss": 0.9609, + "step": 11396 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034468049195716775, + "loss": 0.8945, + "step": 11397 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034459776112385246, + "loss": 0.8125, + "step": 11398 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003445150349997354, + "loss": 0.8125, + "step": 11399 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034443231358732374, + "loss": 0.7656, + "step": 11400 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003443495968891238, + "loss": 0.8477, + "step": 11401 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034426688490764236, + "loss": 0.7891, + "step": 11402 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003441841776453859, + "loss": 0.875, + "step": 11403 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034410147510486065, + "loss": 0.8125, + "step": 11404 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034401877728857283, + "loss": 0.8242, + "step": 11405 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003439360841990283, + "loss": 0.9219, + "step": 11406 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003438533958387332, + "loss": 0.9258, + "step": 11407 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034377071221019324, + "loss": 0.8008, + "step": 11408 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034368803331591376, + "loss": 0.7969, + "step": 11409 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003436053591584002, + "loss": 0.8672, + "step": 11410 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003435226897401581, + "loss": 0.8711, + "step": 11411 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034344002506369253, + "loss": 0.7773, + "step": 11412 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003433573651315085, + "loss": 0.7812, + "step": 11413 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003432747099461106, + "loss": 0.8359, + "step": 11414 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003431920595100041, + "loss": 0.8242, + "step": 11415 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034310941382569314, + "loss": 0.8711, + "step": 11416 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003430267728956824, + "loss": 0.9219, + "step": 11417 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003429441367224758, + "loss": 0.7812, + "step": 11418 + }, + { + "epoch": 0.61, + "learning_rate": 0.000342861505308578, + "loss": 0.875, + "step": 11419 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003427788786564927, + "loss": 0.8047, + "step": 11420 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034269625676872393, + "loss": 0.9023, + "step": 11421 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034261363964777504, + "loss": 0.7695, + "step": 11422 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034253102729615, + "loss": 0.8047, + "step": 11423 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003424484197163522, + "loss": 0.8398, + "step": 11424 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003423658169108846, + "loss": 0.9219, + "step": 11425 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003422832188822507, + "loss": 0.7773, + "step": 11426 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003422006256329532, + "loss": 0.8047, + "step": 11427 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034211803716549527, + "loss": 0.9062, + "step": 11428 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003420354534823791, + "loss": 0.8594, + "step": 11429 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034195287458610793, + "loss": 0.8711, + "step": 11430 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003418703004791837, + "loss": 0.8477, + "step": 11431 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003417877311641088, + "loss": 0.8008, + "step": 11432 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003417051666433852, + "loss": 0.8828, + "step": 11433 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003416226069195151, + "loss": 0.7656, + "step": 11434 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003415400519950003, + "loss": 0.8672, + "step": 11435 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003414575018723426, + "loss": 0.8555, + "step": 11436 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003413749565540429, + "loss": 0.8594, + "step": 11437 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003412924160426035, + "loss": 0.8047, + "step": 11438 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003412098803405251, + "loss": 0.8047, + "step": 11439 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003411273494503089, + "loss": 0.8633, + "step": 11440 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003410448233744558, + "loss": 0.8984, + "step": 11441 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034096230211546675, + "loss": 0.8164, + "step": 11442 + }, + { + "epoch": 0.62, + "learning_rate": 0.00034087978567584257, + "loss": 0.7969, + "step": 11443 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003407972740580833, + "loss": 0.8242, + "step": 11444 + }, + { + "epoch": 0.62, + "learning_rate": 0.00034071476726468954, + "loss": 0.875, + "step": 11445 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003406322652981617, + "loss": 0.8438, + "step": 11446 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003405497681609997, + "loss": 0.7617, + "step": 11447 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003404672758557034, + "loss": 0.7812, + "step": 11448 + }, + { + "epoch": 0.62, + "learning_rate": 0.00034038478838477267, + "loss": 0.8984, + "step": 11449 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003403023057507072, + "loss": 0.7773, + "step": 11450 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003402198279560066, + "loss": 0.9023, + "step": 11451 + }, + { + "epoch": 0.62, + "learning_rate": 0.00034013735500316994, + "loss": 0.8086, + "step": 11452 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003400548868946964, + "loss": 0.8359, + "step": 11453 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033997242363308533, + "loss": 0.8789, + "step": 11454 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003398899652208355, + "loss": 0.8711, + "step": 11455 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033980751166044564, + "loss": 0.832, + "step": 11456 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003397250629544142, + "loss": 0.793, + "step": 11457 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003396426191052401, + "loss": 0.8633, + "step": 11458 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003395601801154212, + "loss": 0.8672, + "step": 11459 + }, + { + "epoch": 0.62, + "learning_rate": 0.000339477745987456, + "loss": 0.9336, + "step": 11460 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003393953167238422, + "loss": 0.8125, + "step": 11461 + }, + { + "epoch": 0.62, + "learning_rate": 0.000339312892327078, + "loss": 0.8438, + "step": 11462 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003392304727996609, + "loss": 0.8203, + "step": 11463 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033914805814408856, + "loss": 0.7227, + "step": 11464 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033906564836285846, + "loss": 0.7383, + "step": 11465 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033898324345846796, + "loss": 0.7891, + "step": 11466 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033890084343341404, + "loss": 0.7656, + "step": 11467 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003388184482901936, + "loss": 0.832, + "step": 11468 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003387360580313038, + "loss": 0.8359, + "step": 11469 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003386536726592411, + "loss": 0.8828, + "step": 11470 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033857129217650214, + "loss": 0.7812, + "step": 11471 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003384889165855831, + "loss": 0.7734, + "step": 11472 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033840654588898064, + "loss": 0.8477, + "step": 11473 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033832418008919067, + "loss": 0.8711, + "step": 11474 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003382418191887091, + "loss": 0.9297, + "step": 11475 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033815946319003156, + "loss": 0.75, + "step": 11476 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033807711209565404, + "loss": 0.8711, + "step": 11477 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033799476590807187, + "loss": 0.8359, + "step": 11478 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033791242462978066, + "loss": 0.8164, + "step": 11479 + }, + { + "epoch": 0.62, + "learning_rate": 0.000337830088263275, + "loss": 0.8086, + "step": 11480 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033774775681105074, + "loss": 0.8633, + "step": 11481 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033766543027560227, + "loss": 0.8398, + "step": 11482 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033758310865942447, + "loss": 0.8594, + "step": 11483 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003375007919650119, + "loss": 0.8438, + "step": 11484 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033741848019485927, + "loss": 0.8164, + "step": 11485 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003373361733514607, + "loss": 0.8242, + "step": 11486 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033725387143731053, + "loss": 0.8047, + "step": 11487 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003371715744549023, + "loss": 0.8398, + "step": 11488 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033708928240673054, + "loss": 0.793, + "step": 11489 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033700699529528856, + "loss": 0.8359, + "step": 11490 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033692471312307004, + "loss": 0.8047, + "step": 11491 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033684243589256825, + "loss": 0.875, + "step": 11492 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003367601636062767, + "loss": 0.8711, + "step": 11493 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033667789626668854, + "loss": 0.7383, + "step": 11494 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003365956338762964, + "loss": 0.8711, + "step": 11495 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033651337643759326, + "loss": 0.8477, + "step": 11496 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033643112395307203, + "loss": 0.8477, + "step": 11497 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003363488764252249, + "loss": 0.793, + "step": 11498 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003362666338565444, + "loss": 0.8516, + "step": 11499 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003361843962495228, + "loss": 0.7969, + "step": 11500 + }, + { + "epoch": 0.62, + "learning_rate": 0.000336102163606652, + "loss": 0.7539, + "step": 11501 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033601993593042426, + "loss": 0.8281, + "step": 11502 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003359377132233307, + "loss": 0.7617, + "step": 11503 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033585549548786366, + "loss": 0.7773, + "step": 11504 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033577328272651417, + "loss": 0.8086, + "step": 11505 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033569107494177376, + "loss": 0.7227, + "step": 11506 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033560887213613327, + "loss": 0.7852, + "step": 11507 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033552667431208414, + "loss": 0.9219, + "step": 11508 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003354444814721171, + "loss": 0.7969, + "step": 11509 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003353622936187227, + "loss": 0.8359, + "step": 11510 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003352801107543916, + "loss": 0.9102, + "step": 11511 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033519793288161427, + "loss": 0.8164, + "step": 11512 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033511576000288087, + "loss": 0.8047, + "step": 11513 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003350335921206816, + "loss": 0.7383, + "step": 11514 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003349514292375062, + "loss": 0.8125, + "step": 11515 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003348692713558448, + "loss": 0.7656, + "step": 11516 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033478711847818697, + "loss": 0.7891, + "step": 11517 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003347049706070221, + "loss": 0.8945, + "step": 11518 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003346228277448395, + "loss": 0.7891, + "step": 11519 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003345406898941285, + "loss": 0.8125, + "step": 11520 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003344585570573781, + "loss": 0.8203, + "step": 11521 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033437642923707736, + "loss": 0.7852, + "step": 11522 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033429430643571443, + "loss": 0.8164, + "step": 11523 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033421218865577873, + "loss": 0.7891, + "step": 11524 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033413007589975813, + "loss": 0.8477, + "step": 11525 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003340479681701411, + "loss": 0.832, + "step": 11526 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003339658654694157, + "loss": 0.8008, + "step": 11527 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033388376780007004, + "loss": 0.9688, + "step": 11528 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003338016751645918, + "loss": 0.8477, + "step": 11529 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033371958756546894, + "loss": 0.8047, + "step": 11530 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003336375050051884, + "loss": 0.75, + "step": 11531 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003335554274862383, + "loss": 0.7812, + "step": 11532 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003334733550111053, + "loss": 0.8047, + "step": 11533 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003333912875822768, + "loss": 0.9336, + "step": 11534 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003333092252022393, + "loss": 0.8789, + "step": 11535 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033322716787348, + "loss": 0.8906, + "step": 11536 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033314511559848545, + "loss": 0.9375, + "step": 11537 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003330630683797416, + "loss": 0.8359, + "step": 11538 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003329810262197355, + "loss": 0.8125, + "step": 11539 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003328989891209528, + "loss": 0.7969, + "step": 11540 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033281695708587967, + "loss": 0.7461, + "step": 11541 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003327349301170018, + "loss": 0.9766, + "step": 11542 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033265290821680505, + "loss": 0.8359, + "step": 11543 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033257089138777487, + "loss": 0.8086, + "step": 11544 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033248887963239685, + "loss": 0.9102, + "step": 11545 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033240687295315566, + "loss": 0.9219, + "step": 11546 + }, + { + "epoch": 0.62, + "learning_rate": 0.000332324871352537, + "loss": 0.8086, + "step": 11547 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003322428748330254, + "loss": 0.7969, + "step": 11548 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003321608833971058, + "loss": 0.8711, + "step": 11549 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033207889704726256, + "loss": 0.7305, + "step": 11550 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033199691578598056, + "loss": 0.7578, + "step": 11551 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003319149396157438, + "loss": 0.8945, + "step": 11552 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033183296853903663, + "loss": 0.8672, + "step": 11553 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033175100255834256, + "loss": 0.8867, + "step": 11554 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003316690416761461, + "loss": 0.8711, + "step": 11555 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033158708589493054, + "loss": 0.7969, + "step": 11556 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033150513521717947, + "loss": 0.7969, + "step": 11557 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033142318964537613, + "loss": 0.8867, + "step": 11558 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033134124918200405, + "loss": 0.7422, + "step": 11559 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003312593138295462, + "loss": 0.7539, + "step": 11560 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003311773835904853, + "loss": 0.8242, + "step": 11561 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033109545846730416, + "loss": 0.75, + "step": 11562 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033101353846248555, + "loss": 0.7539, + "step": 11563 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003309316235785118, + "loss": 0.9219, + "step": 11564 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003308497138178652, + "loss": 0.793, + "step": 11565 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033076780918302786, + "loss": 1.0078, + "step": 11566 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003306859096764818, + "loss": 0.6992, + "step": 11567 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033060401530070907, + "loss": 0.8672, + "step": 11568 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003305221260581909, + "loss": 0.8594, + "step": 11569 + }, + { + "epoch": 0.62, + "learning_rate": 0.000330440241951409, + "loss": 0.7773, + "step": 11570 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033035836298284473, + "loss": 0.8633, + "step": 11571 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003302764891549794, + "loss": 0.8203, + "step": 11572 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033019462047029403, + "loss": 0.8438, + "step": 11573 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003301127569312691, + "loss": 0.8242, + "step": 11574 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033003089854038596, + "loss": 0.9141, + "step": 11575 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032994904530012493, + "loss": 0.8047, + "step": 11576 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003298671972129662, + "loss": 0.8789, + "step": 11577 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003297853542813904, + "loss": 0.8555, + "step": 11578 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032970351650787747, + "loss": 0.7305, + "step": 11579 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003296216838949074, + "loss": 0.8125, + "step": 11580 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003295398564449599, + "loss": 0.875, + "step": 11581 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032945803416051467, + "loss": 0.7695, + "step": 11582 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003293762170440514, + "loss": 0.8828, + "step": 11583 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032929440509804907, + "loss": 0.8164, + "step": 11584 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003292125983249869, + "loss": 0.7852, + "step": 11585 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032913079672734416, + "loss": 0.75, + "step": 11586 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032904900030759947, + "loss": 0.8906, + "step": 11587 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003289672090682318, + "loss": 0.8398, + "step": 11588 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032888542301171914, + "loss": 0.7891, + "step": 11589 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032880364214054057, + "loss": 0.875, + "step": 11590 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003287218664571739, + "loss": 0.9102, + "step": 11591 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003286400959640973, + "loss": 0.793, + "step": 11592 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032855833066378846, + "loss": 0.9062, + "step": 11593 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003284765705587256, + "loss": 0.8828, + "step": 11594 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003283948156513861, + "loss": 0.8945, + "step": 11595 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032831306594424735, + "loss": 0.7656, + "step": 11596 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003282313214397864, + "loss": 0.7734, + "step": 11597 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032814958214048097, + "loss": 0.7305, + "step": 11598 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032806784804880753, + "loss": 0.7109, + "step": 11599 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032798611916724313, + "loss": 0.8945, + "step": 11600 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032790439549826425, + "loss": 0.7891, + "step": 11601 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003278226770443475, + "loss": 0.8711, + "step": 11602 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003277409638079694, + "loss": 0.8086, + "step": 11603 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032765925579160583, + "loss": 0.8867, + "step": 11604 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032757755299773274, + "loss": 0.7969, + "step": 11605 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003274958554288263, + "loss": 0.8789, + "step": 11606 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003274141630873622, + "loss": 0.8633, + "step": 11607 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032733247597581573, + "loss": 0.918, + "step": 11608 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032725079409666235, + "loss": 0.7109, + "step": 11609 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032716911745237745, + "loss": 0.8906, + "step": 11610 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032708744604543615, + "loss": 0.9961, + "step": 11611 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032700577987831287, + "loss": 0.8242, + "step": 11612 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032692411895348305, + "loss": 0.875, + "step": 11613 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003268424632734208, + "loss": 0.7188, + "step": 11614 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032676081284060076, + "loss": 0.8633, + "step": 11615 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032667916765749706, + "loss": 0.8789, + "step": 11616 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003265975277265839, + "loss": 0.7578, + "step": 11617 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003265158930503355, + "loss": 0.8047, + "step": 11618 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003264342636312252, + "loss": 0.7812, + "step": 11619 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003263526394717268, + "loss": 0.8086, + "step": 11620 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003262710205743138, + "loss": 0.8828, + "step": 11621 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032618940694145965, + "loss": 0.9102, + "step": 11622 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003261077985756374, + "loss": 0.8711, + "step": 11623 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003260261954793199, + "loss": 0.8516, + "step": 11624 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003259445976549803, + "loss": 0.7773, + "step": 11625 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003258630051050913, + "loss": 0.8086, + "step": 11626 + }, + { + "epoch": 0.62, + "learning_rate": 0.000325781417832125, + "loss": 0.9688, + "step": 11627 + }, + { + "epoch": 0.62, + "learning_rate": 0.000325699835838554, + "loss": 0.8242, + "step": 11628 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003256182591268506, + "loss": 0.8281, + "step": 11629 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032553668769948673, + "loss": 0.9219, + "step": 11630 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003254551215589343, + "loss": 0.7734, + "step": 11631 + }, + { + "epoch": 0.63, + "learning_rate": 0.000325373560707665, + "loss": 0.7891, + "step": 11632 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003252920051481505, + "loss": 0.8164, + "step": 11633 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032521045488286216, + "loss": 0.9297, + "step": 11634 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003251289099142712, + "loss": 0.8359, + "step": 11635 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003250473702448485, + "loss": 0.8477, + "step": 11636 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003249658358770653, + "loss": 0.8242, + "step": 11637 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003248843068133922, + "loss": 0.8984, + "step": 11638 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032480278305629994, + "loss": 0.9922, + "step": 11639 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003247212646082586, + "loss": 0.8281, + "step": 11640 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032463975147173894, + "loss": 0.8867, + "step": 11641 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003245582436492107, + "loss": 0.9219, + "step": 11642 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032447674114314406, + "loss": 0.8516, + "step": 11643 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003243952439560086, + "loss": 0.8398, + "step": 11644 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032431375209027413, + "loss": 0.8008, + "step": 11645 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003242322655484101, + "loss": 0.8672, + "step": 11646 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003241507843328859, + "loss": 0.7539, + "step": 11647 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003240693084461702, + "loss": 0.7539, + "step": 11648 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032398783789073274, + "loss": 0.8203, + "step": 11649 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032390637266904184, + "loss": 0.9258, + "step": 11650 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032382491278356613, + "loss": 0.8281, + "step": 11651 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003237434582367744, + "loss": 0.8828, + "step": 11652 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032366200903113484, + "loss": 0.7656, + "step": 11653 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003235805651691158, + "loss": 0.7656, + "step": 11654 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003234991266531848, + "loss": 0.8477, + "step": 11655 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003234176934858103, + "loss": 0.8281, + "step": 11656 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003233362656694596, + "loss": 0.7773, + "step": 11657 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003232548432066005, + "loss": 0.8945, + "step": 11658 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003231734260997, + "loss": 0.8281, + "step": 11659 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032309201435122564, + "loss": 0.8203, + "step": 11660 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003230106079636444, + "loss": 0.8164, + "step": 11661 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003229292069394232, + "loss": 0.8203, + "step": 11662 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003228478112810284, + "loss": 0.8398, + "step": 11663 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032276642099092706, + "loss": 0.8984, + "step": 11664 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003226850360715853, + "loss": 0.7344, + "step": 11665 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003226036565254694, + "loss": 0.7812, + "step": 11666 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003225222823550452, + "loss": 0.8281, + "step": 11667 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032244091356277904, + "loss": 0.8828, + "step": 11668 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003223595501511366, + "loss": 0.8789, + "step": 11669 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032227819212258315, + "loss": 0.7656, + "step": 11670 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003221968394795841, + "loss": 0.8047, + "step": 11671 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032211549222460503, + "loss": 0.8008, + "step": 11672 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003220341503601108, + "loss": 0.8281, + "step": 11673 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032195281388856643, + "loss": 0.7773, + "step": 11674 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003218714828124366, + "loss": 0.793, + "step": 11675 + }, + { + "epoch": 0.63, + "learning_rate": 0.000321790157134186, + "loss": 0.9062, + "step": 11676 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003217088368562792, + "loss": 0.8086, + "step": 11677 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032162752198118007, + "loss": 0.8438, + "step": 11678 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003215462125113529, + "loss": 0.8711, + "step": 11679 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003214649084492618, + "loss": 0.793, + "step": 11680 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032138360979737043, + "loss": 0.8516, + "step": 11681 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003213023165581425, + "loss": 0.8242, + "step": 11682 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032122102873404103, + "loss": 0.7852, + "step": 11683 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032113974632752996, + "loss": 0.7617, + "step": 11684 + }, + { + "epoch": 0.63, + "learning_rate": 0.000321058469341072, + "loss": 0.8477, + "step": 11685 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032097719777713014, + "loss": 0.9258, + "step": 11686 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032089593163816735, + "loss": 0.832, + "step": 11687 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032081467092664605, + "loss": 0.8594, + "step": 11688 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003207334156450289, + "loss": 0.8008, + "step": 11689 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032065216579577803, + "loss": 0.8242, + "step": 11690 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003205709213813557, + "loss": 0.8008, + "step": 11691 + }, + { + "epoch": 0.63, + "learning_rate": 0.000320489682404224, + "loss": 0.7656, + "step": 11692 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003204084488668444, + "loss": 0.7344, + "step": 11693 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003203272207716787, + "loss": 0.8125, + "step": 11694 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003202459981211885, + "loss": 0.8516, + "step": 11695 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032016478091783496, + "loss": 0.8828, + "step": 11696 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032008356916407945, + "loss": 0.7656, + "step": 11697 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003200023628623824, + "loss": 0.8047, + "step": 11698 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031992116201520537, + "loss": 0.832, + "step": 11699 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003198399666250086, + "loss": 0.8008, + "step": 11700 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031975877669425254, + "loss": 0.7812, + "step": 11701 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003196775922253975, + "loss": 0.7812, + "step": 11702 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031959641322090395, + "loss": 0.8828, + "step": 11703 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003195152396832316, + "loss": 0.8086, + "step": 11704 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003194340716148404, + "loss": 0.9102, + "step": 11705 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003193529090181897, + "loss": 0.7734, + "step": 11706 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031927175189573956, + "loss": 0.8711, + "step": 11707 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003191906002499489, + "loss": 0.8633, + "step": 11708 + }, + { + "epoch": 0.63, + "learning_rate": 0.000319109454083277, + "loss": 0.7031, + "step": 11709 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003190283133981827, + "loss": 0.8398, + "step": 11710 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031894717819712506, + "loss": 0.8711, + "step": 11711 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031886604848256275, + "loss": 0.8242, + "step": 11712 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031878492425695403, + "loss": 0.6836, + "step": 11713 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031870380552275715, + "loss": 0.707, + "step": 11714 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031862269228243083, + "loss": 0.8594, + "step": 11715 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003185415845384326, + "loss": 0.7891, + "step": 11716 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031846048229322045, + "loss": 0.8516, + "step": 11717 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003183793855492519, + "loss": 0.8477, + "step": 11718 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031829829430898463, + "loss": 0.8281, + "step": 11719 + }, + { + "epoch": 0.63, + "learning_rate": 0.000318217208574876, + "loss": 0.9883, + "step": 11720 + }, + { + "epoch": 0.63, + "learning_rate": 0.000318136128349383, + "loss": 0.7109, + "step": 11721 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003180550536349626, + "loss": 0.8594, + "step": 11722 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031797398443407184, + "loss": 0.8164, + "step": 11723 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003178929207491672, + "loss": 0.7656, + "step": 11724 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003178118625827052, + "loss": 0.875, + "step": 11725 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003177308099371423, + "loss": 0.8633, + "step": 11726 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003176497628149346, + "loss": 0.832, + "step": 11727 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003175687212185382, + "loss": 0.7969, + "step": 11728 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003174876851504085, + "loss": 0.7852, + "step": 11729 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031740665461300167, + "loss": 0.8477, + "step": 11730 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031732562960877297, + "loss": 0.8164, + "step": 11731 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003172446101401777, + "loss": 0.9609, + "step": 11732 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031716359620967096, + "loss": 0.793, + "step": 11733 + }, + { + "epoch": 0.63, + "learning_rate": 0.000317082587819708, + "loss": 0.8086, + "step": 11734 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003170015849727435, + "loss": 0.6875, + "step": 11735 + }, + { + "epoch": 0.63, + "learning_rate": 0.000316920587671232, + "loss": 0.832, + "step": 11736 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031683959591762804, + "loss": 0.8945, + "step": 11737 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031675860971438596, + "loss": 0.8672, + "step": 11738 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031667762906396, + "loss": 0.8711, + "step": 11739 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003165966539688041, + "loss": 0.8047, + "step": 11740 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031651568443137184, + "loss": 0.7656, + "step": 11741 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003164347204541172, + "loss": 0.7969, + "step": 11742 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031635376203949364, + "loss": 0.7383, + "step": 11743 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003162728091899542, + "loss": 0.7773, + "step": 11744 + }, + { + "epoch": 0.63, + "learning_rate": 0.000316191861907952, + "loss": 0.8203, + "step": 11745 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003161109201959402, + "loss": 0.7812, + "step": 11746 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003160299840563717, + "loss": 0.8789, + "step": 11747 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031594905349169913, + "loss": 0.9336, + "step": 11748 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031586812850437437, + "loss": 0.8633, + "step": 11749 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003157872090968505, + "loss": 0.8125, + "step": 11750 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031570629527157923, + "loss": 0.7891, + "step": 11751 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031562538703101253, + "loss": 0.9414, + "step": 11752 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031554448437760217, + "loss": 0.7422, + "step": 11753 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003154635873138, + "loss": 0.8242, + "step": 11754 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031538269584205716, + "loss": 0.8438, + "step": 11755 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003153018099648253, + "loss": 0.8047, + "step": 11756 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003152209296845549, + "loss": 0.7969, + "step": 11757 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003151400550036976, + "loss": 0.7773, + "step": 11758 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003150591859247038, + "loss": 0.8711, + "step": 11759 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031497832245002414, + "loss": 0.8789, + "step": 11760 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003148974645821091, + "loss": 0.8477, + "step": 11761 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031481661232340887, + "loss": 0.7461, + "step": 11762 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031473576567637366, + "loss": 0.8828, + "step": 11763 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003146549246434531, + "loss": 0.793, + "step": 11764 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003145740892270974, + "loss": 0.9141, + "step": 11765 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031449325942975583, + "loss": 0.7852, + "step": 11766 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003144124352538777, + "loss": 0.8906, + "step": 11767 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031433161670191246, + "loss": 0.8906, + "step": 11768 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003142508037763092, + "loss": 0.7773, + "step": 11769 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031416999647951673, + "loss": 0.8828, + "step": 11770 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031408919481398383, + "loss": 0.8281, + "step": 11771 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003140083987821588, + "loss": 0.7852, + "step": 11772 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003139276083864904, + "loss": 0.7852, + "step": 11773 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031384682362942665, + "loss": 0.9141, + "step": 11774 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003137660445134156, + "loss": 0.8359, + "step": 11775 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003136852710409051, + "loss": 0.8164, + "step": 11776 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031360450321434296, + "loss": 0.7031, + "step": 11777 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031352374103617677, + "loss": 0.7852, + "step": 11778 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031344298450885366, + "loss": 0.875, + "step": 11779 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003133622336348208, + "loss": 0.6719, + "step": 11780 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003132814884165255, + "loss": 0.8906, + "step": 11781 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031320074885641435, + "loss": 0.7891, + "step": 11782 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003131200149569341, + "loss": 0.7266, + "step": 11783 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031303928672053125, + "loss": 0.7773, + "step": 11784 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003129585641496522, + "loss": 0.8711, + "step": 11785 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031287784724674307, + "loss": 0.8477, + "step": 11786 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031279713601424985, + "loss": 0.8672, + "step": 11787 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003127164304546182, + "loss": 0.7188, + "step": 11788 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031263573057029395, + "loss": 0.7852, + "step": 11789 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003125550363637226, + "loss": 0.7266, + "step": 11790 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003124743478373494, + "loss": 0.7773, + "step": 11791 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031239366499361915, + "loss": 0.75, + "step": 11792 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003123129878349774, + "loss": 0.8711, + "step": 11793 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003122323163638686, + "loss": 0.875, + "step": 11794 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003121516505827373, + "loss": 0.8594, + "step": 11795 + }, + { + "epoch": 0.63, + "learning_rate": 0.000312070990494028, + "loss": 0.8164, + "step": 11796 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003119903361001851, + "loss": 0.8906, + "step": 11797 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003119096874036526, + "loss": 0.9062, + "step": 11798 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031182904440687424, + "loss": 0.875, + "step": 11799 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003117484071122941, + "loss": 0.8672, + "step": 11800 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003116677755223557, + "loss": 0.7461, + "step": 11801 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003115871496395022, + "loss": 0.7539, + "step": 11802 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003115065294661769, + "loss": 0.832, + "step": 11803 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031142591500482304, + "loss": 0.8125, + "step": 11804 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003113453062578834, + "loss": 0.7422, + "step": 11805 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031126470322780054, + "loss": 0.8516, + "step": 11806 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003111841059170172, + "loss": 0.7891, + "step": 11807 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003111035143279757, + "loss": 0.7773, + "step": 11808 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031102292846311823, + "loss": 0.7812, + "step": 11809 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031094234832488676, + "loss": 0.7383, + "step": 11810 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031086177391572307, + "loss": 0.7188, + "step": 11811 + }, + { + "epoch": 0.63, + "learning_rate": 0.000310781205238069, + "loss": 0.8711, + "step": 11812 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003107006422943659, + "loss": 0.8906, + "step": 11813 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003106200850870553, + "loss": 0.8164, + "step": 11814 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003105395336185779, + "loss": 0.8242, + "step": 11815 + }, + { + "epoch": 0.64, + "learning_rate": 0.00031045898789137526, + "loss": 0.7266, + "step": 11816 + }, + { + "epoch": 0.64, + "learning_rate": 0.00031037844790788785, + "loss": 0.7578, + "step": 11817 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003102979136705563, + "loss": 0.8789, + "step": 11818 + }, + { + "epoch": 0.64, + "learning_rate": 0.00031021738518182096, + "loss": 0.8672, + "step": 11819 + }, + { + "epoch": 0.64, + "learning_rate": 0.00031013686244412244, + "loss": 0.8789, + "step": 11820 + }, + { + "epoch": 0.64, + "learning_rate": 0.00031005634545990055, + "loss": 0.7188, + "step": 11821 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030997583423159546, + "loss": 0.7969, + "step": 11822 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003098953287616465, + "loss": 0.8711, + "step": 11823 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030981482905249386, + "loss": 0.8281, + "step": 11824 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003097343351065765, + "loss": 0.8047, + "step": 11825 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003096538469263338, + "loss": 0.8711, + "step": 11826 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030957336451420457, + "loss": 0.8672, + "step": 11827 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030949288787262815, + "loss": 0.8203, + "step": 11828 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030941241700404307, + "loss": 0.8477, + "step": 11829 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030933195191088766, + "loss": 0.8789, + "step": 11830 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030925149259560035, + "loss": 0.7969, + "step": 11831 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030917103906061944, + "loss": 0.8398, + "step": 11832 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003090905913083829, + "loss": 0.8477, + "step": 11833 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003090101493413285, + "loss": 0.7344, + "step": 11834 + }, + { + "epoch": 0.64, + "learning_rate": 0.000308929713161894, + "loss": 0.7891, + "step": 11835 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030884928277251676, + "loss": 0.8086, + "step": 11836 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003087688581756344, + "loss": 0.8594, + "step": 11837 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003086884393736834, + "loss": 0.8359, + "step": 11838 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030860802636910133, + "loss": 0.8242, + "step": 11839 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030852761916432475, + "loss": 0.8359, + "step": 11840 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030844721776179026, + "loss": 0.8281, + "step": 11841 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003083668221639342, + "loss": 0.7891, + "step": 11842 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003082864323731929, + "loss": 0.8711, + "step": 11843 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003082060483920026, + "loss": 0.8242, + "step": 11844 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030812567022279905, + "loss": 0.8438, + "step": 11845 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003080452978680177, + "loss": 0.8125, + "step": 11846 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003079649313300945, + "loss": 0.8555, + "step": 11847 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030788457061146473, + "loss": 0.7969, + "step": 11848 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003078042157145635, + "loss": 0.75, + "step": 11849 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003077238666418258, + "loss": 0.7695, + "step": 11850 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030764352339568657, + "loss": 0.7695, + "step": 11851 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003075631859785806, + "loss": 0.875, + "step": 11852 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003074828543929421, + "loss": 0.7695, + "step": 11853 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003074025286412053, + "loss": 0.7617, + "step": 11854 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003073222087258047, + "loss": 0.8125, + "step": 11855 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003072418946491741, + "loss": 0.8281, + "step": 11856 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030716158641374734, + "loss": 0.7812, + "step": 11857 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030708128402195766, + "loss": 0.8164, + "step": 11858 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003070009874762391, + "loss": 0.8828, + "step": 11859 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003069206967790245, + "loss": 0.8047, + "step": 11860 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030684041193274693, + "loss": 0.8398, + "step": 11861 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030676013293983936, + "loss": 0.832, + "step": 11862 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030667985980273457, + "loss": 0.7305, + "step": 11863 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030659959252386505, + "loss": 0.832, + "step": 11864 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030651933110566323, + "loss": 0.8672, + "step": 11865 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030643907555056096, + "loss": 0.8984, + "step": 11866 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003063588258609908, + "loss": 0.8867, + "step": 11867 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003062785820393842, + "loss": 0.9023, + "step": 11868 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003061983440881729, + "loss": 0.7656, + "step": 11869 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030611811200978835, + "loss": 0.8164, + "step": 11870 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030603788580666185, + "loss": 0.8086, + "step": 11871 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030595766548122474, + "loss": 0.8711, + "step": 11872 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003058774510359074, + "loss": 0.8945, + "step": 11873 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030579724247314124, + "loss": 0.8555, + "step": 11874 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030571703979535646, + "loss": 0.8672, + "step": 11875 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003056368430049835, + "loss": 0.8359, + "step": 11876 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030555665210445264, + "loss": 0.8711, + "step": 11877 + }, + { + "epoch": 0.64, + "learning_rate": 0.000305476467096194, + "loss": 0.8711, + "step": 11878 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003053962879826374, + "loss": 0.8906, + "step": 11879 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030531611476621257, + "loss": 0.8125, + "step": 11880 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003052359474493487, + "loss": 0.8242, + "step": 11881 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030515578603447567, + "loss": 0.8359, + "step": 11882 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003050756305240222, + "loss": 0.9023, + "step": 11883 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003049954809204175, + "loss": 0.8203, + "step": 11884 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003049153372260901, + "loss": 0.793, + "step": 11885 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030483519944346895, + "loss": 0.7969, + "step": 11886 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003047550675749823, + "loss": 0.7969, + "step": 11887 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030467494162305866, + "loss": 0.8711, + "step": 11888 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003045948215901254, + "loss": 0.7539, + "step": 11889 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030451470747861137, + "loss": 0.8438, + "step": 11890 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003044345992909437, + "loss": 0.8945, + "step": 11891 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030435449702955, + "loss": 0.7773, + "step": 11892 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003042744006968577, + "loss": 0.8125, + "step": 11893 + }, + { + "epoch": 0.64, + "learning_rate": 0.000304194310295294, + "loss": 0.8359, + "step": 11894 + }, + { + "epoch": 0.64, + "learning_rate": 0.000304114225827286, + "loss": 0.8477, + "step": 11895 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030403414729526036, + "loss": 0.8984, + "step": 11896 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003039540747016436, + "loss": 0.8516, + "step": 11897 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030387400804886245, + "loss": 0.8594, + "step": 11898 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030379394733934316, + "loss": 0.8203, + "step": 11899 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030371389257551176, + "loss": 0.8828, + "step": 11900 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030363384375979395, + "loss": 0.8242, + "step": 11901 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030355380089461595, + "loss": 0.8164, + "step": 11902 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003034737639824031, + "loss": 0.9102, + "step": 11903 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030339373302558066, + "loss": 0.8242, + "step": 11904 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030331370802657377, + "loss": 0.9297, + "step": 11905 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003032336889878078, + "loss": 0.8281, + "step": 11906 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003031536759117073, + "loss": 0.918, + "step": 11907 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003030736688006972, + "loss": 0.9258, + "step": 11908 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003029936676572015, + "loss": 0.7812, + "step": 11909 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003029136724836451, + "loss": 0.7695, + "step": 11910 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030283368328245163, + "loss": 0.9023, + "step": 11911 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003027537000560452, + "loss": 0.8555, + "step": 11912 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003026737228068497, + "loss": 0.7969, + "step": 11913 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003025937515372885, + "loss": 0.9141, + "step": 11914 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003025137862497851, + "loss": 0.7031, + "step": 11915 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030243382694676254, + "loss": 0.7812, + "step": 11916 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030235387363064417, + "loss": 0.9414, + "step": 11917 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003022739263038526, + "loss": 0.7695, + "step": 11918 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003021939849688107, + "loss": 0.7969, + "step": 11919 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003021140496279405, + "loss": 0.8984, + "step": 11920 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003020341202836648, + "loss": 0.8047, + "step": 11921 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030195419693840553, + "loss": 0.9375, + "step": 11922 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003018742795945847, + "loss": 0.8789, + "step": 11923 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003017943682546238, + "loss": 0.7891, + "step": 11924 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030171446292094487, + "loss": 0.8203, + "step": 11925 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030163456359596884, + "loss": 0.7891, + "step": 11926 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030155467028211724, + "loss": 0.8789, + "step": 11927 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003014747829818109, + "loss": 0.7734, + "step": 11928 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003013949016974707, + "loss": 0.8555, + "step": 11929 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030131502643151754, + "loss": 0.875, + "step": 11930 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030123515718637173, + "loss": 0.793, + "step": 11931 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003011552939644533, + "loss": 0.8086, + "step": 11932 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030107543676818297, + "loss": 0.8906, + "step": 11933 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003009955855999801, + "loss": 0.8125, + "step": 11934 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030091574046226476, + "loss": 0.8047, + "step": 11935 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003008359013574563, + "loss": 0.793, + "step": 11936 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003007560682879744, + "loss": 0.8086, + "step": 11937 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003006762412562382, + "loss": 0.7383, + "step": 11938 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030059642026466647, + "loss": 0.8711, + "step": 11939 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030051660531567814, + "loss": 0.8086, + "step": 11940 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003004367964116921, + "loss": 0.8086, + "step": 11941 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030035699355512657, + "loss": 0.8398, + "step": 11942 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030027719674839996, + "loss": 0.8711, + "step": 11943 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030019740599393025, + "loss": 0.7266, + "step": 11944 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003001176212941356, + "loss": 0.832, + "step": 11945 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030003784265143356, + "loss": 0.7852, + "step": 11946 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002999580700682415, + "loss": 0.8398, + "step": 11947 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002998783035469773, + "loss": 0.7695, + "step": 11948 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029979854309005783, + "loss": 0.8008, + "step": 11949 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029971878869990013, + "loss": 0.7461, + "step": 11950 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002996390403789209, + "loss": 0.8047, + "step": 11951 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029955929812953687, + "loss": 0.7773, + "step": 11952 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002994795619541647, + "loss": 0.75, + "step": 11953 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029939983185522045, + "loss": 0.8125, + "step": 11954 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029932010783512, + "loss": 0.7734, + "step": 11955 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029924038989627964, + "loss": 0.8438, + "step": 11956 + }, + { + "epoch": 0.64, + "learning_rate": 0.000299160678041115, + "loss": 0.7266, + "step": 11957 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029908097227204143, + "loss": 0.8438, + "step": 11958 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002990012725914743, + "loss": 0.8711, + "step": 11959 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029892157900182896, + "loss": 0.7773, + "step": 11960 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029884189150552044, + "loss": 0.8203, + "step": 11961 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002987622101049632, + "loss": 0.7578, + "step": 11962 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002986825348025719, + "loss": 0.8281, + "step": 11963 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029860286560076123, + "loss": 0.7695, + "step": 11964 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002985232025019453, + "loss": 0.8789, + "step": 11965 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029844354550853815, + "loss": 0.8555, + "step": 11966 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002983638946229534, + "loss": 0.7617, + "step": 11967 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002982842498476053, + "loss": 0.7852, + "step": 11968 + }, + { + "epoch": 0.64, + "learning_rate": 0.000298204611184907, + "loss": 0.7891, + "step": 11969 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002981249786372718, + "loss": 0.8672, + "step": 11970 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002980453522071127, + "loss": 0.7812, + "step": 11971 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029796573189684296, + "loss": 0.8281, + "step": 11972 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029788611770887517, + "loss": 0.8555, + "step": 11973 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029780650964562205, + "loss": 0.8711, + "step": 11974 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002977269077094955, + "loss": 0.7969, + "step": 11975 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002976473119029084, + "loss": 0.8516, + "step": 11976 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029756772222827235, + "loss": 0.8047, + "step": 11977 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002974881386879993, + "loss": 0.7734, + "step": 11978 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002974085612845007, + "loss": 0.7969, + "step": 11979 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029732899002018825, + "loss": 0.8047, + "step": 11980 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002972494248974732, + "loss": 0.9141, + "step": 11981 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029716986591876666, + "loss": 0.9531, + "step": 11982 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002970903130864791, + "loss": 0.8789, + "step": 11983 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029701076640302194, + "loss": 0.7578, + "step": 11984 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002969312258708052, + "loss": 0.8672, + "step": 11985 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002968516914922392, + "loss": 0.8555, + "step": 11986 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002967721632697344, + "loss": 0.8086, + "step": 11987 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002966926412057007, + "loss": 0.7891, + "step": 11988 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002966131253025478, + "loss": 0.8281, + "step": 11989 + }, + { + "epoch": 0.64, + "learning_rate": 0.000296533615562685, + "loss": 0.8438, + "step": 11990 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029645411198852233, + "loss": 0.8047, + "step": 11991 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002963746145824686, + "loss": 0.8438, + "step": 11992 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002962951233469329, + "loss": 0.793, + "step": 11993 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029621563828432395, + "loss": 0.8359, + "step": 11994 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029613615939705064, + "loss": 0.8555, + "step": 11995 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002960566866875214, + "loss": 0.7969, + "step": 11996 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029597722015814454, + "loss": 0.8203, + "step": 11997 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029589775981132783, + "loss": 0.8633, + "step": 11998 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002958183056494797, + "loss": 0.8164, + "step": 11999 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029573885767500755, + "loss": 0.8281, + "step": 12000 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002956594158903189, + "loss": 0.8086, + "step": 12001 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029557998029782113, + "loss": 0.7773, + "step": 12002 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002955005508999215, + "loss": 0.7773, + "step": 12003 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029542112769902706, + "loss": 0.8867, + "step": 12004 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029534171069754434, + "loss": 0.8359, + "step": 12005 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002952622998978799, + "loss": 0.8086, + "step": 12006 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002951828953024405, + "loss": 0.8477, + "step": 12007 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029510349691363205, + "loss": 0.832, + "step": 12008 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002950241047338607, + "loss": 0.8633, + "step": 12009 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002949447187655322, + "loss": 0.8594, + "step": 12010 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002948653390110524, + "loss": 0.8164, + "step": 12011 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002947859654728268, + "loss": 0.9102, + "step": 12012 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002947065981532604, + "loss": 0.7539, + "step": 12013 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002946272370547583, + "loss": 0.8398, + "step": 12014 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029454788217972563, + "loss": 0.8008, + "step": 12015 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029446853353056704, + "loss": 0.8359, + "step": 12016 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002943891911096871, + "loss": 0.8633, + "step": 12017 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029430985491948967, + "loss": 0.8359, + "step": 12018 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029423052496237975, + "loss": 0.8125, + "step": 12019 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002941512012407607, + "loss": 0.8477, + "step": 12020 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029407188375703627, + "loss": 0.8359, + "step": 12021 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002939925725136103, + "loss": 0.7656, + "step": 12022 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029391326751288606, + "loss": 0.8555, + "step": 12023 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002938339687572668, + "loss": 0.7539, + "step": 12024 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029375467624915544, + "loss": 0.8828, + "step": 12025 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029367538999095486, + "loss": 0.8438, + "step": 12026 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002935961099850679, + "loss": 0.793, + "step": 12027 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002935168362338967, + "loss": 0.8984, + "step": 12028 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002934375687398435, + "loss": 0.8555, + "step": 12029 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029335830750531057, + "loss": 0.75, + "step": 12030 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029327905253269973, + "loss": 0.9414, + "step": 12031 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002931998038244127, + "loss": 0.8086, + "step": 12032 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002931205613828507, + "loss": 0.8125, + "step": 12033 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029304132521041547, + "loss": 0.8164, + "step": 12034 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002929620953095078, + "loss": 0.8086, + "step": 12035 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029288287168252874, + "loss": 0.9336, + "step": 12036 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029280365433187894, + "loss": 0.8125, + "step": 12037 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002927244432599591, + "loss": 0.8086, + "step": 12038 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029264523846916945, + "loss": 0.8672, + "step": 12039 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002925660399619103, + "loss": 0.8867, + "step": 12040 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002924868477405812, + "loss": 0.8164, + "step": 12041 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029240766180758265, + "loss": 0.7617, + "step": 12042 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002923284821653137, + "loss": 0.8594, + "step": 12043 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029224930881617387, + "loss": 0.8516, + "step": 12044 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029217014176256237, + "loss": 0.7773, + "step": 12045 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002920909810068783, + "loss": 0.8281, + "step": 12046 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002920118265515206, + "loss": 0.7891, + "step": 12047 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002919326783988876, + "loss": 0.8633, + "step": 12048 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029185353655137783, + "loss": 0.8359, + "step": 12049 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029177440101138964, + "loss": 0.8672, + "step": 12050 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002916952717813212, + "loss": 0.8906, + "step": 12051 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029161614886357036, + "loss": 0.7773, + "step": 12052 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029153703226053444, + "loss": 0.8594, + "step": 12053 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002914579219746113, + "loss": 0.8359, + "step": 12054 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029137881800819825, + "loss": 0.8281, + "step": 12055 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002912997203636921, + "loss": 0.8789, + "step": 12056 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029122062904349, + "loss": 0.8086, + "step": 12057 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029114154404998884, + "loss": 0.7969, + "step": 12058 + }, + { + "epoch": 0.65, + "learning_rate": 0.000291062465385585, + "loss": 0.9648, + "step": 12059 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002909833930526743, + "loss": 0.7305, + "step": 12060 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002909043270536538, + "loss": 0.8359, + "step": 12061 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029082526739091907, + "loss": 0.8633, + "step": 12062 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029074621406686556, + "loss": 0.7812, + "step": 12063 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002906671670838892, + "loss": 0.8516, + "step": 12064 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029058812644438544, + "loss": 0.8867, + "step": 12065 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002905090921507493, + "loss": 0.7461, + "step": 12066 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029043006420537566, + "loss": 0.918, + "step": 12067 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029035104261065947, + "loss": 0.9258, + "step": 12068 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002902720273689955, + "loss": 0.8047, + "step": 12069 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002901930184827779, + "loss": 0.7812, + "step": 12070 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029011401595440117, + "loss": 0.8008, + "step": 12071 + }, + { + "epoch": 0.65, + "learning_rate": 0.000290035019786259, + "loss": 0.832, + "step": 12072 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028995602998074544, + "loss": 0.8828, + "step": 12073 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028987704654025425, + "loss": 0.8633, + "step": 12074 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028979806946717883, + "loss": 0.9023, + "step": 12075 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028971909876391216, + "loss": 0.7891, + "step": 12076 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028964013443284753, + "loss": 0.9141, + "step": 12077 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028956117647637803, + "loss": 0.9023, + "step": 12078 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002894822248968961, + "loss": 0.8789, + "step": 12079 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002894032796967939, + "loss": 0.832, + "step": 12080 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028932434087846454, + "loss": 0.8438, + "step": 12081 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028924540844429964, + "loss": 0.7773, + "step": 12082 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028916648239669094, + "loss": 0.8945, + "step": 12083 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002890875627380304, + "loss": 0.8555, + "step": 12084 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028900864947070963, + "loss": 0.9258, + "step": 12085 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028892974259711975, + "loss": 0.8906, + "step": 12086 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028885084211965217, + "loss": 0.9219, + "step": 12087 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002887719480406974, + "loss": 0.8711, + "step": 12088 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028869306036264676, + "loss": 0.8008, + "step": 12089 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028861417908789016, + "loss": 0.7812, + "step": 12090 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028853530421881857, + "loss": 0.8438, + "step": 12091 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002884564357578217, + "loss": 0.8906, + "step": 12092 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028837757370728966, + "loss": 0.875, + "step": 12093 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002882987180696125, + "loss": 0.7852, + "step": 12094 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028821986884717936, + "loss": 0.8047, + "step": 12095 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002881410260423799, + "loss": 0.7773, + "step": 12096 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028806218965760343, + "loss": 0.793, + "step": 12097 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028798335969523883, + "loss": 0.7969, + "step": 12098 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002879045361576746, + "loss": 0.8516, + "step": 12099 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028782571904729964, + "loss": 0.8789, + "step": 12100 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028774690836650253, + "loss": 0.8281, + "step": 12101 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028766810411767134, + "loss": 0.8008, + "step": 12102 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002875893063031937, + "loss": 0.875, + "step": 12103 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002875105149254581, + "loss": 0.7969, + "step": 12104 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028743172998685194, + "loss": 0.832, + "step": 12105 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028735295148976246, + "loss": 0.8242, + "step": 12106 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028727417943657705, + "loss": 0.8516, + "step": 12107 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002871954138296829, + "loss": 0.8711, + "step": 12108 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002871166546714665, + "loss": 0.8047, + "step": 12109 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028703790196431505, + "loss": 0.8203, + "step": 12110 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002869591557106145, + "loss": 0.7383, + "step": 12111 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002868804159127515, + "loss": 0.7578, + "step": 12112 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028680168257311174, + "loss": 0.8047, + "step": 12113 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028672295569408154, + "loss": 0.875, + "step": 12114 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028664423527804614, + "loss": 0.8477, + "step": 12115 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002865655213273912, + "loss": 0.707, + "step": 12116 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002864868138445024, + "loss": 0.7422, + "step": 12117 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002864081128317643, + "loss": 0.875, + "step": 12118 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002863294182915619, + "loss": 0.8594, + "step": 12119 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028625073022628, + "loss": 0.793, + "step": 12120 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028617204863830325, + "loss": 0.8555, + "step": 12121 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028609337353001563, + "loss": 0.8633, + "step": 12122 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002860147049038014, + "loss": 0.8438, + "step": 12123 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002859360427620448, + "loss": 0.8359, + "step": 12124 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028585738710712927, + "loss": 0.7969, + "step": 12125 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028577873794143803, + "loss": 0.8047, + "step": 12126 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002857000952673547, + "loss": 0.7734, + "step": 12127 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002856214590872628, + "loss": 0.875, + "step": 12128 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002855428294035445, + "loss": 0.793, + "step": 12129 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028546420621858315, + "loss": 0.9102, + "step": 12130 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002853855895347609, + "loss": 0.832, + "step": 12131 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028530697935446047, + "loss": 0.8555, + "step": 12132 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028522837568006356, + "loss": 0.7305, + "step": 12133 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002851497785139523, + "loss": 0.8438, + "step": 12134 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002850711878585088, + "loss": 0.7344, + "step": 12135 + }, + { + "epoch": 0.65, + "learning_rate": 0.000284992603716114, + "loss": 0.8789, + "step": 12136 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002849140260891499, + "loss": 0.7461, + "step": 12137 + }, + { + "epoch": 0.65, + "learning_rate": 0.000284835454979997, + "loss": 0.8359, + "step": 12138 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028475689039103656, + "loss": 0.7148, + "step": 12139 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002846783323246497, + "loss": 0.8945, + "step": 12140 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002845997807832166, + "loss": 0.7734, + "step": 12141 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002845212357691175, + "loss": 0.8555, + "step": 12142 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028444269728473276, + "loss": 0.8672, + "step": 12143 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028436416533244256, + "loss": 0.8281, + "step": 12144 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028428563991462645, + "loss": 0.8828, + "step": 12145 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028420712103366364, + "loss": 0.8633, + "step": 12146 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028412860869193434, + "loss": 0.7852, + "step": 12147 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028405010289181724, + "loss": 0.8711, + "step": 12148 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028397160363569123, + "loss": 0.7852, + "step": 12149 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002838931109259352, + "loss": 0.8008, + "step": 12150 + }, + { + "epoch": 0.65, + "learning_rate": 0.000283814624764928, + "loss": 0.7422, + "step": 12151 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028373614515504763, + "loss": 0.8125, + "step": 12152 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028365767209867254, + "loss": 0.7969, + "step": 12153 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002835792055981805, + "loss": 0.8828, + "step": 12154 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002835007456559497, + "loss": 0.7734, + "step": 12155 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002834222922743572, + "loss": 0.7812, + "step": 12156 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002833438454557808, + "loss": 0.7266, + "step": 12157 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002832654052025975, + "loss": 0.8633, + "step": 12158 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002831869715171842, + "loss": 0.7656, + "step": 12159 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028310854440191815, + "loss": 0.8789, + "step": 12160 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002830301238591756, + "loss": 0.7812, + "step": 12161 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028295170989133277, + "loss": 0.9141, + "step": 12162 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028287330250076614, + "loss": 0.8438, + "step": 12163 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002827949016898519, + "loss": 0.8594, + "step": 12164 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028271650746096547, + "loss": 0.8203, + "step": 12165 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028263811981648246, + "loss": 0.8945, + "step": 12166 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028255973875877864, + "loss": 0.6914, + "step": 12167 + }, + { + "epoch": 0.65, + "learning_rate": 0.000282481364290229, + "loss": 0.7852, + "step": 12168 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028240299641320814, + "loss": 0.8203, + "step": 12169 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028232463513009167, + "loss": 0.7891, + "step": 12170 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028224628044325374, + "loss": 0.8047, + "step": 12171 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028216793235506864, + "loss": 0.8867, + "step": 12172 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028208959086791077, + "loss": 0.9648, + "step": 12173 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028201125598415416, + "loss": 0.8086, + "step": 12174 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002819329277061724, + "loss": 0.8047, + "step": 12175 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028185460603633944, + "loss": 0.8008, + "step": 12176 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002817762909770283, + "loss": 0.8984, + "step": 12177 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002816979825306125, + "loss": 0.8164, + "step": 12178 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002816196806994649, + "loss": 0.8203, + "step": 12179 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002815413854859583, + "loss": 0.8203, + "step": 12180 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002814630968924652, + "loss": 0.8672, + "step": 12181 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002813848149213581, + "loss": 0.8438, + "step": 12182 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002813065395750094, + "loss": 0.6992, + "step": 12183 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028122827085579093, + "loss": 0.7891, + "step": 12184 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002811500087660743, + "loss": 0.7656, + "step": 12185 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002810717533082313, + "loss": 0.832, + "step": 12186 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028099350448463354, + "loss": 0.8164, + "step": 12187 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028091526229765166, + "loss": 0.8203, + "step": 12188 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002808370267496571, + "loss": 0.8398, + "step": 12189 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002807587978430207, + "loss": 0.8281, + "step": 12190 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028068057558011287, + "loss": 0.8281, + "step": 12191 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028060235996330387, + "loss": 0.8789, + "step": 12192 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028052415099496397, + "loss": 0.8828, + "step": 12193 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028044594867746356, + "loss": 0.9141, + "step": 12194 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028036775301317174, + "loss": 0.8789, + "step": 12195 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002802895640044587, + "loss": 0.9727, + "step": 12196 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002802113816536934, + "loss": 0.8672, + "step": 12197 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028013320596324544, + "loss": 0.7852, + "step": 12198 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028005503693548335, + "loss": 0.8594, + "step": 12199 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002799768745727763, + "loss": 0.8711, + "step": 12200 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027989871887749247, + "loss": 0.7578, + "step": 12201 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027982056985200045, + "loss": 0.832, + "step": 12202 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002797424274986686, + "loss": 0.8008, + "step": 12203 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027966429181986477, + "loss": 0.7422, + "step": 12204 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002795861628179562, + "loss": 0.8438, + "step": 12205 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002795080404953113, + "loss": 0.8945, + "step": 12206 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002794299248542971, + "loss": 0.7695, + "step": 12207 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002793518158972804, + "loss": 0.8906, + "step": 12208 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027927371362662857, + "loss": 0.8672, + "step": 12209 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027919561804470844, + "loss": 0.9023, + "step": 12210 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002791175291538864, + "loss": 0.707, + "step": 12211 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027903944695652834, + "loss": 0.8203, + "step": 12212 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002789613714550013, + "loss": 0.7578, + "step": 12213 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002788833026516707, + "loss": 0.8398, + "step": 12214 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002788052405489022, + "loss": 0.8203, + "step": 12215 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002787271851490615, + "loss": 0.8711, + "step": 12216 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002786491364545142, + "loss": 0.918, + "step": 12217 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027857109446762483, + "loss": 0.9336, + "step": 12218 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002784930591907589, + "loss": 0.7773, + "step": 12219 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002784150306262807, + "loss": 0.8711, + "step": 12220 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002783370087765551, + "loss": 0.7031, + "step": 12221 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027825899364394607, + "loss": 0.8125, + "step": 12222 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027818098523081815, + "loss": 0.918, + "step": 12223 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027810298353953477, + "loss": 0.7969, + "step": 12224 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002780249885724599, + "loss": 0.8984, + "step": 12225 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002779470003319572, + "loss": 0.8125, + "step": 12226 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027786901882038983, + "loss": 0.8125, + "step": 12227 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027779104404012056, + "loss": 0.8203, + "step": 12228 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002777130759935127, + "loss": 0.7891, + "step": 12229 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002776351146829289, + "loss": 0.7969, + "step": 12230 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002775571601107314, + "loss": 0.8594, + "step": 12231 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027747921227928266, + "loss": 0.8008, + "step": 12232 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027740127119094485, + "loss": 0.7188, + "step": 12233 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027732333684807976, + "loss": 0.8398, + "step": 12234 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027724540925304873, + "loss": 0.8047, + "step": 12235 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027716748840821365, + "loss": 0.8828, + "step": 12236 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027708957431593574, + "loss": 0.8398, + "step": 12237 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027701166697857585, + "loss": 0.832, + "step": 12238 + }, + { + "epoch": 0.66, + "learning_rate": 0.000276933766398495, + "loss": 0.8984, + "step": 12239 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027685587257805355, + "loss": 0.8516, + "step": 12240 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002767779855196124, + "loss": 0.8086, + "step": 12241 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027670010522553136, + "loss": 0.8438, + "step": 12242 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002766222316981708, + "loss": 0.7461, + "step": 12243 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002765443649398902, + "loss": 0.8672, + "step": 12244 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002764665049530494, + "loss": 0.8359, + "step": 12245 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002763886517400078, + "loss": 0.8477, + "step": 12246 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027631080530312447, + "loss": 0.875, + "step": 12247 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027623296564475855, + "loss": 0.8164, + "step": 12248 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027615513276726895, + "loss": 0.8477, + "step": 12249 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027607730667301414, + "loss": 0.7852, + "step": 12250 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027599948736435224, + "loss": 1.0, + "step": 12251 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027592167484364173, + "loss": 0.8398, + "step": 12252 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027584386911324064, + "loss": 0.8398, + "step": 12253 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002757660701755067, + "loss": 0.8008, + "step": 12254 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027568827803279704, + "loss": 0.8516, + "step": 12255 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027561049268746973, + "loss": 0.8125, + "step": 12256 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002755327141418815, + "loss": 0.8203, + "step": 12257 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027545494239838926, + "loss": 0.7773, + "step": 12258 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027537717745934983, + "loss": 0.8516, + "step": 12259 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027529941932711996, + "loss": 0.7578, + "step": 12260 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002752216680040556, + "loss": 0.9609, + "step": 12261 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027514392349251314, + "loss": 0.7539, + "step": 12262 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027506618579484834, + "loss": 0.8164, + "step": 12263 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027498845491341715, + "loss": 0.8008, + "step": 12264 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002749107308505747, + "loss": 0.8359, + "step": 12265 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002748330136086766, + "loss": 0.7969, + "step": 12266 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002747553031900777, + "loss": 0.8125, + "step": 12267 + }, + { + "epoch": 0.66, + "learning_rate": 0.000274677599597133, + "loss": 0.8281, + "step": 12268 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027459990283219734, + "loss": 0.8477, + "step": 12269 + }, + { + "epoch": 0.66, + "learning_rate": 0.000274522212897625, + "loss": 0.7812, + "step": 12270 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027444452979576996, + "loss": 0.8203, + "step": 12271 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002743668535289868, + "loss": 0.8359, + "step": 12272 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027428918409962936, + "loss": 0.8086, + "step": 12273 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002742115215100507, + "loss": 0.8516, + "step": 12274 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002741338657626047, + "loss": 0.8125, + "step": 12275 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002740562168596446, + "loss": 0.9492, + "step": 12276 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002739785748035234, + "loss": 0.8281, + "step": 12277 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027390093959659355, + "loss": 0.7969, + "step": 12278 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027382331124120796, + "loss": 0.8008, + "step": 12279 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027374568973971916, + "loss": 0.832, + "step": 12280 + }, + { + "epoch": 0.66, + "learning_rate": 0.000273668075094479, + "loss": 0.7852, + "step": 12281 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002735904673078396, + "loss": 0.8047, + "step": 12282 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027351286638215293, + "loss": 0.7773, + "step": 12283 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027343527231977024, + "loss": 0.7891, + "step": 12284 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027335768512304316, + "loss": 0.8281, + "step": 12285 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002732801047943225, + "loss": 0.75, + "step": 12286 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002732025313359596, + "loss": 0.8164, + "step": 12287 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002731249647503049, + "loss": 0.8398, + "step": 12288 + }, + { + "epoch": 0.66, + "learning_rate": 0.000273047405039709, + "loss": 0.8594, + "step": 12289 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002729698522065223, + "loss": 0.7305, + "step": 12290 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027289230625309456, + "loss": 0.9062, + "step": 12291 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002728147671817763, + "loss": 0.7617, + "step": 12292 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027273723499491676, + "loss": 0.8828, + "step": 12293 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002726597096948654, + "loss": 0.8086, + "step": 12294 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027258219128397157, + "loss": 0.7578, + "step": 12295 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002725046797645846, + "loss": 0.7695, + "step": 12296 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027242717513905284, + "loss": 0.7852, + "step": 12297 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002723496774097253, + "loss": 0.8281, + "step": 12298 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002722721865789504, + "loss": 0.8086, + "step": 12299 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027219470264907636, + "loss": 0.9062, + "step": 12300 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027211722562245095, + "loss": 0.8828, + "step": 12301 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002720397555014221, + "loss": 0.7695, + "step": 12302 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002719622922883377, + "loss": 0.7891, + "step": 12303 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002718848359855447, + "loss": 0.8086, + "step": 12304 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027180738659539073, + "loss": 0.8125, + "step": 12305 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027172994412022223, + "loss": 0.8867, + "step": 12306 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002716525085623865, + "loss": 0.8711, + "step": 12307 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002715750799242296, + "loss": 0.8555, + "step": 12308 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027149765820809837, + "loss": 0.8867, + "step": 12309 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027142024341633844, + "loss": 0.7461, + "step": 12310 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027134283555129595, + "loss": 0.8594, + "step": 12311 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002712654346153169, + "loss": 0.7539, + "step": 12312 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027118804061074654, + "loss": 0.7773, + "step": 12313 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002711106535399297, + "loss": 0.9258, + "step": 12314 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002710332734052123, + "loss": 0.7422, + "step": 12315 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002709559002089389, + "loss": 0.8945, + "step": 12316 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002708785339534538, + "loss": 0.8516, + "step": 12317 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027080117464110175, + "loss": 0.9297, + "step": 12318 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002707238222742273, + "loss": 0.7969, + "step": 12319 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002706464768551741, + "loss": 0.7852, + "step": 12320 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027056913838628563, + "loss": 0.8633, + "step": 12321 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027049180686990627, + "loss": 0.875, + "step": 12322 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027041448230837907, + "loss": 0.832, + "step": 12323 + }, + { + "epoch": 0.66, + "learning_rate": 0.000270337164704047, + "loss": 0.8047, + "step": 12324 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027025985405925324, + "loss": 0.793, + "step": 12325 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027018255037634074, + "loss": 0.7852, + "step": 12326 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002701052536576517, + "loss": 0.8164, + "step": 12327 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027002796390552885, + "loss": 0.7461, + "step": 12328 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026995068112231385, + "loss": 0.7695, + "step": 12329 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002698734053103491, + "loss": 0.8398, + "step": 12330 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002697961364719759, + "loss": 0.8555, + "step": 12331 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026971887460953617, + "loss": 0.8398, + "step": 12332 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026964161972537073, + "loss": 0.7383, + "step": 12333 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002695643718218209, + "loss": 0.6719, + "step": 12334 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002694871309012278, + "loss": 0.8242, + "step": 12335 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002694098969659318, + "loss": 0.8555, + "step": 12336 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002693326700182731, + "loss": 0.832, + "step": 12337 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002692554500605923, + "loss": 0.7734, + "step": 12338 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026917823709522937, + "loss": 0.8242, + "step": 12339 + }, + { + "epoch": 0.66, + "learning_rate": 0.000269101031124524, + "loss": 0.793, + "step": 12340 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002690238321508157, + "loss": 0.8242, + "step": 12341 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026894664017644435, + "loss": 0.7109, + "step": 12342 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026886945520374875, + "loss": 0.7266, + "step": 12343 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002687922772350676, + "loss": 0.8359, + "step": 12344 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026871510627274, + "loss": 0.8438, + "step": 12345 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026863794231910454, + "loss": 0.8047, + "step": 12346 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002685607853764992, + "loss": 0.8203, + "step": 12347 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002684836354472625, + "loss": 0.7656, + "step": 12348 + }, + { + "epoch": 0.66, + "learning_rate": 0.000268406492533732, + "loss": 0.7383, + "step": 12349 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002683293566382455, + "loss": 0.875, + "step": 12350 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002682522277631406, + "loss": 0.9023, + "step": 12351 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026817510591075456, + "loss": 0.8203, + "step": 12352 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026809799108342406, + "loss": 0.8828, + "step": 12353 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002680208832834862, + "loss": 0.8086, + "step": 12354 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002679437825132778, + "loss": 0.7891, + "step": 12355 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002678666887751348, + "loss": 0.8438, + "step": 12356 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002677896020713937, + "loss": 0.6953, + "step": 12357 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002677125224043906, + "loss": 0.7148, + "step": 12358 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026763544977646104, + "loss": 0.7422, + "step": 12359 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002675583841899405, + "loss": 0.9297, + "step": 12360 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002674813256471644, + "loss": 0.8242, + "step": 12361 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026740427415046806, + "loss": 0.8164, + "step": 12362 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026732722970218613, + "loss": 0.8594, + "step": 12363 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002672501923046534, + "loss": 0.7305, + "step": 12364 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026717316196020463, + "loss": 0.793, + "step": 12365 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026709613867117375, + "loss": 0.8594, + "step": 12366 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002670191224398948, + "loss": 0.8711, + "step": 12367 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026694211326870176, + "loss": 0.8594, + "step": 12368 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002668651111599284, + "loss": 0.793, + "step": 12369 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002667881161159077, + "loss": 0.7734, + "step": 12370 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026671112813897343, + "loss": 0.8555, + "step": 12371 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026663414723145806, + "loss": 0.8477, + "step": 12372 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002665571733956947, + "loss": 0.8008, + "step": 12373 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002664802066340157, + "loss": 0.8398, + "step": 12374 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002664032469487537, + "loss": 0.875, + "step": 12375 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026632629434224047, + "loss": 0.8906, + "step": 12376 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026624934881680804, + "loss": 0.7969, + "step": 12377 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026617241037478847, + "loss": 0.8203, + "step": 12378 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026609547901851284, + "loss": 0.7734, + "step": 12379 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002660185547503121, + "loss": 0.7617, + "step": 12380 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002659416375725182, + "loss": 0.8125, + "step": 12381 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002658647274874615, + "loss": 0.8164, + "step": 12382 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002657878244974725, + "loss": 0.9102, + "step": 12383 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002657109286048817, + "loss": 0.7617, + "step": 12384 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002656340398120195, + "loss": 0.8203, + "step": 12385 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026555715812121586, + "loss": 0.8086, + "step": 12386 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026548028353480014, + "loss": 0.832, + "step": 12387 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002654034160551022, + "loss": 0.8984, + "step": 12388 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002653265556844516, + "loss": 0.8828, + "step": 12389 + }, + { + "epoch": 0.67, + "learning_rate": 0.000265249702425177, + "loss": 0.8555, + "step": 12390 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026517285627960775, + "loss": 0.8086, + "step": 12391 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002650960172500721, + "loss": 0.832, + "step": 12392 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002650191853388987, + "loss": 0.8164, + "step": 12393 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026494236054841615, + "loss": 0.9297, + "step": 12394 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026486554288095187, + "loss": 0.8828, + "step": 12395 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002647887323388343, + "loss": 0.7617, + "step": 12396 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002647119289243906, + "loss": 0.7812, + "step": 12397 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026463513263994844, + "loss": 0.7773, + "step": 12398 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002645583434878347, + "loss": 0.8984, + "step": 12399 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002644815614703765, + "loss": 0.7734, + "step": 12400 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026440478658990084, + "loss": 0.918, + "step": 12401 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026432801884873413, + "loss": 0.8555, + "step": 12402 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002642512582492023, + "loss": 0.8594, + "step": 12403 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026417450479363175, + "loss": 0.8633, + "step": 12404 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002640977584843486, + "loss": 0.8164, + "step": 12405 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026402101932367794, + "loss": 0.9102, + "step": 12406 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026394428731394564, + "loss": 0.7852, + "step": 12407 + }, + { + "epoch": 0.67, + "learning_rate": 0.000263867562457477, + "loss": 0.8828, + "step": 12408 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002637908447565969, + "loss": 0.8125, + "step": 12409 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026371413421362986, + "loss": 0.8945, + "step": 12410 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026363743083090074, + "loss": 0.8047, + "step": 12411 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026356073461073404, + "loss": 0.7266, + "step": 12412 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002634840455554536, + "loss": 0.7969, + "step": 12413 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002634073636673836, + "loss": 0.7969, + "step": 12414 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002633306889488475, + "loss": 0.7383, + "step": 12415 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026325402140216914, + "loss": 0.7344, + "step": 12416 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002631773610296714, + "loss": 0.7188, + "step": 12417 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002631007078336777, + "loss": 0.8047, + "step": 12418 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026302406181651056, + "loss": 0.7617, + "step": 12419 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002629474229804928, + "loss": 0.8125, + "step": 12420 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026287079132794686, + "loss": 0.75, + "step": 12421 + }, + { + "epoch": 0.67, + "learning_rate": 0.000262794166861195, + "loss": 0.8047, + "step": 12422 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002627175495825586, + "loss": 0.6953, + "step": 12423 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002626409394943603, + "loss": 0.8594, + "step": 12424 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002625643365989212, + "loss": 0.793, + "step": 12425 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026248774089856244, + "loss": 0.6602, + "step": 12426 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002624111523956053, + "loss": 0.8477, + "step": 12427 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002623345710923709, + "loss": 0.8398, + "step": 12428 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026225799699117945, + "loss": 0.8398, + "step": 12429 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002621814300943517, + "loss": 0.8203, + "step": 12430 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026210487040420795, + "loss": 0.8242, + "step": 12431 + }, + { + "epoch": 0.67, + "learning_rate": 0.000262028317923068, + "loss": 0.7188, + "step": 12432 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026195177265325167, + "loss": 0.7852, + "step": 12433 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002618752345970784, + "loss": 0.8555, + "step": 12434 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026179870375686804, + "loss": 0.7891, + "step": 12435 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002617221801349391, + "loss": 0.7891, + "step": 12436 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002616456637336111, + "loss": 0.8945, + "step": 12437 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026156915455520226, + "loss": 0.9141, + "step": 12438 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026149265260203133, + "loss": 0.8672, + "step": 12439 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002614161578764163, + "loss": 0.707, + "step": 12440 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026133967038067564, + "loss": 0.7969, + "step": 12441 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002612631901171267, + "loss": 0.8398, + "step": 12442 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026118671708808726, + "loss": 0.8555, + "step": 12443 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026111025129587495, + "loss": 0.8633, + "step": 12444 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002610337927428068, + "loss": 0.7773, + "step": 12445 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026095734143119926, + "loss": 0.6953, + "step": 12446 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002608808973633698, + "loss": 0.8398, + "step": 12447 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002608044605416348, + "loss": 0.832, + "step": 12448 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026072803096831, + "loss": 0.8711, + "step": 12449 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026065160864571194, + "loss": 0.7031, + "step": 12450 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026057519357615644, + "loss": 0.9297, + "step": 12451 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002604987857619591, + "loss": 0.8555, + "step": 12452 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026042238520543505, + "loss": 0.8906, + "step": 12453 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002603459919088996, + "loss": 0.8828, + "step": 12454 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002602696058746682, + "loss": 0.7422, + "step": 12455 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002601932271050549, + "loss": 0.8359, + "step": 12456 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026011685560237477, + "loss": 0.7695, + "step": 12457 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026004049136894164, + "loss": 0.8242, + "step": 12458 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002599641344070699, + "loss": 0.8789, + "step": 12459 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002598877847190735, + "loss": 0.8516, + "step": 12460 + }, + { + "epoch": 0.67, + "learning_rate": 0.000259811442307266, + "loss": 0.8711, + "step": 12461 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025973510717396055, + "loss": 0.8711, + "step": 12462 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002596587793214706, + "loss": 0.7734, + "step": 12463 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002595824587521093, + "loss": 0.8555, + "step": 12464 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002595061454681892, + "loss": 0.8008, + "step": 12465 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002594298394720225, + "loss": 0.9258, + "step": 12466 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025935354076592226, + "loss": 0.7852, + "step": 12467 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002592772493522002, + "loss": 0.9023, + "step": 12468 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025920096523316804, + "loss": 0.8164, + "step": 12469 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002591246884111377, + "loss": 0.6797, + "step": 12470 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002590484188884206, + "loss": 0.7734, + "step": 12471 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025897215666732777, + "loss": 0.8359, + "step": 12472 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025889590175017023, + "loss": 0.7617, + "step": 12473 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002588196541392591, + "loss": 0.8555, + "step": 12474 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002587434138369047, + "loss": 0.875, + "step": 12475 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002586671808454171, + "loss": 0.9023, + "step": 12476 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002585909551671067, + "loss": 0.875, + "step": 12477 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002585147368042835, + "loss": 0.8242, + "step": 12478 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025843852575925687, + "loss": 0.7969, + "step": 12479 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002583623220343365, + "loss": 0.7344, + "step": 12480 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002582861256318314, + "loss": 0.8047, + "step": 12481 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025820993655405086, + "loss": 0.7305, + "step": 12482 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025813375480330326, + "loss": 0.7852, + "step": 12483 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002580575803818975, + "loss": 0.8359, + "step": 12484 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025798141329214173, + "loss": 0.8281, + "step": 12485 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025790525353634405, + "loss": 0.8555, + "step": 12486 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025782910111681257, + "loss": 0.8242, + "step": 12487 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002577529560358549, + "loss": 0.8125, + "step": 12488 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025767681829577796, + "loss": 0.7383, + "step": 12489 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002576006878988898, + "loss": 0.7969, + "step": 12490 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002575245648474971, + "loss": 0.8281, + "step": 12491 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025744844914390633, + "loss": 0.7812, + "step": 12492 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002573723407904242, + "loss": 0.793, + "step": 12493 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025729623978935744, + "loss": 0.8359, + "step": 12494 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002572201461430118, + "loss": 0.7852, + "step": 12495 + }, + { + "epoch": 0.67, + "learning_rate": 0.000257144059853693, + "loss": 0.8672, + "step": 12496 + }, + { + "epoch": 0.67, + "learning_rate": 0.000257067980923707, + "loss": 0.8633, + "step": 12497 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025699190935535933, + "loss": 0.8086, + "step": 12498 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002569158451509548, + "loss": 0.8008, + "step": 12499 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025683978831279885, + "loss": 0.8555, + "step": 12500 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025676373884319585, + "loss": 0.8281, + "step": 12501 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025668769674445056, + "loss": 0.8438, + "step": 12502 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025661166201886753, + "loss": 0.9414, + "step": 12503 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002565356346687504, + "loss": 0.8359, + "step": 12504 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002564596146964035, + "loss": 0.8984, + "step": 12505 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025638360210413, + "loss": 0.8672, + "step": 12506 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025630759689423385, + "loss": 0.8047, + "step": 12507 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025623159906901775, + "loss": 0.7266, + "step": 12508 + }, + { + "epoch": 0.67, + "learning_rate": 0.000256155608630785, + "loss": 0.7383, + "step": 12509 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002560796255818384, + "loss": 0.7109, + "step": 12510 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002560036499244804, + "loss": 0.8125, + "step": 12511 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025592768166101293, + "loss": 0.7969, + "step": 12512 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002558517207937388, + "loss": 0.9258, + "step": 12513 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025577576732495956, + "loss": 0.8555, + "step": 12514 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025569982125697656, + "loss": 0.8984, + "step": 12515 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025562388259209144, + "loss": 0.875, + "step": 12516 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002555479513326056, + "loss": 0.8906, + "step": 12517 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002554720274808199, + "loss": 0.8477, + "step": 12518 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002553961110390347, + "loss": 0.8281, + "step": 12519 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025532020200955087, + "loss": 0.9102, + "step": 12520 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002552443003946688, + "loss": 0.8438, + "step": 12521 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002551684061966882, + "loss": 0.8633, + "step": 12522 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025509251941790935, + "loss": 0.8555, + "step": 12523 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002550166400606314, + "loss": 0.7852, + "step": 12524 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025494076812715405, + "loss": 0.8438, + "step": 12525 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025486490361977654, + "loss": 0.7969, + "step": 12526 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002547890465407977, + "loss": 0.8789, + "step": 12527 + }, + { + "epoch": 0.67, + "learning_rate": 0.000254713196892516, + "loss": 0.8203, + "step": 12528 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002546373546772301, + "loss": 0.8164, + "step": 12529 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025456151989723864, + "loss": 0.7695, + "step": 12530 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002544856925548393, + "loss": 0.8242, + "step": 12531 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025440987265232956, + "loss": 0.8438, + "step": 12532 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002543340601920078, + "loss": 0.8438, + "step": 12533 + }, + { + "epoch": 0.67, + "learning_rate": 0.000254258255176171, + "loss": 0.8125, + "step": 12534 + }, + { + "epoch": 0.67, + "learning_rate": 0.000254182457607116, + "loss": 0.7148, + "step": 12535 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025410666748714003, + "loss": 0.8438, + "step": 12536 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025403088481854, + "loss": 0.9023, + "step": 12537 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025395510960361177, + "loss": 0.7461, + "step": 12538 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002538793418446523, + "loss": 0.8125, + "step": 12539 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025380358154395696, + "loss": 0.7461, + "step": 12540 + }, + { + "epoch": 0.67, + "learning_rate": 0.000253727828703822, + "loss": 0.7422, + "step": 12541 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002536520833265426, + "loss": 0.9453, + "step": 12542 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025357634541441434, + "loss": 0.8242, + "step": 12543 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025350061496973235, + "loss": 0.8086, + "step": 12544 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002534248919947913, + "loss": 0.8359, + "step": 12545 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025334917649188615, + "loss": 0.9414, + "step": 12546 + }, + { + "epoch": 0.67, + "learning_rate": 0.000253273468463311, + "loss": 0.793, + "step": 12547 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002531977679113604, + "loss": 0.7617, + "step": 12548 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002531220748383279, + "loss": 0.6289, + "step": 12549 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002530463892465077, + "loss": 0.6289, + "step": 12550 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025297071113819286, + "loss": 0.793, + "step": 12551 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002528950405156769, + "loss": 0.7812, + "step": 12552 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025281937738125314, + "loss": 0.7422, + "step": 12553 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025274372173721425, + "loss": 0.8398, + "step": 12554 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002526680735858523, + "loss": 0.8281, + "step": 12555 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002525924329294606, + "loss": 0.8633, + "step": 12556 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002525167997703308, + "loss": 0.8242, + "step": 12557 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025244117411075484, + "loss": 0.8711, + "step": 12558 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025236555595302434, + "loss": 0.8398, + "step": 12559 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025228994529943123, + "loss": 0.8203, + "step": 12560 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025221434215226643, + "loss": 0.7422, + "step": 12561 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025213874651382076, + "loss": 0.8516, + "step": 12562 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002520631583863853, + "loss": 0.8516, + "step": 12563 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002519875777722507, + "loss": 0.7891, + "step": 12564 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025191200467370703, + "loss": 0.8086, + "step": 12565 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002518364390930447, + "loss": 0.8242, + "step": 12566 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025176088103255324, + "loss": 0.8008, + "step": 12567 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002516853304945225, + "loss": 0.8672, + "step": 12568 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025160978748124213, + "loss": 0.8789, + "step": 12569 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002515342519950011, + "loss": 0.9727, + "step": 12570 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025145872403808825, + "loss": 0.8945, + "step": 12571 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002513832036127924, + "loss": 0.7188, + "step": 12572 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025130769072140237, + "loss": 0.8789, + "step": 12573 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025123218536620616, + "loss": 0.8008, + "step": 12574 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002511566875494916, + "loss": 0.832, + "step": 12575 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025108119727354717, + "loss": 0.8281, + "step": 12576 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002510057145406601, + "loss": 0.8398, + "step": 12577 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002509302393531175, + "loss": 0.8984, + "step": 12578 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002508547717132069, + "loss": 0.8711, + "step": 12579 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002507793116232153, + "loss": 0.8086, + "step": 12580 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025070385908542895, + "loss": 0.7695, + "step": 12581 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025062841410213466, + "loss": 0.8633, + "step": 12582 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025055297667561866, + "loss": 0.7344, + "step": 12583 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025047754680816687, + "loss": 0.8945, + "step": 12584 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002504021245020649, + "loss": 0.8516, + "step": 12585 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025032670975959836, + "loss": 0.8086, + "step": 12586 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025025130258305285, + "loss": 0.8086, + "step": 12587 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025017590297471317, + "loss": 0.8516, + "step": 12588 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025010051093686433, + "loss": 0.8359, + "step": 12589 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002500251264717907, + "loss": 0.7539, + "step": 12590 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024994974958177687, + "loss": 0.7891, + "step": 12591 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002498743802691072, + "loss": 0.8203, + "step": 12592 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002497990185360654, + "loss": 0.8359, + "step": 12593 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024972366438493507, + "loss": 0.8047, + "step": 12594 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002496483178179997, + "loss": 0.7891, + "step": 12595 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024957297883754296, + "loss": 0.7539, + "step": 12596 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024949764744584753, + "loss": 0.8281, + "step": 12597 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024942232364519583, + "loss": 0.6758, + "step": 12598 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024934700743787125, + "loss": 0.8047, + "step": 12599 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002492716988261557, + "loss": 0.8125, + "step": 12600 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024919639781233107, + "loss": 0.8438, + "step": 12601 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024912110439867936, + "loss": 0.8359, + "step": 12602 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024904581858748256, + "loss": 0.7891, + "step": 12603 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002489705403810215, + "loss": 0.8398, + "step": 12604 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024889526978157794, + "loss": 0.8086, + "step": 12605 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002488200067914323, + "loss": 0.918, + "step": 12606 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024874475141286576, + "loss": 0.8047, + "step": 12607 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002486695036481583, + "loss": 0.7578, + "step": 12608 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024859426349959073, + "loss": 0.8516, + "step": 12609 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024851903096944256, + "loss": 0.7617, + "step": 12610 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024844380605999386, + "loss": 0.8984, + "step": 12611 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002483685887735243, + "loss": 0.8672, + "step": 12612 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024829337911231306, + "loss": 0.8086, + "step": 12613 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002482181770786391, + "loss": 0.8125, + "step": 12614 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024814298267478136, + "loss": 0.7227, + "step": 12615 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002480677959030188, + "loss": 0.8984, + "step": 12616 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024799261676562934, + "loss": 0.832, + "step": 12617 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002479174452648914, + "loss": 0.9727, + "step": 12618 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024784228140308314, + "loss": 0.8672, + "step": 12619 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024776712518248203, + "loss": 0.8672, + "step": 12620 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002476919766053651, + "loss": 0.9023, + "step": 12621 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024761683567401055, + "loss": 0.8477, + "step": 12622 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024754170239069486, + "loss": 0.8594, + "step": 12623 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024746657675769467, + "loss": 0.8398, + "step": 12624 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024739145877728674, + "loss": 0.8242, + "step": 12625 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024731634845174763, + "loss": 0.7969, + "step": 12626 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024724124578335313, + "loss": 0.8281, + "step": 12627 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002471661507743789, + "loss": 0.8242, + "step": 12628 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002470910634271009, + "loss": 0.8516, + "step": 12629 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024701598374379455, + "loss": 0.9258, + "step": 12630 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002469409117267347, + "loss": 0.8398, + "step": 12631 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002468658473781967, + "loss": 0.9336, + "step": 12632 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002467907907004549, + "loss": 0.6992, + "step": 12633 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024671574169578384, + "loss": 0.8711, + "step": 12634 + }, + { + "epoch": 0.68, + "learning_rate": 0.000246640700366458, + "loss": 0.7812, + "step": 12635 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024656566671475114, + "loss": 0.7031, + "step": 12636 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002464906407429369, + "loss": 0.875, + "step": 12637 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024641562245328894, + "loss": 0.8125, + "step": 12638 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002463406118480808, + "loss": 0.6484, + "step": 12639 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002462656089295853, + "loss": 0.8008, + "step": 12640 + }, + { + "epoch": 0.68, + "learning_rate": 0.000246190613700075, + "loss": 0.7266, + "step": 12641 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002461156261618232, + "loss": 0.8555, + "step": 12642 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002460406463171019, + "loss": 0.793, + "step": 12643 + }, + { + "epoch": 0.68, + "learning_rate": 0.000245965674168183, + "loss": 0.8203, + "step": 12644 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024589070971733863, + "loss": 0.7617, + "step": 12645 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002458157529668406, + "loss": 0.7734, + "step": 12646 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002457408039189601, + "loss": 0.793, + "step": 12647 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024566586257596855, + "loss": 0.8047, + "step": 12648 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002455909289401367, + "loss": 0.8359, + "step": 12649 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002455160030137355, + "loss": 0.8242, + "step": 12650 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002454410847990352, + "loss": 0.875, + "step": 12651 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002453661742983064, + "loss": 0.8086, + "step": 12652 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002452912715138187, + "loss": 0.9102, + "step": 12653 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024521637644784217, + "loss": 0.7734, + "step": 12654 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024514148910264656, + "loss": 0.8555, + "step": 12655 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002450666094805008, + "loss": 0.8867, + "step": 12656 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002449917375836743, + "loss": 0.8047, + "step": 12657 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002449168734144357, + "loss": 0.8242, + "step": 12658 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002448420169750539, + "loss": 0.8984, + "step": 12659 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024476716826779695, + "loss": 0.7812, + "step": 12660 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002446923272949332, + "loss": 0.6797, + "step": 12661 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024461749405873075, + "loss": 0.7734, + "step": 12662 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024454266856145705, + "loss": 0.8867, + "step": 12663 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024446785080537933, + "loss": 0.8789, + "step": 12664 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002443930407927654, + "loss": 0.8945, + "step": 12665 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024431823852588203, + "loss": 0.8164, + "step": 12666 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002442434440069957, + "loss": 0.8008, + "step": 12667 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002441686572383731, + "loss": 0.8867, + "step": 12668 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024409387822228073, + "loss": 0.8477, + "step": 12669 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002440191069609845, + "loss": 0.8203, + "step": 12670 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024394434345674992, + "loss": 0.7695, + "step": 12671 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024386958771184286, + "loss": 0.8086, + "step": 12672 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024379483972852885, + "loss": 0.8047, + "step": 12673 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024372009950907254, + "loss": 0.9258, + "step": 12674 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024364536705573925, + "loss": 0.7109, + "step": 12675 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024357064237079325, + "loss": 0.875, + "step": 12676 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024349592545649913, + "loss": 0.8047, + "step": 12677 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024342121631512116, + "loss": 0.8906, + "step": 12678 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024334651494892318, + "loss": 0.8398, + "step": 12679 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002432718213601687, + "loss": 0.7734, + "step": 12680 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024319713555112128, + "loss": 0.8516, + "step": 12681 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002431224575240444, + "loss": 0.8125, + "step": 12682 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024304778728120063, + "loss": 0.8125, + "step": 12683 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024297312482485302, + "loss": 0.8047, + "step": 12684 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024289847015726413, + "loss": 0.8125, + "step": 12685 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002428238232806962, + "loss": 0.8008, + "step": 12686 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024274918419741093, + "loss": 0.75, + "step": 12687 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024267455290967046, + "loss": 0.8281, + "step": 12688 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024259992941973646, + "loss": 0.8203, + "step": 12689 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002425253137298699, + "loss": 0.832, + "step": 12690 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024245070584233213, + "loss": 0.8398, + "step": 12691 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024237610575938417, + "loss": 0.875, + "step": 12692 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024230151348328643, + "loss": 0.8555, + "step": 12693 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024222692901629912, + "loss": 0.8359, + "step": 12694 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002421523523606826, + "loss": 0.9375, + "step": 12695 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024207778351869702, + "loss": 0.8047, + "step": 12696 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002420032224926016, + "loss": 0.8008, + "step": 12697 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024192866928465618, + "loss": 0.8828, + "step": 12698 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024185412389711965, + "loss": 0.8945, + "step": 12699 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024177958633225105, + "loss": 0.8711, + "step": 12700 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024170505659230947, + "loss": 0.8945, + "step": 12701 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002416305346795531, + "loss": 0.6992, + "step": 12702 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024155602059624, + "loss": 0.8164, + "step": 12703 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002414815143446284, + "loss": 0.793, + "step": 12704 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024140701592697633, + "loss": 0.8906, + "step": 12705 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024133252534554113, + "loss": 0.832, + "step": 12706 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024125804260257967, + "loss": 0.7656, + "step": 12707 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024118356770034982, + "loss": 0.6797, + "step": 12708 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024110910064110804, + "loss": 0.8047, + "step": 12709 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024103464142711067, + "loss": 0.8047, + "step": 12710 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024096019006061438, + "loss": 0.8516, + "step": 12711 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002408857465438754, + "loss": 0.8242, + "step": 12712 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024081131087914926, + "loss": 0.8477, + "step": 12713 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024073688306869195, + "loss": 0.9023, + "step": 12714 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024066246311475859, + "loss": 0.8359, + "step": 12715 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024058805101960463, + "loss": 0.7344, + "step": 12716 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002405136467854846, + "loss": 0.7656, + "step": 12717 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002404392504146537, + "loss": 0.8477, + "step": 12718 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024036486190936584, + "loss": 0.8789, + "step": 12719 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002402904812718756, + "loss": 0.7773, + "step": 12720 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024021610850443704, + "loss": 0.8594, + "step": 12721 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024014174360930375, + "loss": 0.75, + "step": 12722 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024006738658872894, + "loss": 0.8047, + "step": 12723 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023999303744496627, + "loss": 0.8047, + "step": 12724 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023991869618026872, + "loss": 0.8555, + "step": 12725 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023984436279688887, + "loss": 0.9375, + "step": 12726 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023977003729707937, + "loss": 0.793, + "step": 12727 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023969571968309272, + "loss": 0.8945, + "step": 12728 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023962140995718083, + "loss": 0.8789, + "step": 12729 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023954710812159515, + "loss": 0.8867, + "step": 12730 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023947281417858795, + "loss": 0.7148, + "step": 12731 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002393985281304103, + "loss": 0.7266, + "step": 12732 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023932424997931312, + "loss": 0.8359, + "step": 12733 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023924997972754735, + "loss": 0.8711, + "step": 12734 + }, + { + "epoch": 0.68, + "learning_rate": 0.000239175717377364, + "loss": 0.75, + "step": 12735 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002391014629310131, + "loss": 0.8555, + "step": 12736 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002390272163907447, + "loss": 0.7852, + "step": 12737 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002389529777588089, + "loss": 0.8438, + "step": 12738 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023887874703745553, + "loss": 0.875, + "step": 12739 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023880452422893374, + "loss": 0.9219, + "step": 12740 + }, + { + "epoch": 0.68, + "learning_rate": 0.000238730309335493, + "loss": 0.8125, + "step": 12741 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002386561023593819, + "loss": 0.8711, + "step": 12742 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023858190330284935, + "loss": 0.8594, + "step": 12743 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023850771216814404, + "loss": 0.8516, + "step": 12744 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023843352895751402, + "loss": 0.8984, + "step": 12745 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023835935367320704, + "loss": 0.7617, + "step": 12746 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023828518631747108, + "loss": 0.918, + "step": 12747 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002382110268925539, + "loss": 0.832, + "step": 12748 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023813687540070251, + "loss": 0.7734, + "step": 12749 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002380627318441636, + "loss": 0.8398, + "step": 12750 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023798859622518471, + "loss": 0.8789, + "step": 12751 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023791446854601195, + "loss": 0.9062, + "step": 12752 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023784034880889161, + "loss": 0.875, + "step": 12753 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023776623701606976, + "loss": 0.7422, + "step": 12754 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023769213316979254, + "loss": 0.8438, + "step": 12755 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023761803727230513, + "loss": 0.8359, + "step": 12756 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023754394932585322, + "loss": 0.8672, + "step": 12757 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002374698693326816, + "loss": 0.8867, + "step": 12758 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023739579729503547, + "loss": 0.8203, + "step": 12759 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023732173321515916, + "loss": 0.8008, + "step": 12760 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002372476770952973, + "loss": 0.8516, + "step": 12761 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023717362893769373, + "loss": 0.8789, + "step": 12762 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023709958874459258, + "loss": 0.8203, + "step": 12763 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023702555651823766, + "loss": 0.7969, + "step": 12764 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023695153226087196, + "loss": 0.8125, + "step": 12765 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023687751597473896, + "loss": 0.8359, + "step": 12766 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023680350766208165, + "loss": 0.707, + "step": 12767 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023672950732514265, + "loss": 0.8711, + "step": 12768 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023665551496616417, + "loss": 0.8164, + "step": 12769 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023658153058738862, + "loss": 0.8438, + "step": 12770 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023650755419105817, + "loss": 0.8711, + "step": 12771 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023643358577941437, + "loss": 0.7539, + "step": 12772 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002363596253546983, + "loss": 0.7891, + "step": 12773 + }, + { + "epoch": 0.69, + "learning_rate": 0.000236285672919152, + "loss": 0.832, + "step": 12774 + }, + { + "epoch": 0.69, + "learning_rate": 0.000236211728475016, + "loss": 0.8281, + "step": 12775 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023613779202453095, + "loss": 0.8047, + "step": 12776 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023606386356993754, + "loss": 0.8438, + "step": 12777 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002359899431134762, + "loss": 0.8125, + "step": 12778 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023591603065738665, + "loss": 0.875, + "step": 12779 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023584212620390905, + "loss": 0.7305, + "step": 12780 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002357682297552825, + "loss": 0.8281, + "step": 12781 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002356943413137468, + "loss": 0.8555, + "step": 12782 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023562046088154048, + "loss": 0.8789, + "step": 12783 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002355465884609029, + "loss": 0.8594, + "step": 12784 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002354727240540721, + "loss": 0.8242, + "step": 12785 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023539886766328673, + "loss": 0.793, + "step": 12786 + }, + { + "epoch": 0.69, + "learning_rate": 0.000235325019290785, + "loss": 0.8828, + "step": 12787 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002352511789388046, + "loss": 0.8555, + "step": 12788 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023517734660958296, + "loss": 0.8594, + "step": 12789 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002351035223053576, + "loss": 0.7617, + "step": 12790 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023502970602836583, + "loss": 0.7695, + "step": 12791 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023495589778084415, + "loss": 0.875, + "step": 12792 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023488209756502938, + "loss": 0.7773, + "step": 12793 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002348083053831581, + "loss": 0.8398, + "step": 12794 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002347345212374663, + "loss": 0.8438, + "step": 12795 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023466074513018965, + "loss": 0.8438, + "step": 12796 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023458697706356401, + "loss": 0.8438, + "step": 12797 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023451321703982496, + "loss": 0.8125, + "step": 12798 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002344394650612074, + "loss": 0.8828, + "step": 12799 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023436572112994647, + "loss": 0.918, + "step": 12800 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002342919852482766, + "loss": 0.8008, + "step": 12801 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023421825741843256, + "loss": 0.7695, + "step": 12802 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023414453764264813, + "loss": 0.7461, + "step": 12803 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002340708259231576, + "loss": 0.7852, + "step": 12804 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023399712226219467, + "loss": 0.8047, + "step": 12805 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023392342666199252, + "loss": 0.8242, + "step": 12806 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023384973912478474, + "loss": 0.8438, + "step": 12807 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023377605965280396, + "loss": 0.8398, + "step": 12808 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023370238824828304, + "loss": 0.8008, + "step": 12809 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023362872491345472, + "loss": 0.8281, + "step": 12810 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023355506965055102, + "loss": 0.8047, + "step": 12811 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002334814224618037, + "loss": 0.793, + "step": 12812 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023340778334944473, + "loss": 0.7969, + "step": 12813 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023333415231570586, + "loss": 0.7266, + "step": 12814 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023326052936281812, + "loss": 0.7734, + "step": 12815 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002331869144930121, + "loss": 0.7148, + "step": 12816 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023311330770851946, + "loss": 0.7695, + "step": 12817 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023303970901157017, + "loss": 0.8477, + "step": 12818 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023296611840439453, + "loss": 0.7891, + "step": 12819 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002328925358892226, + "loss": 0.9023, + "step": 12820 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023281896146828442, + "loss": 0.8633, + "step": 12821 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023274539514380916, + "loss": 0.875, + "step": 12822 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023267183691802647, + "loss": 0.8477, + "step": 12823 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023259828679316507, + "loss": 0.7188, + "step": 12824 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023252474477145418, + "loss": 0.8945, + "step": 12825 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023245121085512188, + "loss": 0.8125, + "step": 12826 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023237768504639694, + "loss": 0.8359, + "step": 12827 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002323041673475071, + "loss": 0.832, + "step": 12828 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023223065776068025, + "loss": 0.8594, + "step": 12829 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023215715628814427, + "loss": 0.7344, + "step": 12830 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023208366293212625, + "loss": 0.7852, + "step": 12831 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023201017769485305, + "loss": 0.8711, + "step": 12832 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023193670057855187, + "loss": 0.7148, + "step": 12833 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023186323158544932, + "loss": 0.8711, + "step": 12834 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023178977071777146, + "loss": 0.7891, + "step": 12835 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023171631797774456, + "loss": 0.7969, + "step": 12836 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023164287336759466, + "loss": 0.7148, + "step": 12837 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023156943688954724, + "loss": 0.7969, + "step": 12838 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023149600854582725, + "loss": 0.8438, + "step": 12839 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023142258833866054, + "loss": 0.8438, + "step": 12840 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023134917627027162, + "loss": 0.9453, + "step": 12841 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023127577234288493, + "loss": 0.9023, + "step": 12842 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023120237655872505, + "loss": 0.8125, + "step": 12843 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023112898892001634, + "loss": 0.8555, + "step": 12844 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023105560942898224, + "loss": 0.832, + "step": 12845 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002309822380878468, + "loss": 0.7812, + "step": 12846 + }, + { + "epoch": 0.69, + "learning_rate": 0.000230908874898833, + "loss": 0.8438, + "step": 12847 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002308355198641644, + "loss": 0.8555, + "step": 12848 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023076217298606357, + "loss": 0.7305, + "step": 12849 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023068883426675342, + "loss": 0.8281, + "step": 12850 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023061550370845606, + "loss": 0.7695, + "step": 12851 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023054218131339383, + "loss": 0.7852, + "step": 12852 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023046886708378878, + "loss": 0.8789, + "step": 12853 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023039556102186242, + "loss": 0.9375, + "step": 12854 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023032226312983602, + "loss": 0.9062, + "step": 12855 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023024897340993085, + "loss": 0.8242, + "step": 12856 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023017569186436804, + "loss": 0.8203, + "step": 12857 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002301024184953679, + "loss": 0.8438, + "step": 12858 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023002915330515107, + "loss": 0.75, + "step": 12859 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022995589629593784, + "loss": 0.7617, + "step": 12860 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022988264746994804, + "loss": 0.6641, + "step": 12861 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002298094068294011, + "loss": 0.875, + "step": 12862 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022973617437651667, + "loss": 0.7617, + "step": 12863 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022966295011351412, + "loss": 0.8359, + "step": 12864 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022958973404261197, + "loss": 0.8711, + "step": 12865 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022951652616602931, + "loss": 0.7734, + "step": 12866 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002294433264859842, + "loss": 0.8984, + "step": 12867 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022937013500469528, + "loss": 0.832, + "step": 12868 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022929695172437998, + "loss": 0.7031, + "step": 12869 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022922377664725646, + "loss": 0.8398, + "step": 12870 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022915060977554176, + "loss": 0.7734, + "step": 12871 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002290774511114533, + "loss": 0.832, + "step": 12872 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002290043006572082, + "loss": 0.7422, + "step": 12873 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002289311584150229, + "loss": 0.8633, + "step": 12874 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022885802438711355, + "loss": 0.8281, + "step": 12875 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002287848985756971, + "loss": 0.75, + "step": 12876 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022871178098298912, + "loss": 0.7891, + "step": 12877 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002286386716112051, + "loss": 0.875, + "step": 12878 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022856557046256073, + "loss": 0.7578, + "step": 12879 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022849247753927132, + "loss": 0.8711, + "step": 12880 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022841939284355174, + "loss": 0.7383, + "step": 12881 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022834631637761628, + "loss": 0.7188, + "step": 12882 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002282732481436801, + "loss": 0.7422, + "step": 12883 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002282001881439571, + "loss": 0.8477, + "step": 12884 + }, + { + "epoch": 0.69, + "learning_rate": 0.000228127136380661, + "loss": 0.8008, + "step": 12885 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022805409285600576, + "loss": 0.7031, + "step": 12886 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022798105757220494, + "loss": 0.8047, + "step": 12887 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022790803053147142, + "loss": 0.7852, + "step": 12888 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022783501173601856, + "loss": 0.7969, + "step": 12889 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022776200118805866, + "loss": 0.8438, + "step": 12890 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022768899888980454, + "loss": 0.8789, + "step": 12891 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022761600484346806, + "loss": 0.7148, + "step": 12892 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022754301905126157, + "loss": 0.8086, + "step": 12893 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022747004151539642, + "loss": 0.7812, + "step": 12894 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022739707223808414, + "loss": 0.7461, + "step": 12895 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022732411122153623, + "loss": 0.7344, + "step": 12896 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022725115846796345, + "loss": 0.7656, + "step": 12897 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022717821397957633, + "loss": 0.793, + "step": 12898 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022710527775858546, + "loss": 0.8711, + "step": 12899 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022703234980720128, + "loss": 0.7266, + "step": 12900 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022695943012763337, + "loss": 0.7852, + "step": 12901 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002268865187220916, + "loss": 0.8516, + "step": 12902 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022681361559278562, + "loss": 0.8555, + "step": 12903 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002267407207419245, + "loss": 0.8477, + "step": 12904 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022666783417171695, + "loss": 0.8711, + "step": 12905 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022659495588437178, + "loss": 0.8008, + "step": 12906 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022652208588209782, + "loss": 0.6719, + "step": 12907 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022644922416710273, + "loss": 0.8203, + "step": 12908 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022637637074159495, + "loss": 0.793, + "step": 12909 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022630352560778174, + "loss": 0.8672, + "step": 12910 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002262306887678709, + "loss": 0.8242, + "step": 12911 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002261578602240693, + "loss": 0.7539, + "step": 12912 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022608503997858403, + "loss": 0.6953, + "step": 12913 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022601222803362203, + "loss": 0.8867, + "step": 12914 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002259394243913893, + "loss": 0.9258, + "step": 12915 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022586662905409245, + "loss": 0.8008, + "step": 12916 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022579384202393705, + "loss": 0.7578, + "step": 12917 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022572106330312892, + "loss": 0.8203, + "step": 12918 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022564829289387374, + "loss": 0.7812, + "step": 12919 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022557553079837644, + "loss": 0.8789, + "step": 12920 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022550277701884186, + "loss": 0.7617, + "step": 12921 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022543003155747477, + "loss": 0.8398, + "step": 12922 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022535729441647978, + "loss": 0.8828, + "step": 12923 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022528456559806097, + "loss": 0.875, + "step": 12924 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022521184510442184, + "loss": 0.7148, + "step": 12925 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022513913293776682, + "loss": 0.8281, + "step": 12926 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002250664291002989, + "loss": 0.9961, + "step": 12927 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002249937335942211, + "loss": 0.7969, + "step": 12928 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022492104642173654, + "loss": 0.9297, + "step": 12929 + }, + { + "epoch": 0.69, + "learning_rate": 0.000224848367585048, + "loss": 0.8594, + "step": 12930 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022477569708635758, + "loss": 0.7852, + "step": 12931 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022470303492786781, + "loss": 0.7969, + "step": 12932 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022463038111178012, + "loss": 0.7656, + "step": 12933 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022455773564029668, + "loss": 0.7812, + "step": 12934 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022448509851561842, + "loss": 0.7969, + "step": 12935 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002244124697399469, + "loss": 0.9062, + "step": 12936 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022433984931548257, + "loss": 0.8203, + "step": 12937 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022426723724442628, + "loss": 0.8164, + "step": 12938 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022419463352897867, + "loss": 0.8828, + "step": 12939 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002241220381713396, + "loss": 0.8555, + "step": 12940 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022404945117370862, + "loss": 0.8516, + "step": 12941 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002239768725382861, + "loss": 0.7734, + "step": 12942 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022390430226727098, + "loss": 0.8984, + "step": 12943 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022383174036286224, + "loss": 0.8047, + "step": 12944 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022375918682725898, + "loss": 0.8164, + "step": 12945 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002236866416626599, + "loss": 0.7227, + "step": 12946 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022361410487126322, + "loss": 0.8203, + "step": 12947 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022354157645526686, + "loss": 0.9648, + "step": 12948 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022346905641686888, + "loss": 0.8047, + "step": 12949 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022339654475826698, + "loss": 0.8008, + "step": 12950 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002233240414816582, + "loss": 0.7578, + "step": 12951 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002232515465892398, + "loss": 0.8047, + "step": 12952 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022317906008320875, + "loss": 0.707, + "step": 12953 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022310658196576132, + "loss": 0.8711, + "step": 12954 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002230341122390942, + "loss": 0.75, + "step": 12955 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022296165090540304, + "loss": 0.7656, + "step": 12956 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022288919796688406, + "loss": 0.7422, + "step": 12957 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022281675342573248, + "loss": 0.8438, + "step": 12958 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022274431728414395, + "loss": 0.8086, + "step": 12959 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022267188954431318, + "loss": 0.7344, + "step": 12960 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022259947020843507, + "loss": 0.8633, + "step": 12961 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022252705927870443, + "loss": 0.8945, + "step": 12962 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022245465675731536, + "loss": 0.7344, + "step": 12963 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022238226264646165, + "loss": 0.7812, + "step": 12964 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022230987694833731, + "loss": 0.7969, + "step": 12965 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022223749966513606, + "loss": 0.7969, + "step": 12966 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022216513079905076, + "loss": 0.8164, + "step": 12967 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022209277035227466, + "loss": 0.918, + "step": 12968 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022202041832700072, + "loss": 0.8633, + "step": 12969 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002219480747254211, + "loss": 0.8438, + "step": 12970 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022187573954972806, + "loss": 0.8984, + "step": 12971 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002218034128021137, + "loss": 0.9688, + "step": 12972 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022173109448476986, + "loss": 0.9023, + "step": 12973 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002216587845998878, + "loss": 0.7656, + "step": 12974 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022158648314965908, + "loss": 0.8633, + "step": 12975 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022151419013627423, + "loss": 0.9062, + "step": 12976 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022144190556192434, + "loss": 0.918, + "step": 12977 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022136962942879957, + "loss": 0.8516, + "step": 12978 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022129736173909044, + "loss": 0.8516, + "step": 12979 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022122510249498656, + "loss": 0.7695, + "step": 12980 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022115285169867782, + "loss": 0.8398, + "step": 12981 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002210806093523538, + "loss": 0.8438, + "step": 12982 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002210083754582035, + "loss": 0.7539, + "step": 12983 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022093615001841555, + "loss": 0.8828, + "step": 12984 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022086393303517927, + "loss": 0.8516, + "step": 12985 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002207917245106827, + "loss": 0.8477, + "step": 12986 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022071952444711385, + "loss": 0.8008, + "step": 12987 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022064733284666077, + "loss": 0.7031, + "step": 12988 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022057514971151132, + "loss": 0.7852, + "step": 12989 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022050297504385276, + "loss": 0.8828, + "step": 12990 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022043080884587168, + "loss": 0.8711, + "step": 12991 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022035865111975583, + "loss": 0.9297, + "step": 12992 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022028650186769146, + "loss": 0.8086, + "step": 12993 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022021436109186465, + "loss": 0.8555, + "step": 12994 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022014222879446178, + "loss": 0.8008, + "step": 12995 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022007010497766884, + "loss": 0.8125, + "step": 12996 + }, + { + "epoch": 0.7, + "learning_rate": 0.000219997989643671, + "loss": 0.8516, + "step": 12997 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021992588279465408, + "loss": 0.7852, + "step": 12998 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021985378443280267, + "loss": 0.8789, + "step": 12999 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021978169456030202, + "loss": 0.918, + "step": 13000 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002197096131793363, + "loss": 0.7578, + "step": 13001 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021963754029209016, + "loss": 0.75, + "step": 13002 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021956547590074732, + "loss": 0.7266, + "step": 13003 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021949342000749178, + "loss": 0.7266, + "step": 13004 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021942137261450717, + "loss": 0.8516, + "step": 13005 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021934933372397665, + "loss": 0.75, + "step": 13006 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021927730333808293, + "loss": 0.8164, + "step": 13007 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021920528145900913, + "loss": 0.8438, + "step": 13008 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021913326808893779, + "loss": 0.9258, + "step": 13009 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021906126323005087, + "loss": 0.7227, + "step": 13010 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021898926688453048, + "loss": 0.9609, + "step": 13011 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021891727905455855, + "loss": 0.6953, + "step": 13012 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021884529974231643, + "loss": 0.8516, + "step": 13013 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021877332894998502, + "loss": 0.8594, + "step": 13014 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021870136667974556, + "loss": 0.7891, + "step": 13015 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002186294129337788, + "loss": 0.7188, + "step": 13016 + }, + { + "epoch": 0.7, + "learning_rate": 0.000218557467714265, + "loss": 0.8594, + "step": 13017 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021848553102338453, + "loss": 0.8828, + "step": 13018 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021841360286331703, + "loss": 0.6992, + "step": 13019 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002183416832362423, + "loss": 0.8047, + "step": 13020 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021826977214433997, + "loss": 0.9023, + "step": 13021 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021819786958978898, + "loss": 0.8242, + "step": 13022 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021812597557476805, + "loss": 0.8633, + "step": 13023 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002180540901014559, + "loss": 0.7969, + "step": 13024 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021798221317203116, + "loss": 0.9102, + "step": 13025 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021791034478867155, + "loss": 0.8555, + "step": 13026 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021783848495355513, + "loss": 0.7109, + "step": 13027 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021776663366885961, + "loss": 0.8359, + "step": 13028 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002176947909367622, + "loss": 0.7461, + "step": 13029 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021762295675943977, + "loss": 0.8164, + "step": 13030 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021755113113906927, + "loss": 0.8203, + "step": 13031 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021747931407782752, + "loss": 0.7344, + "step": 13032 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021740750557789034, + "loss": 0.7812, + "step": 13033 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021733570564143407, + "loss": 0.7734, + "step": 13034 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021726391427063452, + "loss": 0.7969, + "step": 13035 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021719213146766713, + "loss": 0.832, + "step": 13036 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021712035723470697, + "loss": 0.8125, + "step": 13037 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002170485915739292, + "loss": 0.8398, + "step": 13038 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002169768344875087, + "loss": 0.8516, + "step": 13039 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002169050859776196, + "loss": 0.9102, + "step": 13040 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002168333460464365, + "loss": 0.8984, + "step": 13041 + }, + { + "epoch": 0.7, + "learning_rate": 0.000216761614696133, + "loss": 0.9141, + "step": 13042 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002166898919288831, + "loss": 0.8281, + "step": 13043 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021661817774686, + "loss": 0.8242, + "step": 13044 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021654647215223706, + "loss": 0.8047, + "step": 13045 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021647477514718694, + "loss": 0.793, + "step": 13046 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002164030867338825, + "loss": 0.7734, + "step": 13047 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021633140691449627, + "loss": 0.8945, + "step": 13048 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021625973569120016, + "loss": 0.9141, + "step": 13049 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021618807306616577, + "loss": 0.832, + "step": 13050 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021611641904156537, + "loss": 0.8438, + "step": 13051 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021604477361956997, + "loss": 0.8125, + "step": 13052 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021597313680235054, + "loss": 0.8008, + "step": 13053 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021590150859207796, + "loss": 0.8945, + "step": 13054 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021582988899092315, + "loss": 0.8672, + "step": 13055 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021575827800105607, + "loss": 0.793, + "step": 13056 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021568667562464673, + "loss": 0.8359, + "step": 13057 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021561508186386497, + "loss": 0.8125, + "step": 13058 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021554349672088063, + "loss": 0.7852, + "step": 13059 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002154719201978625, + "loss": 0.8633, + "step": 13060 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021540035229698, + "loss": 0.8164, + "step": 13061 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021532879302040147, + "loss": 0.6992, + "step": 13062 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021525724237029566, + "loss": 0.6719, + "step": 13063 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021518570034883088, + "loss": 0.8906, + "step": 13064 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021511416695817477, + "loss": 0.8516, + "step": 13065 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021504264220049534, + "loss": 0.8203, + "step": 13066 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021497112607795976, + "loss": 0.8008, + "step": 13067 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002148996185927355, + "loss": 0.8516, + "step": 13068 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021482811974698907, + "loss": 0.8516, + "step": 13069 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021475662954288739, + "loss": 0.832, + "step": 13070 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021468514798259693, + "loss": 0.7305, + "step": 13071 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021461367506828372, + "loss": 0.7969, + "step": 13072 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021454221080211344, + "loss": 0.793, + "step": 13073 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002144707551862518, + "loss": 0.8203, + "step": 13074 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002143993082228644, + "loss": 0.8516, + "step": 13075 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021432786991411597, + "loss": 0.8047, + "step": 13076 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021425644026217138, + "loss": 0.8516, + "step": 13077 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002141850192691955, + "loss": 0.9102, + "step": 13078 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021411360693735233, + "loss": 0.7891, + "step": 13079 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002140422032688058, + "loss": 0.7227, + "step": 13080 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021397080826571986, + "loss": 0.8164, + "step": 13081 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021389942193025813, + "loss": 0.7734, + "step": 13082 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002138280442645835, + "loss": 0.8555, + "step": 13083 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021375667527085933, + "loss": 0.75, + "step": 13084 + }, + { + "epoch": 0.7, + "learning_rate": 0.000213685314951248, + "loss": 0.8789, + "step": 13085 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021361396330791228, + "loss": 0.8945, + "step": 13086 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021354262034301401, + "loss": 0.7969, + "step": 13087 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021347128605871552, + "loss": 0.8516, + "step": 13088 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021339996045717809, + "loss": 0.7656, + "step": 13089 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002133286435405632, + "loss": 0.793, + "step": 13090 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002132573353110323, + "loss": 0.7734, + "step": 13091 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021318603577074607, + "loss": 0.7891, + "step": 13092 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021311474492186468, + "loss": 0.7734, + "step": 13093 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021304346276654923, + "loss": 0.8516, + "step": 13094 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002129721893069595, + "loss": 0.6992, + "step": 13095 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021290092454525505, + "loss": 0.9375, + "step": 13096 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021282966848359565, + "loss": 0.7656, + "step": 13097 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021275842112414078, + "loss": 0.8203, + "step": 13098 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002126871824690491, + "loss": 0.8359, + "step": 13099 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002126159525204796, + "loss": 0.8398, + "step": 13100 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021254473128059094, + "loss": 0.8516, + "step": 13101 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002124735187515411, + "loss": 0.7812, + "step": 13102 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002124023149354879, + "loss": 0.9102, + "step": 13103 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002123311198345893, + "loss": 0.8438, + "step": 13104 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021225993345100292, + "loss": 0.8281, + "step": 13105 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002121887557868855, + "loss": 0.8633, + "step": 13106 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021211758684439442, + "loss": 0.8164, + "step": 13107 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021204642662568584, + "loss": 0.8633, + "step": 13108 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021197527513291666, + "loss": 0.7656, + "step": 13109 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021190413236824252, + "loss": 0.7305, + "step": 13110 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021183299833381968, + "loss": 0.7734, + "step": 13111 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021176187303180343, + "loss": 0.875, + "step": 13112 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002116907564643492, + "loss": 0.7109, + "step": 13113 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021161964863361228, + "loss": 0.8203, + "step": 13114 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002115485495417473, + "loss": 0.8242, + "step": 13115 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002114774591909084, + "loss": 0.8477, + "step": 13116 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002114063775832506, + "loss": 0.8477, + "step": 13117 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021133530472092755, + "loss": 0.7695, + "step": 13118 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021126424060609274, + "loss": 0.7812, + "step": 13119 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002111931852408999, + "loss": 0.8984, + "step": 13120 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002111221386275024, + "loss": 0.8281, + "step": 13121 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021105110076805305, + "loss": 0.8164, + "step": 13122 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002109800716647043, + "loss": 0.8633, + "step": 13123 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002109090513196087, + "loss": 0.8945, + "step": 13124 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021083803973491867, + "loss": 0.7031, + "step": 13125 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002107670369127857, + "loss": 0.7578, + "step": 13126 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021069604285536175, + "loss": 0.918, + "step": 13127 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021062505756479788, + "loss": 0.7734, + "step": 13128 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021055408104324525, + "loss": 0.9414, + "step": 13129 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021048311329285497, + "loss": 0.8477, + "step": 13130 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021041215431577732, + "loss": 0.8789, + "step": 13131 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021034120411416246, + "loss": 0.8477, + "step": 13132 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002102702626901606, + "loss": 0.7852, + "step": 13133 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002101993300459217, + "loss": 0.8164, + "step": 13134 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021012840618359497, + "loss": 0.8086, + "step": 13135 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002100574911053294, + "loss": 0.8086, + "step": 13136 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002099865848132746, + "loss": 0.7422, + "step": 13137 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020991568730957888, + "loss": 0.8555, + "step": 13138 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020984479859639061, + "loss": 0.7461, + "step": 13139 + }, + { + "epoch": 0.71, + "learning_rate": 0.000209773918675858, + "loss": 0.7812, + "step": 13140 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020970304755012914, + "loss": 0.8086, + "step": 13141 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020963218522135135, + "loss": 0.8711, + "step": 13142 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020956133169167218, + "loss": 0.7812, + "step": 13143 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020949048696323886, + "loss": 0.793, + "step": 13144 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020941965103819804, + "loss": 0.8633, + "step": 13145 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020934882391869614, + "loss": 0.7305, + "step": 13146 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002092780056068796, + "loss": 0.7656, + "step": 13147 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020920719610489463, + "loss": 0.7695, + "step": 13148 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020913639541488666, + "loss": 0.8164, + "step": 13149 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002090656035390015, + "loss": 0.8516, + "step": 13150 + }, + { + "epoch": 0.71, + "learning_rate": 0.000208994820479384, + "loss": 0.6953, + "step": 13151 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020892404623817955, + "loss": 0.8125, + "step": 13152 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020885328081753248, + "loss": 0.9141, + "step": 13153 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002087825242195875, + "loss": 0.8281, + "step": 13154 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020871177644648847, + "loss": 0.7578, + "step": 13155 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020864103750037943, + "loss": 0.832, + "step": 13156 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002085703073834042, + "loss": 0.8438, + "step": 13157 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020849958609770591, + "loss": 0.7734, + "step": 13158 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002084288736454274, + "loss": 0.9141, + "step": 13159 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020835817002871204, + "loss": 0.8164, + "step": 13160 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002082874752497022, + "loss": 0.7969, + "step": 13161 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002082167893105399, + "loss": 0.6953, + "step": 13162 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020814611221336727, + "loss": 0.7695, + "step": 13163 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020807544396032635, + "loss": 0.7266, + "step": 13164 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020800478455355837, + "loss": 0.7227, + "step": 13165 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002079341339952044, + "loss": 0.9219, + "step": 13166 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020786349228740553, + "loss": 0.9375, + "step": 13167 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002077928594323027, + "loss": 0.7188, + "step": 13168 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020772223543203582, + "loss": 0.8438, + "step": 13169 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020765162028874552, + "loss": 0.832, + "step": 13170 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002075810140045712, + "loss": 0.8281, + "step": 13171 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020751041658165277, + "loss": 0.8125, + "step": 13172 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020743982802212958, + "loss": 0.7773, + "step": 13173 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020736924832814047, + "loss": 0.8789, + "step": 13174 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020729867750182457, + "loss": 0.7773, + "step": 13175 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020722811554532, + "loss": 0.8242, + "step": 13176 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020715756246076546, + "loss": 0.7852, + "step": 13177 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020708701825029847, + "loss": 0.8438, + "step": 13178 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020701648291605702, + "loss": 0.8281, + "step": 13179 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020694595646017865, + "loss": 0.7734, + "step": 13180 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002068754388848005, + "loss": 0.7852, + "step": 13181 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020680493019205903, + "loss": 0.8203, + "step": 13182 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020673443038409162, + "loss": 0.9023, + "step": 13183 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002066639394630343, + "loss": 0.7969, + "step": 13184 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020659345743102297, + "loss": 0.8086, + "step": 13185 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020652298429019367, + "loss": 0.8984, + "step": 13186 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020645252004268216, + "loss": 0.7305, + "step": 13187 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020638206469062353, + "loss": 0.7734, + "step": 13188 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020631161823615263, + "loss": 0.7969, + "step": 13189 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002062411806814044, + "loss": 0.7422, + "step": 13190 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020617075202851353, + "loss": 0.8242, + "step": 13191 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002061003322796139, + "loss": 0.8398, + "step": 13192 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020602992143683974, + "loss": 0.7852, + "step": 13193 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020595951950232446, + "loss": 0.8008, + "step": 13194 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020588912647820156, + "loss": 0.832, + "step": 13195 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020581874236660441, + "loss": 0.8008, + "step": 13196 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020574836716966572, + "loss": 0.8203, + "step": 13197 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002056780008895179, + "loss": 0.8594, + "step": 13198 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002056076435282934, + "loss": 0.8125, + "step": 13199 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020553729508812453, + "loss": 0.8086, + "step": 13200 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020546695557114287, + "loss": 0.793, + "step": 13201 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002053966249794796, + "loss": 0.9492, + "step": 13202 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020532630331526663, + "loss": 0.7656, + "step": 13203 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020525599058063466, + "loss": 0.875, + "step": 13204 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002051856867777141, + "loss": 0.7617, + "step": 13205 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020511539190863572, + "loss": 0.7305, + "step": 13206 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020504510597552973, + "loss": 0.9336, + "step": 13207 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020497482898052576, + "loss": 0.8359, + "step": 13208 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002049045609257537, + "loss": 0.7891, + "step": 13209 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020483430181334267, + "loss": 0.8594, + "step": 13210 + }, + { + "epoch": 0.71, + "learning_rate": 0.000204764051645422, + "loss": 0.8711, + "step": 13211 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020469381042412016, + "loss": 0.8281, + "step": 13212 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002046235781515659, + "loss": 0.8203, + "step": 13213 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020455335482988767, + "loss": 0.875, + "step": 13214 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020448314046121307, + "loss": 0.8281, + "step": 13215 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002044129350476702, + "loss": 0.7812, + "step": 13216 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020434273859138626, + "loss": 0.7617, + "step": 13217 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020427255109448862, + "loss": 0.793, + "step": 13218 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002042023725591039, + "loss": 0.7852, + "step": 13219 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020413220298735912, + "loss": 0.8477, + "step": 13220 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020406204238138033, + "loss": 0.8633, + "step": 13221 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002039918907432937, + "loss": 0.8242, + "step": 13222 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020392174807522536, + "loss": 0.875, + "step": 13223 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002038516143793005, + "loss": 0.7734, + "step": 13224 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020378148965764426, + "loss": 0.8359, + "step": 13225 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002037113739123822, + "loss": 0.6875, + "step": 13226 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002036412671456388, + "loss": 0.8398, + "step": 13227 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002035711693595383, + "loss": 0.8398, + "step": 13228 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020350108055620502, + "loss": 0.8086, + "step": 13229 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020343100073776321, + "loss": 0.8906, + "step": 13230 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002033609299063362, + "loss": 0.8203, + "step": 13231 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020329086806404728, + "loss": 0.7812, + "step": 13232 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020322081521301967, + "loss": 0.7578, + "step": 13233 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020315077135537636, + "loss": 0.7773, + "step": 13234 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020308073649323967, + "loss": 0.7344, + "step": 13235 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002030107106287321, + "loss": 0.8516, + "step": 13236 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002029406937639754, + "loss": 0.7578, + "step": 13237 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020287068590109147, + "loss": 0.8867, + "step": 13238 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020280068704220188, + "loss": 0.9062, + "step": 13239 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020273069718942783, + "loss": 0.7344, + "step": 13240 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020266071634488986, + "loss": 0.8242, + "step": 13241 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002025907445107089, + "loss": 0.7812, + "step": 13242 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020252078168900555, + "loss": 0.875, + "step": 13243 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002024508278818996, + "loss": 0.8164, + "step": 13244 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020238088309151064, + "loss": 0.8828, + "step": 13245 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020231094731995892, + "loss": 0.8477, + "step": 13246 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020224102056936333, + "loss": 0.8359, + "step": 13247 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020217110284184275, + "loss": 0.7578, + "step": 13248 + }, + { + "epoch": 0.71, + "learning_rate": 0.000202101194139516, + "loss": 0.8633, + "step": 13249 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020203129446450186, + "loss": 0.8633, + "step": 13250 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020196140381891804, + "loss": 0.8906, + "step": 13251 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002018915222048827, + "loss": 0.875, + "step": 13252 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020182164962451366, + "loss": 0.7539, + "step": 13253 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020175178607992805, + "loss": 0.875, + "step": 13254 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020168193157324289, + "loss": 0.832, + "step": 13255 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020161208610657504, + "loss": 0.8555, + "step": 13256 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020154224968204138, + "loss": 0.8359, + "step": 13257 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020147242230175773, + "loss": 0.7695, + "step": 13258 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020140260396784044, + "loss": 0.9492, + "step": 13259 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020133279468240495, + "loss": 0.875, + "step": 13260 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020126299444756684, + "loss": 0.8008, + "step": 13261 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020119320326544144, + "loss": 0.875, + "step": 13262 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020112342113814353, + "loss": 0.8203, + "step": 13263 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020105364806778753, + "loss": 0.8516, + "step": 13264 + }, + { + "epoch": 0.71, + "learning_rate": 0.000200983884056488, + "loss": 0.8594, + "step": 13265 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020091412910635916, + "loss": 0.8242, + "step": 13266 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020084438321951465, + "loss": 0.8555, + "step": 13267 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020077464639806759, + "loss": 0.8984, + "step": 13268 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020070491864413205, + "loss": 0.8281, + "step": 13269 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002006351999598206, + "loss": 0.8711, + "step": 13270 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020056549034724576, + "loss": 0.7695, + "step": 13271 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020049578980852012, + "loss": 0.7422, + "step": 13272 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002004260983457561, + "loss": 0.7969, + "step": 13273 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020035641596106513, + "loss": 0.8086, + "step": 13274 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020028674265655923, + "loss": 0.8242, + "step": 13275 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020021707843434928, + "loss": 0.832, + "step": 13276 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020014742329654684, + "loss": 0.8242, + "step": 13277 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002000777772452622, + "loss": 0.8164, + "step": 13278 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020000814028260633, + "loss": 0.8125, + "step": 13279 + }, + { + "epoch": 0.71, + "learning_rate": 0.000199938512410689, + "loss": 0.7266, + "step": 13280 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001998688936316204, + "loss": 0.7852, + "step": 13281 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001997992839475104, + "loss": 0.8203, + "step": 13282 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019972968336046815, + "loss": 0.7773, + "step": 13283 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001996600918726027, + "loss": 0.8477, + "step": 13284 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019959050948602298, + "loss": 0.7773, + "step": 13285 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019952093620283784, + "loss": 0.8125, + "step": 13286 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019945137202515513, + "loss": 0.8164, + "step": 13287 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001993818169550831, + "loss": 0.8672, + "step": 13288 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001993122709947297, + "loss": 0.8516, + "step": 13289 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019924273414620214, + "loss": 0.7695, + "step": 13290 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019917320641160734, + "loss": 0.8984, + "step": 13291 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019910368779305288, + "loss": 0.7695, + "step": 13292 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019903417829264508, + "loss": 0.8867, + "step": 13293 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019896467791249013, + "loss": 0.7969, + "step": 13294 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019889518665469432, + "loss": 0.832, + "step": 13295 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001988257045213635, + "loss": 0.75, + "step": 13296 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019875623151460322, + "loss": 0.8438, + "step": 13297 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019868676763651845, + "loss": 0.8555, + "step": 13298 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019861731288921437, + "loss": 0.8398, + "step": 13299 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019854786727479585, + "loss": 0.7539, + "step": 13300 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019847843079536697, + "loss": 0.8242, + "step": 13301 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001984090034530323, + "loss": 0.9102, + "step": 13302 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019833958524989533, + "loss": 0.7734, + "step": 13303 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001982701761880598, + "loss": 0.707, + "step": 13304 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019820077626962924, + "loss": 0.8359, + "step": 13305 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019813138549670651, + "loss": 0.7969, + "step": 13306 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019806200387139421, + "loss": 0.9297, + "step": 13307 + }, + { + "epoch": 0.72, + "learning_rate": 0.000197992631395795, + "loss": 0.8359, + "step": 13308 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019792326807201128, + "loss": 0.8828, + "step": 13309 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001978539139021448, + "loss": 0.8125, + "step": 13310 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019778456888829682, + "loss": 0.8125, + "step": 13311 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019771523303256956, + "loss": 0.8828, + "step": 13312 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019764590633706365, + "loss": 0.7656, + "step": 13313 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019757658880387974, + "loss": 0.8516, + "step": 13314 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019750728043511863, + "loss": 0.8359, + "step": 13315 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019743798123288071, + "loss": 0.8008, + "step": 13316 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019736869119926564, + "loss": 0.8516, + "step": 13317 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001972994103363735, + "loss": 0.8203, + "step": 13318 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019723013864630335, + "loss": 0.9609, + "step": 13319 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019716087613115473, + "loss": 0.8555, + "step": 13320 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019709162279302618, + "loss": 0.7969, + "step": 13321 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019702237863401662, + "loss": 0.8516, + "step": 13322 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019695314365622402, + "loss": 0.7695, + "step": 13323 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019688391786174664, + "loss": 0.8945, + "step": 13324 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019681470125268235, + "loss": 0.7734, + "step": 13325 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019674549383112838, + "loss": 0.7695, + "step": 13326 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019667629559918227, + "loss": 0.875, + "step": 13327 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001966071065589406, + "loss": 0.8984, + "step": 13328 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019653792671250032, + "loss": 0.793, + "step": 13329 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019646875606195752, + "loss": 0.8477, + "step": 13330 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019639959460940844, + "loss": 0.8672, + "step": 13331 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019633044235694907, + "loss": 0.793, + "step": 13332 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019626129930667486, + "loss": 0.6875, + "step": 13333 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019619216546068065, + "loss": 0.9727, + "step": 13334 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019612304082106209, + "loss": 0.8438, + "step": 13335 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019605392538991356, + "loss": 0.8711, + "step": 13336 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001959848191693293, + "loss": 0.7305, + "step": 13337 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019591572216140369, + "loss": 0.8047, + "step": 13338 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019584663436823075, + "loss": 0.8125, + "step": 13339 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019577755579190388, + "loss": 0.875, + "step": 13340 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019570848643451615, + "loss": 0.8555, + "step": 13341 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019563942629816084, + "loss": 0.9023, + "step": 13342 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019557037538493088, + "loss": 0.7383, + "step": 13343 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019550133369691835, + "loss": 0.8477, + "step": 13344 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019543230123621582, + "loss": 0.957, + "step": 13345 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019536327800491482, + "loss": 0.7773, + "step": 13346 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019529426400510718, + "loss": 0.8516, + "step": 13347 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001952252592388844, + "loss": 0.7969, + "step": 13348 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019515626370833738, + "loss": 0.832, + "step": 13349 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019508727741555676, + "loss": 0.8711, + "step": 13350 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019501830036263324, + "loss": 0.8125, + "step": 13351 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001949493325516572, + "loss": 0.8203, + "step": 13352 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001948803739847182, + "loss": 0.8672, + "step": 13353 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019481142466390617, + "loss": 0.7461, + "step": 13354 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001947424845913106, + "loss": 0.8867, + "step": 13355 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019467355376902046, + "loss": 0.7578, + "step": 13356 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019460463219912445, + "loss": 0.8359, + "step": 13357 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001945357198837112, + "loss": 0.6758, + "step": 13358 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001944668168248692, + "loss": 0.8555, + "step": 13359 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019439792302468619, + "loss": 0.8008, + "step": 13360 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019432903848524985, + "loss": 0.8906, + "step": 13361 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019426016320864793, + "loss": 0.793, + "step": 13362 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019419129719696736, + "loss": 0.6992, + "step": 13363 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019412244045229483, + "loss": 0.7852, + "step": 13364 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001940535929767171, + "loss": 0.8086, + "step": 13365 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001939847547723207, + "loss": 0.7812, + "step": 13366 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019391592584119116, + "loss": 0.7695, + "step": 13367 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019384710618541474, + "loss": 0.8008, + "step": 13368 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019377829580707645, + "loss": 0.8984, + "step": 13369 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001937094947082617, + "loss": 0.8555, + "step": 13370 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019364070289105558, + "loss": 0.8008, + "step": 13371 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019357192035754245, + "loss": 0.8125, + "step": 13372 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001935031471098065, + "loss": 0.6797, + "step": 13373 + }, + { + "epoch": 0.72, + "learning_rate": 0.000193434383149932, + "loss": 0.9102, + "step": 13374 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019336562848000282, + "loss": 0.7852, + "step": 13375 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001932968831021024, + "loss": 0.8555, + "step": 13376 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019322814701831353, + "loss": 0.8711, + "step": 13377 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001931594202307198, + "loss": 0.8203, + "step": 13378 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019309070274140363, + "loss": 0.8438, + "step": 13379 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019302199455244708, + "loss": 0.8359, + "step": 13380 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019295329566593246, + "loss": 0.8438, + "step": 13381 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001928846060839417, + "loss": 0.7969, + "step": 13382 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019281592580855606, + "loss": 0.7812, + "step": 13383 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019274725484185707, + "loss": 0.7695, + "step": 13384 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001926785931859254, + "loss": 0.8867, + "step": 13385 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019260994084284195, + "loss": 0.7266, + "step": 13386 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019254129781468688, + "loss": 0.8516, + "step": 13387 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001924726641035406, + "loss": 0.8203, + "step": 13388 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019240403971148258, + "loss": 0.8672, + "step": 13389 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019233542464059257, + "loss": 0.7969, + "step": 13390 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019226681889295, + "loss": 0.8008, + "step": 13391 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001921982224706336, + "loss": 0.7773, + "step": 13392 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019212963537572204, + "loss": 0.8125, + "step": 13393 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019206105761029375, + "loss": 0.7422, + "step": 13394 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019199248917642714, + "loss": 0.8984, + "step": 13395 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001919239300761997, + "loss": 0.9141, + "step": 13396 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001918553803116891, + "loss": 0.8203, + "step": 13397 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019178683988497287, + "loss": 0.7891, + "step": 13398 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019171830879812784, + "loss": 0.793, + "step": 13399 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019164978705323032, + "loss": 0.8086, + "step": 13400 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019158127465235746, + "loss": 0.8086, + "step": 13401 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019151277159758513, + "loss": 0.7773, + "step": 13402 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001914442778909889, + "loss": 0.7773, + "step": 13403 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019137579353464463, + "loss": 0.8594, + "step": 13404 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019130731853062778, + "loss": 0.7812, + "step": 13405 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019123885288101317, + "loss": 0.8359, + "step": 13406 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019117039658787533, + "loss": 0.8633, + "step": 13407 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019110194965328892, + "loss": 0.8242, + "step": 13408 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019103351207932833, + "loss": 0.793, + "step": 13409 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019096508386806706, + "loss": 0.8008, + "step": 13410 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019089666502157898, + "loss": 0.7812, + "step": 13411 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019082825554193712, + "loss": 0.8672, + "step": 13412 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019075985543121472, + "loss": 0.793, + "step": 13413 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001906914646914847, + "loss": 0.8242, + "step": 13414 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019062308332481925, + "loss": 0.7891, + "step": 13415 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001905547113332905, + "loss": 0.8164, + "step": 13416 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001904863487189704, + "loss": 0.8398, + "step": 13417 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019041799548393085, + "loss": 0.7227, + "step": 13418 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019034965163024299, + "loss": 0.8867, + "step": 13419 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019028131715997744, + "loss": 0.8359, + "step": 13420 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019021299207520564, + "loss": 0.8633, + "step": 13421 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019014467637799783, + "loss": 0.7578, + "step": 13422 + }, + { + "epoch": 0.72, + "learning_rate": 0.000190076370070424, + "loss": 0.7773, + "step": 13423 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019000807315455414, + "loss": 0.7266, + "step": 13424 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018993978563245812, + "loss": 0.8281, + "step": 13425 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001898715075062049, + "loss": 0.7891, + "step": 13426 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018980323877786393, + "loss": 0.75, + "step": 13427 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001897349794495035, + "loss": 0.7578, + "step": 13428 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018966672952319262, + "loss": 0.8086, + "step": 13429 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018959848900099903, + "loss": 0.9414, + "step": 13430 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018953025788499096, + "loss": 0.7969, + "step": 13431 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001894620361772358, + "loss": 0.7969, + "step": 13432 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018939382387980096, + "loss": 0.8789, + "step": 13433 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001893256209947537, + "loss": 0.8047, + "step": 13434 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018925742752416046, + "loss": 0.7539, + "step": 13435 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018918924347008793, + "loss": 0.7617, + "step": 13436 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018912106883460245, + "loss": 0.6758, + "step": 13437 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018905290361976974, + "loss": 0.832, + "step": 13438 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001889847478276553, + "loss": 0.8711, + "step": 13439 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018891660146032462, + "loss": 0.8242, + "step": 13440 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018884846451984295, + "loss": 0.8477, + "step": 13441 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018878033700827485, + "loss": 0.8164, + "step": 13442 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018871221892768447, + "loss": 0.7539, + "step": 13443 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001886441102801368, + "loss": 0.7695, + "step": 13444 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018857601106769524, + "loss": 0.7617, + "step": 13445 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001885079212924234, + "loss": 0.8047, + "step": 13446 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018843984095638473, + "loss": 0.8008, + "step": 13447 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018837177006164247, + "loss": 0.875, + "step": 13448 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018830370861025903, + "loss": 0.8281, + "step": 13449 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018823565660429725, + "loss": 0.7773, + "step": 13450 + }, + { + "epoch": 0.72, + "learning_rate": 0.000188167614045819, + "loss": 0.8008, + "step": 13451 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018809958093688652, + "loss": 0.7773, + "step": 13452 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001880315572795611, + "loss": 0.7734, + "step": 13453 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018796354307590445, + "loss": 0.8008, + "step": 13454 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018789553832797724, + "loss": 0.8359, + "step": 13455 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001878275430378404, + "loss": 0.8477, + "step": 13456 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001877595572075546, + "loss": 0.8203, + "step": 13457 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018769158083917992, + "loss": 0.8672, + "step": 13458 + }, + { + "epoch": 0.72, + "learning_rate": 0.000187623613934776, + "loss": 0.7695, + "step": 13459 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018755565649640265, + "loss": 0.7305, + "step": 13460 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018748770852611946, + "loss": 0.7695, + "step": 13461 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018741977002598503, + "loss": 0.875, + "step": 13462 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018735184099805835, + "loss": 0.8125, + "step": 13463 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018728392144439805, + "loss": 0.7617, + "step": 13464 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001872160113670622, + "loss": 0.7734, + "step": 13465 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018714811076810838, + "loss": 0.7812, + "step": 13466 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001870802196495945, + "loss": 0.8828, + "step": 13467 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018701233801357797, + "loss": 0.8789, + "step": 13468 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001869444658621156, + "loss": 0.7891, + "step": 13469 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001868766031972644, + "loss": 0.8945, + "step": 13470 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001868087500210805, + "loss": 0.8086, + "step": 13471 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001867409063356204, + "loss": 0.7734, + "step": 13472 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018667307214293967, + "loss": 0.8711, + "step": 13473 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001866052474450941, + "loss": 0.9414, + "step": 13474 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001865374322441391, + "loss": 0.7344, + "step": 13475 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001864696265421294, + "loss": 0.7461, + "step": 13476 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018640183034112013, + "loss": 0.7109, + "step": 13477 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001863340436431653, + "loss": 0.7617, + "step": 13478 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001862662664503193, + "loss": 0.832, + "step": 13479 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018619849876463611, + "loss": 0.832, + "step": 13480 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001861307405881692, + "loss": 0.6758, + "step": 13481 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001860629919229717, + "loss": 0.8047, + "step": 13482 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001859952527710968, + "loss": 0.8477, + "step": 13483 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001859275231345973, + "loss": 0.8086, + "step": 13484 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018585980301552552, + "loss": 0.8125, + "step": 13485 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018579209241593325, + "loss": 0.7773, + "step": 13486 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018572439133787312, + "loss": 0.7539, + "step": 13487 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018565669978339616, + "loss": 0.7578, + "step": 13488 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018558901775455366, + "loss": 0.8867, + "step": 13489 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018552134525339663, + "loss": 0.7617, + "step": 13490 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018545368228197606, + "loss": 0.7656, + "step": 13491 + }, + { + "epoch": 0.73, + "learning_rate": 0.000185386028842342, + "loss": 0.8047, + "step": 13492 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018531838493654484, + "loss": 0.8164, + "step": 13493 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018525075056663416, + "loss": 0.6953, + "step": 13494 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018518312573465972, + "loss": 0.7539, + "step": 13495 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018511551044267055, + "loss": 0.8047, + "step": 13496 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018504790469271593, + "loss": 0.7891, + "step": 13497 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001849803084868442, + "loss": 0.875, + "step": 13498 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001849127218271039, + "loss": 0.8672, + "step": 13499 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018484514471554326, + "loss": 0.9141, + "step": 13500 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018477757715421, + "loss": 0.8203, + "step": 13501 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001847100191451514, + "loss": 0.8516, + "step": 13502 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001846424706904149, + "loss": 0.8672, + "step": 13503 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018457493179204759, + "loss": 0.8633, + "step": 13504 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001845074024520958, + "loss": 0.875, + "step": 13505 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018443988267260604, + "loss": 0.8477, + "step": 13506 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001843723724556246, + "loss": 0.8672, + "step": 13507 + }, + { + "epoch": 0.73, + "learning_rate": 0.000184304871803197, + "loss": 0.7578, + "step": 13508 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018423738071736846, + "loss": 0.8398, + "step": 13509 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018416989920018483, + "loss": 0.7812, + "step": 13510 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018410242725369069, + "loss": 0.8555, + "step": 13511 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018403496487993053, + "loss": 0.8281, + "step": 13512 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001839675120809488, + "loss": 0.8867, + "step": 13513 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018390006885878973, + "loss": 0.8164, + "step": 13514 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018383263521549675, + "loss": 0.875, + "step": 13515 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018376521115311362, + "loss": 0.8438, + "step": 13516 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018369779667368319, + "loss": 0.875, + "step": 13517 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018363039177924872, + "loss": 0.7812, + "step": 13518 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018356299647185237, + "loss": 0.8594, + "step": 13519 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018349561075353687, + "loss": 0.7969, + "step": 13520 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018342823462634385, + "loss": 0.8203, + "step": 13521 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018336086809231521, + "loss": 0.8047, + "step": 13522 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018329351115349253, + "loss": 0.7461, + "step": 13523 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018322616381191676, + "loss": 0.7617, + "step": 13524 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018315882606962858, + "loss": 0.7734, + "step": 13525 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001830914979286688, + "loss": 0.7773, + "step": 13526 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018302417939107773, + "loss": 0.7148, + "step": 13527 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018295687045889508, + "loss": 0.8594, + "step": 13528 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018288957113416065, + "loss": 0.832, + "step": 13529 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001828222814189141, + "loss": 0.793, + "step": 13530 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018275500131519423, + "loss": 0.8047, + "step": 13531 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018268773082503976, + "loss": 0.8203, + "step": 13532 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018262046995048937, + "loss": 0.7852, + "step": 13533 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018255321869358143, + "loss": 0.7383, + "step": 13534 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018248597705635355, + "loss": 0.8672, + "step": 13535 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018241874504084372, + "loss": 0.8164, + "step": 13536 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001823515226490889, + "loss": 0.8047, + "step": 13537 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018228430988312654, + "loss": 0.8789, + "step": 13538 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001822171067449931, + "loss": 0.8477, + "step": 13539 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001821499132367253, + "loss": 0.8008, + "step": 13540 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018208272936035908, + "loss": 0.8242, + "step": 13541 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018201555511793038, + "loss": 0.7344, + "step": 13542 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001819483905114751, + "loss": 0.8242, + "step": 13543 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001818812355430284, + "loss": 0.8047, + "step": 13544 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018181409021462476, + "loss": 0.8438, + "step": 13545 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018174695452829976, + "loss": 0.8086, + "step": 13546 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018167982848608745, + "loss": 0.7969, + "step": 13547 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001816127120900218, + "loss": 0.8711, + "step": 13548 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018154560534213677, + "loss": 0.9023, + "step": 13549 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018147850824446615, + "loss": 0.8867, + "step": 13550 + }, + { + "epoch": 0.73, + "learning_rate": 0.000181411420799043, + "loss": 0.75, + "step": 13551 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018134434300789997, + "loss": 0.7891, + "step": 13552 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018127727487307044, + "loss": 0.8477, + "step": 13553 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018121021639658637, + "loss": 0.8047, + "step": 13554 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018114316758047978, + "loss": 0.8359, + "step": 13555 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018107612842678257, + "loss": 0.7852, + "step": 13556 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018100909893752648, + "loss": 0.8125, + "step": 13557 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001809420791147423, + "loss": 0.8672, + "step": 13558 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018087506896046136, + "loss": 0.8125, + "step": 13559 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018080806847671393, + "loss": 0.8242, + "step": 13560 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018074107766553066, + "loss": 0.7383, + "step": 13561 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018067409652894128, + "loss": 0.8281, + "step": 13562 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018060712506897593, + "loss": 0.8281, + "step": 13563 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018054016328766359, + "loss": 0.8359, + "step": 13564 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018047321118703369, + "loss": 0.9219, + "step": 13565 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018040626876911526, + "loss": 0.7734, + "step": 13566 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018033933603593666, + "loss": 0.7969, + "step": 13567 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018027241298952603, + "loss": 0.8008, + "step": 13568 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018020549963191157, + "loss": 0.8008, + "step": 13569 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018013859596512105, + "loss": 0.7227, + "step": 13570 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018007170199118162, + "loss": 0.7773, + "step": 13571 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018000481771212053, + "loss": 0.9492, + "step": 13572 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017993794312996475, + "loss": 0.9258, + "step": 13573 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017987107824674064, + "loss": 0.7852, + "step": 13574 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017980422306447424, + "loss": 0.7148, + "step": 13575 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001797373775851917, + "loss": 0.7539, + "step": 13576 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017967054181091874, + "loss": 0.918, + "step": 13577 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017960371574368046, + "loss": 0.8867, + "step": 13578 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017953689938550216, + "loss": 0.7773, + "step": 13579 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001794700927384083, + "loss": 0.8359, + "step": 13580 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017940329580442372, + "loss": 0.8281, + "step": 13581 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017933650858557217, + "loss": 0.7656, + "step": 13582 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017926973108387773, + "loss": 0.7734, + "step": 13583 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001792029633013642, + "loss": 0.7852, + "step": 13584 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001791362052400544, + "loss": 0.8828, + "step": 13585 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017906945690197179, + "loss": 0.7891, + "step": 13586 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001790027182891386, + "loss": 0.7891, + "step": 13587 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017893598940357748, + "loss": 0.8516, + "step": 13588 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001788692702473107, + "loss": 0.7695, + "step": 13589 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001788025608223599, + "loss": 0.8164, + "step": 13590 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017873586113074637, + "loss": 0.7578, + "step": 13591 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017866917117449155, + "loss": 0.8008, + "step": 13592 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017860249095561653, + "loss": 0.8008, + "step": 13593 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017853582047614157, + "loss": 0.8047, + "step": 13594 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017846915973808726, + "loss": 0.8516, + "step": 13595 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001784025087434737, + "loss": 0.8906, + "step": 13596 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017833586749432052, + "loss": 0.8516, + "step": 13597 + }, + { + "epoch": 0.73, + "learning_rate": 0.000178269235992647, + "loss": 0.8008, + "step": 13598 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017820261424047245, + "loss": 0.8086, + "step": 13599 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001781360022398159, + "loss": 0.7695, + "step": 13600 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017806939999269556, + "loss": 0.7539, + "step": 13601 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017800280750113008, + "loss": 0.8438, + "step": 13602 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017793622476713704, + "loss": 0.7344, + "step": 13603 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017786965179273446, + "loss": 0.8164, + "step": 13604 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017780308857993942, + "loss": 0.793, + "step": 13605 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017773653513076932, + "loss": 0.9258, + "step": 13606 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001776699914472406, + "loss": 0.8828, + "step": 13607 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017760345753136997, + "loss": 0.9062, + "step": 13608 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017753693338517373, + "loss": 0.8359, + "step": 13609 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017747041901066763, + "loss": 0.8828, + "step": 13610 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017740391440986697, + "loss": 0.7695, + "step": 13611 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017733741958478772, + "loss": 0.8242, + "step": 13612 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017727093453744446, + "loss": 0.8711, + "step": 13613 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001772044592698519, + "loss": 0.8516, + "step": 13614 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001771379937840245, + "loss": 0.7773, + "step": 13615 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017707153808197663, + "loss": 0.7773, + "step": 13616 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001770050921657219, + "loss": 0.8672, + "step": 13617 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017693865603727365, + "loss": 0.8086, + "step": 13618 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017687222969864537, + "loss": 0.7852, + "step": 13619 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017680581315185012, + "loss": 0.8047, + "step": 13620 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017673940639890019, + "loss": 0.7578, + "step": 13621 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017667300944180803, + "loss": 0.8633, + "step": 13622 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017660662228258594, + "loss": 0.8594, + "step": 13623 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017654024492324538, + "loss": 0.8477, + "step": 13624 + }, + { + "epoch": 0.73, + "learning_rate": 0.000176473877365798, + "loss": 0.793, + "step": 13625 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001764075196122547, + "loss": 0.8477, + "step": 13626 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001763411716646267, + "loss": 0.8242, + "step": 13627 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017627483352492418, + "loss": 0.7617, + "step": 13628 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017620850519515775, + "loss": 0.9258, + "step": 13629 + }, + { + "epoch": 0.73, + "learning_rate": 0.000176142186677337, + "loss": 0.8633, + "step": 13630 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017607587797347185, + "loss": 0.8398, + "step": 13631 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017600957908557176, + "loss": 0.8125, + "step": 13632 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001759432900156457, + "loss": 0.7656, + "step": 13633 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017587701076570222, + "loss": 0.7422, + "step": 13634 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017581074133775, + "loss": 0.8164, + "step": 13635 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017574448173379736, + "loss": 0.8398, + "step": 13636 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017567823195585193, + "loss": 0.8086, + "step": 13637 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017561199200592136, + "loss": 0.8867, + "step": 13638 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017554576188601322, + "loss": 0.9023, + "step": 13639 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017547954159813424, + "loss": 0.8555, + "step": 13640 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001754133311442909, + "loss": 0.8789, + "step": 13641 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017534713052648994, + "loss": 0.7695, + "step": 13642 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017528093974673742, + "loss": 0.8398, + "step": 13643 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017521475880703897, + "loss": 0.8516, + "step": 13644 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001751485877094003, + "loss": 0.7969, + "step": 13645 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017508242645582634, + "loss": 0.9375, + "step": 13646 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017501627504832234, + "loss": 0.8398, + "step": 13647 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017495013348889254, + "loss": 0.7422, + "step": 13648 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017488400177954156, + "loss": 0.7148, + "step": 13649 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017481787992227316, + "loss": 0.832, + "step": 13650 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017475176791909114, + "loss": 0.7734, + "step": 13651 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017468566577199908, + "loss": 0.7891, + "step": 13652 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017461957348299996, + "loss": 0.75, + "step": 13653 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017455349105409624, + "loss": 0.6797, + "step": 13654 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017448741848729104, + "loss": 0.8203, + "step": 13655 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001744213557845864, + "loss": 0.7148, + "step": 13656 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001743553029479839, + "loss": 0.6914, + "step": 13657 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017428925997948547, + "loss": 0.8438, + "step": 13658 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001742232268810925, + "loss": 0.75, + "step": 13659 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017415720365480586, + "loss": 0.8086, + "step": 13660 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017409119030262598, + "loss": 0.7891, + "step": 13661 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001740251868265539, + "loss": 0.8125, + "step": 13662 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017395919322858939, + "loss": 0.832, + "step": 13663 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017389320951073216, + "loss": 0.8711, + "step": 13664 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017382723567498188, + "loss": 0.8477, + "step": 13665 + }, + { + "epoch": 0.73, + "learning_rate": 0.000173761271723338, + "loss": 0.7539, + "step": 13666 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017369531765779893, + "loss": 0.7656, + "step": 13667 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017362937348036384, + "loss": 0.8047, + "step": 13668 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017356343919303064, + "loss": 0.7969, + "step": 13669 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017349751479779762, + "loss": 0.8242, + "step": 13670 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017343160029666233, + "loss": 0.8047, + "step": 13671 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017336569569162237, + "loss": 0.8164, + "step": 13672 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017329980098467458, + "loss": 0.9062, + "step": 13673 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017323391617781597, + "loss": 0.8203, + "step": 13674 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017316804127304324, + "loss": 0.7812, + "step": 13675 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017310217627235247, + "loss": 0.8828, + "step": 13676 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017303632117773922, + "loss": 0.7734, + "step": 13677 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017297047599119976, + "loss": 0.8438, + "step": 13678 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017290464071472906, + "loss": 0.8359, + "step": 13679 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001728388153503221, + "loss": 0.8086, + "step": 13680 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017277299989997368, + "loss": 0.7422, + "step": 13681 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001727071943656784, + "loss": 0.875, + "step": 13682 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001726413987494302, + "loss": 0.75, + "step": 13683 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017257561305322278, + "loss": 0.7969, + "step": 13684 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017250983727904983, + "loss": 0.8164, + "step": 13685 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017244407142890473, + "loss": 0.8711, + "step": 13686 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017237831550478, + "loss": 0.7852, + "step": 13687 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001723125695086687, + "loss": 0.8086, + "step": 13688 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017224683344256274, + "loss": 0.7422, + "step": 13689 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017218110730845437, + "loss": 0.8672, + "step": 13690 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017211539110833547, + "loss": 0.8594, + "step": 13691 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017204968484419719, + "loss": 0.8047, + "step": 13692 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017198398851803054, + "loss": 0.8164, + "step": 13693 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017191830213182652, + "loss": 0.6719, + "step": 13694 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017185262568757581, + "loss": 0.8086, + "step": 13695 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017178695918726827, + "loss": 0.8008, + "step": 13696 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017172130263289404, + "loss": 0.8906, + "step": 13697 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017165565602644277, + "loss": 0.6641, + "step": 13698 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017159001936990371, + "loss": 0.832, + "step": 13699 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017152439266526558, + "loss": 0.7539, + "step": 13700 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001714587759145174, + "loss": 0.7891, + "step": 13701 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001713931691196477, + "loss": 0.8164, + "step": 13702 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017132757228264416, + "loss": 0.8047, + "step": 13703 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017126198540549486, + "loss": 0.8164, + "step": 13704 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017119640849018737, + "loss": 0.7812, + "step": 13705 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017113084153870878, + "loss": 0.8398, + "step": 13706 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017106528455304576, + "loss": 0.8203, + "step": 13707 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017099973753518515, + "loss": 0.793, + "step": 13708 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017093420048711332, + "loss": 0.7695, + "step": 13709 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017086867341081603, + "loss": 0.7656, + "step": 13710 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001708031563082792, + "loss": 0.8594, + "step": 13711 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017073764918148793, + "loss": 0.8711, + "step": 13712 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017067215203242758, + "loss": 0.8125, + "step": 13713 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017060666486308268, + "loss": 0.7656, + "step": 13714 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017054118767543798, + "loss": 0.7969, + "step": 13715 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017047572047147736, + "loss": 0.7969, + "step": 13716 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001704102632531848, + "loss": 0.8789, + "step": 13717 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017034481602254409, + "loss": 0.9062, + "step": 13718 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017027937878153827, + "loss": 0.7461, + "step": 13719 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017021395153215003, + "loss": 0.8672, + "step": 13720 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017014853427636266, + "loss": 0.75, + "step": 13721 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017008312701615824, + "loss": 0.8164, + "step": 13722 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017001772975351864, + "loss": 0.7344, + "step": 13723 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016995234249042574, + "loss": 0.793, + "step": 13724 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001698869652288611, + "loss": 0.8047, + "step": 13725 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016982159797080589, + "loss": 0.8594, + "step": 13726 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016975624071824058, + "loss": 0.8672, + "step": 13727 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016969089347314598, + "loss": 0.8477, + "step": 13728 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016962555623750247, + "loss": 0.75, + "step": 13729 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016956022901328966, + "loss": 0.9297, + "step": 13730 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016949491180248756, + "loss": 0.8672, + "step": 13731 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016942960460707507, + "loss": 0.6992, + "step": 13732 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016936430742903142, + "loss": 0.7812, + "step": 13733 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001692990202703355, + "loss": 0.7969, + "step": 13734 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016923374313296534, + "loss": 0.7344, + "step": 13735 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001691684760188995, + "loss": 0.7305, + "step": 13736 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016910321893011537, + "loss": 0.9219, + "step": 13737 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016903797186859081, + "loss": 0.8711, + "step": 13738 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001689727348363027, + "loss": 0.832, + "step": 13739 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016890750783522807, + "loss": 0.9023, + "step": 13740 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016884229086734371, + "loss": 0.7656, + "step": 13741 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016877708393462572, + "loss": 0.7969, + "step": 13742 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016871188703904987, + "loss": 0.8789, + "step": 13743 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016864670018259216, + "loss": 0.8164, + "step": 13744 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016858152336722792, + "loss": 0.8125, + "step": 13745 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001685163565949321, + "loss": 0.9922, + "step": 13746 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016845119986767947, + "loss": 0.7383, + "step": 13747 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016838605318744476, + "loss": 0.7852, + "step": 13748 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001683209165562019, + "loss": 0.7891, + "step": 13749 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016825578997592468, + "loss": 0.7656, + "step": 13750 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016819067344858664, + "loss": 0.7539, + "step": 13751 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001681255669761613, + "loss": 0.832, + "step": 13752 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016806047056062135, + "loss": 0.8516, + "step": 13753 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016799538420393963, + "loss": 0.8008, + "step": 13754 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001679303079080881, + "loss": 0.8594, + "step": 13755 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016786524167503925, + "loss": 0.8398, + "step": 13756 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001678001855067644, + "loss": 0.8945, + "step": 13757 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001677351394052353, + "loss": 0.7344, + "step": 13758 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001676701033724228, + "loss": 0.9141, + "step": 13759 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016760507741029774, + "loss": 0.8047, + "step": 13760 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001675400615208309, + "loss": 0.8086, + "step": 13761 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001674750557059922, + "loss": 0.7773, + "step": 13762 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016741005996775133, + "loss": 0.8438, + "step": 13763 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016734507430807844, + "loss": 0.8516, + "step": 13764 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001672800987289425, + "loss": 0.8086, + "step": 13765 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001672151332323123, + "loss": 0.8203, + "step": 13766 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016715017782015674, + "loss": 0.9023, + "step": 13767 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001670852324944443, + "loss": 0.7422, + "step": 13768 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016702029725714268, + "loss": 0.8359, + "step": 13769 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016695537211021988, + "loss": 0.707, + "step": 13770 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016689045705564344, + "loss": 0.8398, + "step": 13771 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016682555209538037, + "loss": 0.7539, + "step": 13772 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016676065723139727, + "loss": 0.7109, + "step": 13773 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016669577246566092, + "loss": 0.7461, + "step": 13774 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016663089780013761, + "loss": 0.8281, + "step": 13775 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016656603323679308, + "loss": 0.8008, + "step": 13776 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016650117877759312, + "loss": 0.8125, + "step": 13777 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001664363344245028, + "loss": 0.6992, + "step": 13778 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016637150017948733, + "loss": 0.7812, + "step": 13779 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016630667604451121, + "loss": 0.7695, + "step": 13780 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016624186202153908, + "loss": 0.8203, + "step": 13781 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016617705811253463, + "loss": 0.7734, + "step": 13782 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016611226431946196, + "loss": 0.6484, + "step": 13783 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016604748064428455, + "loss": 0.8047, + "step": 13784 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016598270708896546, + "loss": 0.8281, + "step": 13785 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001659179436554672, + "loss": 0.8125, + "step": 13786 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016585319034575298, + "loss": 0.8359, + "step": 13787 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001657884471617847, + "loss": 0.8008, + "step": 13788 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016572371410552412, + "loss": 0.8242, + "step": 13789 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016565899117893302, + "loss": 0.7969, + "step": 13790 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016559427838397297, + "loss": 0.832, + "step": 13791 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016552957572260463, + "loss": 0.8281, + "step": 13792 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016546488319678877, + "loss": 0.7812, + "step": 13793 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016540020080848573, + "loss": 0.8633, + "step": 13794 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016533552855965594, + "loss": 0.8242, + "step": 13795 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016527086645225876, + "loss": 0.957, + "step": 13796 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016520621448825397, + "loss": 0.6641, + "step": 13797 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001651415726696005, + "loss": 0.7695, + "step": 13798 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016507694099825732, + "loss": 0.8203, + "step": 13799 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001650123194761831, + "loss": 0.7617, + "step": 13800 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016494770810533604, + "loss": 0.8438, + "step": 13801 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016488310688767382, + "loss": 0.75, + "step": 13802 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016481851582515422, + "loss": 0.7812, + "step": 13803 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016475393491973485, + "loss": 0.7852, + "step": 13804 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016468936417337237, + "loss": 0.9102, + "step": 13805 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016462480358802334, + "loss": 0.8633, + "step": 13806 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016456025316564465, + "loss": 0.8672, + "step": 13807 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001644957129081922, + "loss": 0.8164, + "step": 13808 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016443118281762154, + "loss": 0.7695, + "step": 13809 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016436666289588837, + "loss": 0.7773, + "step": 13810 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001643021531449479, + "loss": 0.8086, + "step": 13811 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016423765356675475, + "loss": 0.75, + "step": 13812 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016417316416326362, + "loss": 0.75, + "step": 13813 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016410868493642889, + "loss": 0.7422, + "step": 13814 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016404421588820433, + "loss": 0.793, + "step": 13815 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016397975702054347, + "loss": 0.875, + "step": 13816 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016391530833539968, + "loss": 0.8359, + "step": 13817 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016385086983472625, + "loss": 0.7461, + "step": 13818 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016378644152047545, + "loss": 0.8438, + "step": 13819 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016372202339460002, + "loss": 0.7461, + "step": 13820 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016365761545905172, + "loss": 0.8789, + "step": 13821 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016359321771578268, + "loss": 0.7539, + "step": 13822 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016352883016674402, + "loss": 0.8828, + "step": 13823 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001634644528138872, + "loss": 0.8047, + "step": 13824 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016340008565916275, + "loss": 0.8633, + "step": 13825 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016333572870452141, + "loss": 0.8008, + "step": 13826 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001632713819519135, + "loss": 0.8398, + "step": 13827 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016320704540328878, + "loss": 0.707, + "step": 13828 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016314271906059657, + "loss": 0.8555, + "step": 13829 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001630784029257868, + "loss": 0.8086, + "step": 13830 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016301409700080816, + "loss": 0.8281, + "step": 13831 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016294980128760917, + "loss": 0.8047, + "step": 13832 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001628855157881383, + "loss": 0.8398, + "step": 13833 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016282124050434388, + "loss": 0.8594, + "step": 13834 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016275697543817342, + "loss": 0.7656, + "step": 13835 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016269272059157424, + "loss": 0.7773, + "step": 13836 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001626284759664936, + "loss": 0.7695, + "step": 13837 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016256424156487848, + "loss": 0.8516, + "step": 13838 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001625000173886751, + "loss": 0.8242, + "step": 13839 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016243580343983, + "loss": 0.7852, + "step": 13840 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001623715997202887, + "loss": 0.7969, + "step": 13841 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016230740623199698, + "loss": 1.0, + "step": 13842 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016224322297690025, + "loss": 0.8359, + "step": 13843 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001621790499569432, + "loss": 0.7852, + "step": 13844 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001621148871740708, + "loss": 0.7852, + "step": 13845 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016205073463022702, + "loss": 0.7344, + "step": 13846 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001619865923273562, + "loss": 0.7383, + "step": 13847 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016192246026740183, + "loss": 0.7734, + "step": 13848 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001618583384523074, + "loss": 0.8086, + "step": 13849 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016179422688401623, + "loss": 0.832, + "step": 13850 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016173012556447091, + "loss": 0.7578, + "step": 13851 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016166603449561368, + "loss": 0.8828, + "step": 13852 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016160195367938724, + "loss": 0.7773, + "step": 13853 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016153788311773317, + "loss": 0.8047, + "step": 13854 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016147382281259288, + "loss": 0.957, + "step": 13855 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016140977276590774, + "loss": 0.8125, + "step": 13856 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001613457329796188, + "loss": 0.7695, + "step": 13857 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016128170345566662, + "loss": 0.8594, + "step": 13858 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001612176841959913, + "loss": 0.6992, + "step": 13859 + }, + { + "epoch": 0.74, + "learning_rate": 0.000161153675202533, + "loss": 0.8359, + "step": 13860 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001610896764772315, + "loss": 0.7539, + "step": 13861 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016102568802202593, + "loss": 0.9414, + "step": 13862 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001609617098388556, + "loss": 0.6641, + "step": 13863 + }, + { + "epoch": 0.75, + "learning_rate": 0.000160897741929659, + "loss": 0.9453, + "step": 13864 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016083378429637474, + "loss": 0.8477, + "step": 13865 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016076983694094106, + "loss": 0.7266, + "step": 13866 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016070589986529561, + "loss": 0.832, + "step": 13867 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016064197307137572, + "loss": 0.8867, + "step": 13868 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016057805656111878, + "loss": 0.7617, + "step": 13869 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001605141503364619, + "loss": 0.9102, + "step": 13870 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016045025439934135, + "loss": 0.8711, + "step": 13871 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001603863687516931, + "loss": 0.8281, + "step": 13872 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001603224933954538, + "loss": 0.8672, + "step": 13873 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001602586283325587, + "loss": 0.7891, + "step": 13874 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016019477356494306, + "loss": 0.8438, + "step": 13875 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016013092909454192, + "loss": 0.7656, + "step": 13876 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016006709492329019, + "loss": 0.7578, + "step": 13877 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016000327105312196, + "loss": 0.8164, + "step": 13878 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001599394574859716, + "loss": 0.7773, + "step": 13879 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015987565422377254, + "loss": 0.8047, + "step": 13880 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001598118612684586, + "loss": 0.7734, + "step": 13881 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001597480786219625, + "loss": 0.8203, + "step": 13882 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015968430628621728, + "loss": 0.832, + "step": 13883 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015962054426315564, + "loss": 0.8398, + "step": 13884 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015955679255470946, + "loss": 0.7812, + "step": 13885 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015949305116281088, + "loss": 0.8008, + "step": 13886 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015942932008939116, + "loss": 0.8086, + "step": 13887 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015936559933638195, + "loss": 0.8867, + "step": 13888 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015930188890571383, + "loss": 0.7461, + "step": 13889 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015923818879931778, + "loss": 0.8047, + "step": 13890 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015917449901912378, + "loss": 0.8906, + "step": 13891 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015911081956706196, + "loss": 0.8438, + "step": 13892 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001590471504450623, + "loss": 0.9453, + "step": 13893 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015898349165505387, + "loss": 0.8945, + "step": 13894 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015891984319896558, + "loss": 0.7969, + "step": 13895 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015885620507872668, + "loss": 0.6875, + "step": 13896 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001587925772962654, + "loss": 0.8477, + "step": 13897 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015872895985350965, + "loss": 0.8633, + "step": 13898 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001586653527523874, + "loss": 0.7148, + "step": 13899 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015860175599482641, + "loss": 0.832, + "step": 13900 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015853816958275358, + "loss": 0.8672, + "step": 13901 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015847459351809567, + "loss": 0.8906, + "step": 13902 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015841102780277949, + "loss": 0.8828, + "step": 13903 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015834747243873132, + "loss": 0.7109, + "step": 13904 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001582839274278769, + "loss": 0.8398, + "step": 13905 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015822039277214205, + "loss": 0.8047, + "step": 13906 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001581568684734519, + "loss": 0.7852, + "step": 13907 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015809335453373147, + "loss": 0.8047, + "step": 13908 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015802985095490562, + "loss": 0.7461, + "step": 13909 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015796635773889867, + "loss": 0.9297, + "step": 13910 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001579028748876344, + "loss": 0.9609, + "step": 13911 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015783940240303673, + "loss": 0.7539, + "step": 13912 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001577759402870293, + "loss": 0.8164, + "step": 13913 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015771248854153498, + "loss": 0.8555, + "step": 13914 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015764904716847634, + "loss": 0.7891, + "step": 13915 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001575856161697764, + "loss": 0.7461, + "step": 13916 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015752219554735703, + "loss": 0.7891, + "step": 13917 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015745878530314, + "loss": 0.8203, + "step": 13918 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015739538543904698, + "loss": 0.8164, + "step": 13919 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015733199595699927, + "loss": 0.8867, + "step": 13920 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015726861685891753, + "loss": 0.793, + "step": 13921 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015720524814672254, + "loss": 0.7266, + "step": 13922 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015714188982233468, + "loss": 0.8008, + "step": 13923 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001570785418876738, + "loss": 0.9336, + "step": 13924 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015701520434465937, + "loss": 0.8164, + "step": 13925 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015695187719521087, + "loss": 0.8438, + "step": 13926 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015688856044124755, + "loss": 0.8984, + "step": 13927 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015682525408468767, + "loss": 0.8164, + "step": 13928 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001567619581274501, + "loss": 0.707, + "step": 13929 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015669867257145243, + "loss": 0.7461, + "step": 13930 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015663539741861272, + "loss": 0.8867, + "step": 13931 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015657213267084846, + "loss": 0.7773, + "step": 13932 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015650887833007672, + "loss": 0.8438, + "step": 13933 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015644563439821407, + "loss": 0.8398, + "step": 13934 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015638240087717726, + "loss": 0.7031, + "step": 13935 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015631917776888255, + "loss": 0.8555, + "step": 13936 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001562559650752457, + "loss": 0.7188, + "step": 13937 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001561927627981819, + "loss": 0.8711, + "step": 13938 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015612957093960706, + "loss": 0.7109, + "step": 13939 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015606638950143575, + "loss": 0.8125, + "step": 13940 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001560032184855824, + "loss": 0.7422, + "step": 13941 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001559400578939616, + "loss": 0.8633, + "step": 13942 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001558769077284873, + "loss": 0.8047, + "step": 13943 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015581376799107295, + "loss": 0.793, + "step": 13944 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015575063868363215, + "loss": 0.8242, + "step": 13945 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001556875198080776, + "loss": 0.8125, + "step": 13946 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015562441136632245, + "loss": 0.8281, + "step": 13947 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001555613133602786, + "loss": 0.8828, + "step": 13948 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015549822579185858, + "loss": 0.875, + "step": 13949 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015543514866297377, + "loss": 0.8945, + "step": 13950 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015537208197553576, + "loss": 0.7578, + "step": 13951 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015530902573145588, + "loss": 0.8008, + "step": 13952 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015524597993264478, + "loss": 0.7969, + "step": 13953 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001551829445810128, + "loss": 0.6797, + "step": 13954 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001551199196784702, + "loss": 0.8164, + "step": 13955 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015505690522692712, + "loss": 0.8125, + "step": 13956 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015499390122829265, + "loss": 0.8398, + "step": 13957 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001549309076844763, + "loss": 0.8281, + "step": 13958 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015486792459738714, + "loss": 0.8555, + "step": 13959 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015480495196893346, + "loss": 0.8594, + "step": 13960 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015474198980102338, + "loss": 0.8438, + "step": 13961 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015467903809556537, + "loss": 0.8281, + "step": 13962 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015461609685446694, + "loss": 0.7578, + "step": 13963 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015455316607963505, + "loss": 0.7812, + "step": 13964 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015449024577297694, + "loss": 0.7539, + "step": 13965 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015442733593639952, + "loss": 0.8789, + "step": 13966 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015436443657180894, + "loss": 0.8789, + "step": 13967 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001543015476811111, + "loss": 0.8281, + "step": 13968 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015423866926621193, + "loss": 0.7773, + "step": 13969 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015417580132901705, + "loss": 0.7969, + "step": 13970 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015411294387143116, + "loss": 0.7109, + "step": 13971 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015405009689535936, + "loss": 0.793, + "step": 13972 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001539872604027059, + "loss": 0.7578, + "step": 13973 + }, + { + "epoch": 0.75, + "learning_rate": 0.000153924434395375, + "loss": 0.9062, + "step": 13974 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015386161887527073, + "loss": 0.7812, + "step": 13975 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001537988138442964, + "loss": 0.8008, + "step": 13976 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015373601930435504, + "loss": 0.8789, + "step": 13977 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001536732352573497, + "loss": 0.8359, + "step": 13978 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015361046170518316, + "loss": 0.75, + "step": 13979 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015354769864975738, + "loss": 0.8906, + "step": 13980 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015348494609297407, + "loss": 0.7344, + "step": 13981 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015342220403673546, + "loss": 0.8633, + "step": 13982 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015335947248294252, + "loss": 0.7773, + "step": 13983 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015329675143349602, + "loss": 0.832, + "step": 13984 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015323404089029685, + "loss": 0.8828, + "step": 13985 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001531713408552454, + "loss": 0.7891, + "step": 13986 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015310865133024148, + "loss": 0.668, + "step": 13987 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015304597231718503, + "loss": 0.8281, + "step": 13988 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015298330381797515, + "loss": 0.918, + "step": 13989 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015292064583451122, + "loss": 0.8281, + "step": 13990 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015285799836869162, + "loss": 0.7539, + "step": 13991 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015279536142241495, + "loss": 0.7656, + "step": 13992 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015273273499757955, + "loss": 0.793, + "step": 13993 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001526701190960828, + "loss": 0.832, + "step": 13994 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001526075137198225, + "loss": 0.8594, + "step": 13995 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001525449188706955, + "loss": 0.8359, + "step": 13996 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015248233455059896, + "loss": 0.8203, + "step": 13997 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015241976076142905, + "loss": 0.8945, + "step": 13998 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015235719750508227, + "loss": 0.7695, + "step": 13999 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015229464478345417, + "loss": 0.9375, + "step": 14000 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015223210259844051, + "loss": 0.8125, + "step": 14001 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015216957095193662, + "loss": 0.7539, + "step": 14002 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001521070498458373, + "loss": 0.832, + "step": 14003 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015204453928203683, + "loss": 0.7656, + "step": 14004 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015198203926243008, + "loss": 0.7812, + "step": 14005 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001519195497889107, + "loss": 0.6836, + "step": 14006 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015185707086337225, + "loss": 0.875, + "step": 14007 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001517946024877081, + "loss": 0.8242, + "step": 14008 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001517321446638114, + "loss": 0.9062, + "step": 14009 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015166969739357477, + "loss": 0.8633, + "step": 14010 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001516072606788903, + "loss": 0.8477, + "step": 14011 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015154483452165025, + "loss": 0.793, + "step": 14012 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015148241892374648, + "loss": 0.8164, + "step": 14013 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001514200138870701, + "loss": 0.7109, + "step": 14014 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015135761941351245, + "loss": 0.8594, + "step": 14015 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015129523550496405, + "loss": 0.793, + "step": 14016 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015123286216331538, + "loss": 0.8516, + "step": 14017 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001511704993904568, + "loss": 0.8516, + "step": 14018 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015110814718827793, + "loss": 0.6797, + "step": 14019 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015104580555866804, + "loss": 0.7461, + "step": 14020 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015098347450351657, + "loss": 0.7969, + "step": 14021 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015092115402471235, + "loss": 0.8867, + "step": 14022 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015085884412414374, + "loss": 0.8125, + "step": 14023 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015079654480369898, + "loss": 0.7578, + "step": 14024 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015073425606526613, + "loss": 0.8672, + "step": 14025 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015067197791073262, + "loss": 0.7266, + "step": 14026 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015060971034198546, + "loss": 0.8281, + "step": 14027 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015054745336091175, + "loss": 0.9141, + "step": 14028 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015048520696939828, + "loss": 0.8516, + "step": 14029 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015042297116933095, + "loss": 0.6914, + "step": 14030 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001503607459625959, + "loss": 0.8359, + "step": 14031 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001502985313510789, + "loss": 0.7422, + "step": 14032 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015023632733666514, + "loss": 0.8516, + "step": 14033 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015017413392123935, + "loss": 0.8516, + "step": 14034 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015011195110668646, + "loss": 0.7812, + "step": 14035 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001500497788948909, + "loss": 0.8711, + "step": 14036 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014998761728773641, + "loss": 0.7852, + "step": 14037 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014992546628710701, + "loss": 0.875, + "step": 14038 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014986332589488573, + "loss": 0.793, + "step": 14039 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001498011961129558, + "loss": 0.8008, + "step": 14040 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014973907694320015, + "loss": 0.793, + "step": 14041 + }, + { + "epoch": 0.75, + "learning_rate": 0.000149676968387501, + "loss": 0.7109, + "step": 14042 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014961487044774023, + "loss": 0.8672, + "step": 14043 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014955278312579995, + "loss": 0.7773, + "step": 14044 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001494907064235616, + "loss": 0.8047, + "step": 14045 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014942864034290615, + "loss": 0.8555, + "step": 14046 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001493665848857142, + "loss": 0.8945, + "step": 14047 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014930454005386679, + "loss": 0.6953, + "step": 14048 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014924250584924382, + "loss": 0.7422, + "step": 14049 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001491804822737249, + "loss": 0.8125, + "step": 14050 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014911846932918977, + "loss": 0.7383, + "step": 14051 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014905646701751773, + "loss": 0.7383, + "step": 14052 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014899447534058734, + "loss": 0.7109, + "step": 14053 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014893249430027755, + "loss": 0.7852, + "step": 14054 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014887052389846617, + "loss": 0.875, + "step": 14055 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014880856413703147, + "loss": 0.7891, + "step": 14056 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014874661501785075, + "loss": 0.7695, + "step": 14057 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014868467654280155, + "loss": 0.7812, + "step": 14058 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014862274871376048, + "loss": 0.8125, + "step": 14059 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001485608315326043, + "loss": 0.7969, + "step": 14060 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014849892500120953, + "loss": 0.8477, + "step": 14061 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014843702912145196, + "loss": 0.793, + "step": 14062 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014837514389520702, + "loss": 0.7852, + "step": 14063 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001483132693243503, + "loss": 0.7617, + "step": 14064 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014825140541075682, + "loss": 0.793, + "step": 14065 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014818955215630108, + "loss": 0.832, + "step": 14066 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014812770956285754, + "loss": 0.8008, + "step": 14067 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001480658776323004, + "loss": 0.8164, + "step": 14068 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001480040563665032, + "loss": 0.8516, + "step": 14069 + }, + { + "epoch": 0.76, + "learning_rate": 0.000147942245767339, + "loss": 0.8555, + "step": 14070 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014788044583668154, + "loss": 0.7266, + "step": 14071 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001478186565764032, + "loss": 0.7539, + "step": 14072 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014775687798837623, + "loss": 0.8828, + "step": 14073 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014769511007447296, + "loss": 0.8164, + "step": 14074 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001476333528365652, + "loss": 0.8711, + "step": 14075 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014757160627652433, + "loss": 0.9492, + "step": 14076 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001475098703962213, + "loss": 0.8008, + "step": 14077 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014744814519752707, + "loss": 0.8984, + "step": 14078 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014738643068231227, + "loss": 0.7344, + "step": 14079 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014732472685244675, + "loss": 0.668, + "step": 14080 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001472630337098006, + "loss": 0.8477, + "step": 14081 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014720135125624307, + "loss": 0.8242, + "step": 14082 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014713967949364344, + "loss": 0.8711, + "step": 14083 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014707801842387075, + "loss": 0.793, + "step": 14084 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014701636804879343, + "loss": 0.7969, + "step": 14085 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014695472837027946, + "loss": 0.8594, + "step": 14086 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001468930993901969, + "loss": 0.8398, + "step": 14087 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014683148111041343, + "loss": 0.8164, + "step": 14088 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014676987353279624, + "loss": 0.8086, + "step": 14089 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014670827665921188, + "loss": 0.8008, + "step": 14090 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001466466904915275, + "loss": 0.8672, + "step": 14091 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001465851150316091, + "loss": 0.8203, + "step": 14092 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014652355028132248, + "loss": 0.7969, + "step": 14093 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014646199624253337, + "loss": 0.832, + "step": 14094 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014640045291710723, + "loss": 0.8594, + "step": 14095 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014633892030690876, + "loss": 0.8438, + "step": 14096 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014627739841380284, + "loss": 0.8203, + "step": 14097 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001462158872396535, + "loss": 0.8594, + "step": 14098 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014615438678632504, + "loss": 0.8203, + "step": 14099 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014609289705568086, + "loss": 0.832, + "step": 14100 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014603141804958453, + "loss": 0.7539, + "step": 14101 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001459699497698988, + "loss": 0.8203, + "step": 14102 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014590849221848652, + "loss": 0.8008, + "step": 14103 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014584704539721022, + "loss": 0.8281, + "step": 14104 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001457856093079316, + "loss": 0.8242, + "step": 14105 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014572418395251253, + "loss": 0.75, + "step": 14106 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001456627693328146, + "loss": 0.8242, + "step": 14107 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014560136545069868, + "loss": 0.8789, + "step": 14108 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014553997230802536, + "loss": 0.7969, + "step": 14109 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001454785899066552, + "loss": 0.7344, + "step": 14110 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001454172182484485, + "loss": 0.7734, + "step": 14111 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001453558573352648, + "loss": 0.7461, + "step": 14112 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001452945071689632, + "loss": 0.8359, + "step": 14113 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014523316775140354, + "loss": 0.8398, + "step": 14114 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014517183908444427, + "loss": 0.7656, + "step": 14115 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001451105211699436, + "loss": 0.8633, + "step": 14116 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001450492140097599, + "loss": 0.8594, + "step": 14117 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014498791760575108, + "loss": 0.8281, + "step": 14118 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014492663195977442, + "loss": 0.8242, + "step": 14119 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014486535707368726, + "loss": 0.7539, + "step": 14120 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014480409294934622, + "loss": 0.8359, + "step": 14121 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014474283958860806, + "loss": 0.793, + "step": 14122 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014468159699332863, + "loss": 0.7539, + "step": 14123 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014462036516536415, + "loss": 0.7812, + "step": 14124 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014455914410656977, + "loss": 0.8867, + "step": 14125 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014449793381880087, + "loss": 0.7891, + "step": 14126 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014443673430391253, + "loss": 0.8359, + "step": 14127 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001443755455637591, + "loss": 0.8477, + "step": 14128 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014431436760019463, + "loss": 0.7617, + "step": 14129 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014425320041507318, + "loss": 0.7031, + "step": 14130 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014419204401024843, + "loss": 0.793, + "step": 14131 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014413089838757342, + "loss": 0.8477, + "step": 14132 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014406976354890111, + "loss": 0.8398, + "step": 14133 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014400863949608434, + "loss": 0.7656, + "step": 14134 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014394752623097506, + "loss": 0.7422, + "step": 14135 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014388642375542522, + "loss": 0.8242, + "step": 14136 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001438253320712865, + "loss": 0.8594, + "step": 14137 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014376425118041032, + "loss": 0.793, + "step": 14138 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014370318108464735, + "loss": 0.7656, + "step": 14139 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001436421217858485, + "loss": 0.8047, + "step": 14140 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014358107328586378, + "loss": 0.8086, + "step": 14141 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014352003558654349, + "loss": 0.7969, + "step": 14142 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001434590086897369, + "loss": 0.7578, + "step": 14143 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014339799259729353, + "loss": 0.7695, + "step": 14144 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014333698731106248, + "loss": 0.9258, + "step": 14145 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014327599283289218, + "loss": 0.8359, + "step": 14146 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014321500916463122, + "loss": 0.832, + "step": 14147 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014315403630812723, + "loss": 0.8164, + "step": 14148 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014309307426522817, + "loss": 0.707, + "step": 14149 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014303212303778146, + "loss": 0.832, + "step": 14150 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014297118262763403, + "loss": 0.8398, + "step": 14151 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014291025303663236, + "loss": 0.7031, + "step": 14152 + }, + { + "epoch": 0.76, + "learning_rate": 0.000142849334266623, + "loss": 0.7656, + "step": 14153 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001427884263194521, + "loss": 0.8086, + "step": 14154 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001427275291969653, + "loss": 0.8164, + "step": 14155 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001426666429010076, + "loss": 0.8672, + "step": 14156 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014260576743342475, + "loss": 0.9102, + "step": 14157 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001425449027960611, + "loss": 0.8672, + "step": 14158 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014248404899076084, + "loss": 0.8086, + "step": 14159 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014242320601936837, + "loss": 0.8633, + "step": 14160 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001423623738837274, + "loss": 0.7344, + "step": 14161 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014230155258568118, + "loss": 0.8203, + "step": 14162 + }, + { + "epoch": 0.76, + "learning_rate": 0.000142240742127073, + "loss": 0.832, + "step": 14163 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014217994250974536, + "loss": 0.7461, + "step": 14164 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014211915373554102, + "loss": 0.8672, + "step": 14165 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014205837580630171, + "loss": 0.8164, + "step": 14166 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014199760872386959, + "loss": 0.8359, + "step": 14167 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014193685249008563, + "loss": 0.7656, + "step": 14168 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014187610710679127, + "loss": 0.7773, + "step": 14169 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014181537257582745, + "loss": 0.7305, + "step": 14170 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014175464889903434, + "loss": 0.8438, + "step": 14171 + }, + { + "epoch": 0.76, + "learning_rate": 0.000141693936078252, + "loss": 0.918, + "step": 14172 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014163323411532031, + "loss": 0.7188, + "step": 14173 + }, + { + "epoch": 0.76, + "learning_rate": 0.000141572543012079, + "loss": 0.8359, + "step": 14174 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014151186277036688, + "loss": 0.8398, + "step": 14175 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014145119339202284, + "loss": 0.7773, + "step": 14176 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014139053487888554, + "loss": 0.8555, + "step": 14177 + }, + { + "epoch": 0.76, + "learning_rate": 0.000141329887232793, + "loss": 0.8008, + "step": 14178 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014126925045558275, + "loss": 0.7969, + "step": 14179 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001412086245490929, + "loss": 0.7812, + "step": 14180 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014114800951516026, + "loss": 0.8672, + "step": 14181 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001410874053556216, + "loss": 0.7891, + "step": 14182 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014102681207231343, + "loss": 0.8516, + "step": 14183 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014096622966707224, + "loss": 0.7422, + "step": 14184 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014090565814173345, + "loss": 0.7695, + "step": 14185 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001408450974981329, + "loss": 0.7891, + "step": 14186 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014078454773810555, + "loss": 0.8125, + "step": 14187 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014072400886348653, + "loss": 0.7812, + "step": 14188 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014066348087610997, + "loss": 0.7695, + "step": 14189 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014060296377781047, + "loss": 0.7344, + "step": 14190 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001405424575704216, + "loss": 0.8594, + "step": 14191 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014048196225577698, + "loss": 0.6914, + "step": 14192 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014042147783571002, + "loss": 0.8438, + "step": 14193 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001403610043120534, + "loss": 0.8867, + "step": 14194 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014030054168663958, + "loss": 0.7773, + "step": 14195 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014024008996130089, + "loss": 0.8867, + "step": 14196 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014017964913786934, + "loss": 0.8438, + "step": 14197 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014011921921817626, + "loss": 0.7422, + "step": 14198 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014005880020405293, + "loss": 0.7617, + "step": 14199 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013999839209733044, + "loss": 0.8242, + "step": 14200 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013993799489983927, + "loss": 0.7617, + "step": 14201 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013987760861340937, + "loss": 0.832, + "step": 14202 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001398172332398709, + "loss": 0.832, + "step": 14203 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013975686878105355, + "loss": 0.8438, + "step": 14204 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013969651523878623, + "loss": 0.832, + "step": 14205 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013963617261489818, + "loss": 0.7461, + "step": 14206 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013957584091121766, + "loss": 0.7695, + "step": 14207 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013951552012957325, + "loss": 0.8672, + "step": 14208 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001394552102717926, + "loss": 0.8672, + "step": 14209 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001393949113397035, + "loss": 0.8125, + "step": 14210 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013933462333513296, + "loss": 0.8398, + "step": 14211 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013927434625990803, + "loss": 0.7852, + "step": 14212 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013921408011585552, + "loss": 0.8438, + "step": 14213 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001391538249048015, + "loss": 0.8594, + "step": 14214 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013909358062857153, + "loss": 0.7383, + "step": 14215 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001390333472889919, + "loss": 0.7461, + "step": 14216 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001389731248878876, + "loss": 0.793, + "step": 14217 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013891291342708333, + "loss": 0.7852, + "step": 14218 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001388527129084039, + "loss": 0.7422, + "step": 14219 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001387925233336737, + "loss": 0.8945, + "step": 14220 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013873234470471657, + "loss": 0.8125, + "step": 14221 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013867217702335578, + "loss": 0.8555, + "step": 14222 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013861202029141523, + "loss": 0.8555, + "step": 14223 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013855187451071755, + "loss": 0.7031, + "step": 14224 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013849173968308515, + "loss": 0.7773, + "step": 14225 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013843161581034058, + "loss": 0.8203, + "step": 14226 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013837150289430588, + "loss": 0.9141, + "step": 14227 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013831140093680238, + "loss": 0.8242, + "step": 14228 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013825130993965168, + "loss": 0.8398, + "step": 14229 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013819122990467436, + "loss": 0.9336, + "step": 14230 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013813116083369144, + "loss": 0.793, + "step": 14231 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013807110272852285, + "loss": 0.8086, + "step": 14232 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013801105559098887, + "loss": 0.8086, + "step": 14233 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013795101942290882, + "loss": 0.9688, + "step": 14234 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013789099422610213, + "loss": 0.6836, + "step": 14235 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013783098000238792, + "loss": 0.8828, + "step": 14236 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013777097675358467, + "loss": 0.7891, + "step": 14237 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001377109844815106, + "loss": 0.793, + "step": 14238 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013765100318798368, + "loss": 0.707, + "step": 14239 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013759103287482177, + "loss": 0.8477, + "step": 14240 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013753107354384187, + "loss": 0.7812, + "step": 14241 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013747112519686106, + "loss": 0.7891, + "step": 14242 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013741118783569618, + "loss": 0.8594, + "step": 14243 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001373512614621633, + "loss": 0.8359, + "step": 14244 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001372913460780783, + "loss": 0.8984, + "step": 14245 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013723144168525697, + "loss": 0.8086, + "step": 14246 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001371715482855147, + "loss": 0.7188, + "step": 14247 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001371116658806662, + "loss": 0.7227, + "step": 14248 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001370517944725264, + "loss": 0.7266, + "step": 14249 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013699193406290928, + "loss": 0.832, + "step": 14250 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013693208465362911, + "loss": 0.918, + "step": 14251 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013687224624649925, + "loss": 0.8984, + "step": 14252 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001368124188433331, + "loss": 0.8203, + "step": 14253 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001367526024459439, + "loss": 0.8359, + "step": 14254 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013669279705614385, + "loss": 0.8906, + "step": 14255 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013663300267574558, + "loss": 0.8203, + "step": 14256 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013657321930656075, + "loss": 0.7812, + "step": 14257 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001365134469504012, + "loss": 0.8164, + "step": 14258 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013645368560907833, + "loss": 0.8555, + "step": 14259 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013639393528440296, + "loss": 0.9102, + "step": 14260 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013633419597818558, + "loss": 0.8906, + "step": 14261 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001362744676922366, + "loss": 0.9336, + "step": 14262 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013621475042836624, + "loss": 0.832, + "step": 14263 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001361550441883837, + "loss": 0.75, + "step": 14264 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013609534897409854, + "loss": 0.7305, + "step": 14265 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013603566478731976, + "loss": 0.8086, + "step": 14266 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001359759916298559, + "loss": 0.8164, + "step": 14267 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013591632950351513, + "loss": 0.7773, + "step": 14268 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013585667841010547, + "loss": 0.8242, + "step": 14269 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013579703835143482, + "loss": 0.832, + "step": 14270 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013573740932931, + "loss": 0.8906, + "step": 14271 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013567779134553842, + "loss": 0.7344, + "step": 14272 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001356181844019263, + "loss": 0.7891, + "step": 14273 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001355585885002803, + "loss": 0.8008, + "step": 14274 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013549900364240604, + "loss": 0.7852, + "step": 14275 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013543942983010944, + "loss": 0.8438, + "step": 14276 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013537986706519545, + "loss": 0.8125, + "step": 14277 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001353203153494692, + "loss": 0.8242, + "step": 14278 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013526077468473547, + "loss": 0.7891, + "step": 14279 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013520124507279834, + "loss": 0.6758, + "step": 14280 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013514172651546153, + "loss": 0.7812, + "step": 14281 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013508221901452916, + "loss": 0.8398, + "step": 14282 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013502272257180425, + "loss": 0.7773, + "step": 14283 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001349632371890896, + "loss": 1.0, + "step": 14284 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013490376286818788, + "loss": 0.7617, + "step": 14285 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001348442996109016, + "loss": 0.9219, + "step": 14286 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013478484741903253, + "loss": 0.832, + "step": 14287 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013472540629438202, + "loss": 0.8633, + "step": 14288 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001346659762387516, + "loss": 0.8594, + "step": 14289 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013460655725394232, + "loss": 0.8008, + "step": 14290 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013454714934175443, + "loss": 0.7031, + "step": 14291 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013448775250398838, + "loss": 0.832, + "step": 14292 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013442836674244424, + "loss": 0.7383, + "step": 14293 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013436899205892123, + "loss": 0.7617, + "step": 14294 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013430962845521893, + "loss": 0.8242, + "step": 14295 + }, + { + "epoch": 0.77, + "learning_rate": 0.000134250275933136, + "loss": 0.9102, + "step": 14296 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013419093449447124, + "loss": 0.7461, + "step": 14297 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013413160414102264, + "loss": 0.8516, + "step": 14298 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013407228487458838, + "loss": 0.8828, + "step": 14299 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013401297669696577, + "loss": 0.8398, + "step": 14300 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013395367960995213, + "loss": 0.8555, + "step": 14301 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013389439361534454, + "loss": 0.875, + "step": 14302 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013383511871493947, + "loss": 0.7852, + "step": 14303 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001337758549105329, + "loss": 0.7656, + "step": 14304 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001337166022039209, + "loss": 0.8242, + "step": 14305 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001336573605968992, + "loss": 0.8086, + "step": 14306 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013359813009126265, + "loss": 0.8359, + "step": 14307 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013353891068880632, + "loss": 0.8516, + "step": 14308 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013347970239132489, + "loss": 0.8594, + "step": 14309 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001334205052006125, + "loss": 0.8008, + "step": 14310 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013336131911846272, + "loss": 0.8086, + "step": 14311 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013330214414666936, + "loss": 0.7422, + "step": 14312 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013324298028702565, + "loss": 0.7617, + "step": 14313 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001331838275413243, + "loss": 0.8281, + "step": 14314 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013312468591135797, + "loss": 0.7461, + "step": 14315 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013306555539891867, + "loss": 0.8242, + "step": 14316 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001330064360057985, + "loss": 0.8125, + "step": 14317 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013294732773378866, + "loss": 0.8125, + "step": 14318 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001328882305846806, + "loss": 0.8438, + "step": 14319 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013282914456026496, + "loss": 0.8828, + "step": 14320 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001327700696623323, + "loss": 0.707, + "step": 14321 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013271100589267294, + "loss": 0.8633, + "step": 14322 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013265195325307661, + "loss": 0.7578, + "step": 14323 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013259291174533245, + "loss": 0.8711, + "step": 14324 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013253388137123018, + "loss": 0.8047, + "step": 14325 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013247486213255845, + "loss": 0.8242, + "step": 14326 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001324158540311054, + "loss": 0.7656, + "step": 14327 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001323568570686595, + "loss": 0.8945, + "step": 14328 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001322978712470086, + "loss": 0.8945, + "step": 14329 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013223889656794002, + "loss": 0.6836, + "step": 14330 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001321799330332406, + "loss": 0.7812, + "step": 14331 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001321209806446977, + "loss": 0.8594, + "step": 14332 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013206203940409756, + "loss": 0.6758, + "step": 14333 + }, + { + "epoch": 0.77, + "learning_rate": 0.000132003109313226, + "loss": 0.8594, + "step": 14334 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013194419037386912, + "loss": 0.8438, + "step": 14335 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013188528258781235, + "loss": 0.7969, + "step": 14336 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001318263859568406, + "loss": 0.7812, + "step": 14337 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013176750048273883, + "loss": 0.8633, + "step": 14338 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001317086261672913, + "loss": 0.8047, + "step": 14339 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013164976301228222, + "loss": 0.7188, + "step": 14340 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013159091101949517, + "loss": 0.6914, + "step": 14341 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013153207019071385, + "loss": 0.7539, + "step": 14342 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013147324052772098, + "loss": 0.6875, + "step": 14343 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001314144220322994, + "loss": 0.793, + "step": 14344 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013135561470623182, + "loss": 0.7617, + "step": 14345 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001312968185513, + "loss": 0.8164, + "step": 14346 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013123803356928536, + "loss": 0.7969, + "step": 14347 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013117925976196998, + "loss": 0.8047, + "step": 14348 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001311204971311345, + "loss": 0.8672, + "step": 14349 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001310617456785595, + "loss": 0.8125, + "step": 14350 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001310030054060255, + "loss": 0.8633, + "step": 14351 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001309442763153127, + "loss": 0.6758, + "step": 14352 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001308855584082006, + "loss": 0.7617, + "step": 14353 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013082685168646835, + "loss": 0.7773, + "step": 14354 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001307681561518952, + "loss": 0.8828, + "step": 14355 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013070947180625993, + "loss": 0.7461, + "step": 14356 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013065079865134054, + "loss": 0.7578, + "step": 14357 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013059213668891535, + "loss": 0.7539, + "step": 14358 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001305334859207617, + "loss": 0.7617, + "step": 14359 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001304748463486571, + "loss": 0.8516, + "step": 14360 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013041621797437852, + "loss": 0.8594, + "step": 14361 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013035760079970266, + "loss": 0.9219, + "step": 14362 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013029899482640552, + "loss": 0.8477, + "step": 14363 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001302404000562632, + "loss": 0.7695, + "step": 14364 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001301818164910515, + "loss": 0.8398, + "step": 14365 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001301232441325454, + "loss": 0.9023, + "step": 14366 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013006468298252, + "loss": 0.7852, + "step": 14367 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013000613304275, + "loss": 0.8945, + "step": 14368 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012994759431500953, + "loss": 0.7734, + "step": 14369 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012988906680107238, + "loss": 0.9297, + "step": 14370 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001298305505027122, + "loss": 0.8516, + "step": 14371 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012977204542170245, + "loss": 0.832, + "step": 14372 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012971355155981575, + "loss": 0.8438, + "step": 14373 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012965506891882473, + "loss": 0.7344, + "step": 14374 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012959659750050174, + "loss": 0.7695, + "step": 14375 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012953813730661855, + "loss": 0.8047, + "step": 14376 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012947968833894663, + "loss": 0.7734, + "step": 14377 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001294212505992572, + "loss": 0.7969, + "step": 14378 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012936282408932125, + "loss": 0.8711, + "step": 14379 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001293044088109091, + "loss": 0.8164, + "step": 14380 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012924600476579118, + "loss": 0.8008, + "step": 14381 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012918761195573702, + "loss": 0.8398, + "step": 14382 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012912923038251644, + "loss": 0.7422, + "step": 14383 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012907086004789825, + "loss": 0.8359, + "step": 14384 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012901250095365157, + "loss": 0.8555, + "step": 14385 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012895415310154457, + "loss": 0.8203, + "step": 14386 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012889581649334558, + "loss": 0.8242, + "step": 14387 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012883749113082248, + "loss": 0.8789, + "step": 14388 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012877917701574265, + "loss": 0.7812, + "step": 14389 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012872087414987277, + "loss": 0.793, + "step": 14390 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001286625825349803, + "loss": 0.832, + "step": 14391 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012860430217283142, + "loss": 0.7695, + "step": 14392 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012854603306519197, + "loss": 0.8594, + "step": 14393 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012848777521382783, + "loss": 0.9102, + "step": 14394 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001284295286205046, + "loss": 0.8164, + "step": 14395 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001283712932869872, + "loss": 0.7695, + "step": 14396 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012831306921504026, + "loss": 0.6797, + "step": 14397 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001282548564064282, + "loss": 0.8086, + "step": 14398 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012819665486291521, + "loss": 0.7891, + "step": 14399 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012813846458626478, + "loss": 0.875, + "step": 14400 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012808028557824048, + "loss": 0.8438, + "step": 14401 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012802211784060507, + "loss": 0.8281, + "step": 14402 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001279639613751214, + "loss": 0.8633, + "step": 14403 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001279058161835519, + "loss": 0.7266, + "step": 14404 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001278476822676583, + "loss": 0.75, + "step": 14405 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012778955962920247, + "loss": 0.7656, + "step": 14406 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012773144826994554, + "loss": 0.7852, + "step": 14407 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012767334819164866, + "loss": 0.7344, + "step": 14408 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012761525939607223, + "loss": 0.9453, + "step": 14409 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001275571818849766, + "loss": 0.6602, + "step": 14410 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012749911566012196, + "loss": 0.8438, + "step": 14411 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012744106072326772, + "loss": 0.7969, + "step": 14412 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012738301707617295, + "loss": 0.7539, + "step": 14413 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001273249847205968, + "loss": 0.8086, + "step": 14414 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001272669636582978, + "loss": 0.9062, + "step": 14415 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001272089538910341, + "loss": 0.793, + "step": 14416 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012715095542056366, + "loss": 0.7695, + "step": 14417 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012709296824864414, + "loss": 0.7891, + "step": 14418 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001270349923770326, + "loss": 0.8086, + "step": 14419 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001269770278074857, + "loss": 0.8008, + "step": 14420 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012691907454176027, + "loss": 0.7344, + "step": 14421 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012686113258161247, + "loss": 0.7969, + "step": 14422 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012680320192879791, + "loss": 0.7656, + "step": 14423 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001267452825850724, + "loss": 0.8516, + "step": 14424 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001266873745521907, + "loss": 0.8047, + "step": 14425 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012662947783190798, + "loss": 0.8164, + "step": 14426 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012657159242597838, + "loss": 0.8203, + "step": 14427 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012651371833615626, + "loss": 0.7578, + "step": 14428 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012645585556419526, + "loss": 0.8711, + "step": 14429 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012639800411184875, + "loss": 0.7812, + "step": 14430 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012634016398087018, + "loss": 0.8242, + "step": 14431 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012628233517301195, + "loss": 0.8945, + "step": 14432 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012622451769002635, + "loss": 0.7812, + "step": 14433 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012616671153366587, + "loss": 0.8359, + "step": 14434 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012610891670568203, + "loss": 0.7812, + "step": 14435 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012605113320782608, + "loss": 0.793, + "step": 14436 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012599336104184906, + "loss": 0.793, + "step": 14437 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012593560020950195, + "loss": 0.7578, + "step": 14438 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012587785071253467, + "loss": 0.7969, + "step": 14439 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012582011255269748, + "loss": 0.7148, + "step": 14440 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012576238573174013, + "loss": 0.7812, + "step": 14441 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001257046702514118, + "loss": 0.7852, + "step": 14442 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012564696611346127, + "loss": 0.793, + "step": 14443 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012558927331963738, + "loss": 0.7891, + "step": 14444 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001255315918716885, + "loss": 0.7344, + "step": 14445 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001254739217713623, + "loss": 0.8047, + "step": 14446 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012541626302040672, + "loss": 0.6992, + "step": 14447 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001253586156205686, + "loss": 0.8906, + "step": 14448 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001253009795735952, + "loss": 0.7969, + "step": 14449 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012524335488123278, + "loss": 0.832, + "step": 14450 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012518574154522788, + "loss": 0.8047, + "step": 14451 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012512813956732605, + "loss": 0.7539, + "step": 14452 + }, + { + "epoch": 0.78, + "learning_rate": 0.000125070548949273, + "loss": 0.8164, + "step": 14453 + }, + { + "epoch": 0.78, + "learning_rate": 0.000125012969692814, + "loss": 0.9062, + "step": 14454 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012495540179969378, + "loss": 0.8438, + "step": 14455 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012489784527165655, + "loss": 0.8359, + "step": 14456 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012484030011044705, + "loss": 0.7852, + "step": 14457 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012478276631780878, + "loss": 0.8516, + "step": 14458 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012472524389548506, + "loss": 0.7578, + "step": 14459 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012466773284521915, + "loss": 0.9609, + "step": 14460 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012461023316875404, + "loss": 0.9102, + "step": 14461 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012455274486783185, + "loss": 0.8008, + "step": 14462 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012449526794419468, + "loss": 0.6953, + "step": 14463 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001244378023995843, + "loss": 0.7969, + "step": 14464 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012438034823574235, + "loss": 0.7461, + "step": 14465 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012432290545440956, + "loss": 0.8906, + "step": 14466 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012426547405732684, + "loss": 0.7461, + "step": 14467 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012420805404623437, + "loss": 0.7656, + "step": 14468 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012415064542287225, + "loss": 0.8984, + "step": 14469 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012409324818898033, + "loss": 0.8281, + "step": 14470 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012403586234629776, + "loss": 0.8242, + "step": 14471 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001239784878965634, + "loss": 0.7695, + "step": 14472 + }, + { + "epoch": 0.78, + "learning_rate": 0.000123921124841516, + "loss": 0.7383, + "step": 14473 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012386377318289406, + "loss": 0.7266, + "step": 14474 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012380643292243527, + "loss": 0.7969, + "step": 14475 + }, + { + "epoch": 0.78, + "learning_rate": 0.000123749104061877, + "loss": 0.8359, + "step": 14476 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001236917866029571, + "loss": 0.8555, + "step": 14477 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001236344805474121, + "loss": 0.9258, + "step": 14478 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001235771858969786, + "loss": 0.8086, + "step": 14479 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012351990265339275, + "loss": 0.8359, + "step": 14480 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001234626308183907, + "loss": 0.7969, + "step": 14481 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012340537039370764, + "loss": 0.7656, + "step": 14482 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012334812138107892, + "loss": 0.8633, + "step": 14483 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012329088378223947, + "loss": 0.7891, + "step": 14484 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012323365759892368, + "loss": 0.707, + "step": 14485 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001231764428328655, + "loss": 0.8672, + "step": 14486 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001231192394857989, + "loss": 0.8242, + "step": 14487 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001230620475594575, + "loss": 0.7461, + "step": 14488 + }, + { + "epoch": 0.78, + "learning_rate": 0.000123004867055574, + "loss": 0.832, + "step": 14489 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012294769797588161, + "loss": 0.8398, + "step": 14490 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001228905403221124, + "loss": 0.8047, + "step": 14491 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012283339409599865, + "loss": 0.7266, + "step": 14492 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012277625929927178, + "loss": 0.8008, + "step": 14493 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012271913593366358, + "loss": 0.7773, + "step": 14494 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001226620240009047, + "loss": 0.832, + "step": 14495 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012260492350272596, + "loss": 0.7773, + "step": 14496 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001225478344408579, + "loss": 0.7578, + "step": 14497 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012249075681703027, + "loss": 0.8242, + "step": 14498 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012243369063297244, + "loss": 0.8047, + "step": 14499 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012237663589041432, + "loss": 0.8438, + "step": 14500 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001223195925910846, + "loss": 0.8164, + "step": 14501 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012226256073671166, + "loss": 0.793, + "step": 14502 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012220554032902391, + "loss": 0.7969, + "step": 14503 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001221485313697494, + "loss": 0.8281, + "step": 14504 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001220915338606156, + "loss": 0.8047, + "step": 14505 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012203454780334955, + "loss": 0.75, + "step": 14506 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012197757319967822, + "loss": 0.7695, + "step": 14507 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012192061005132826, + "loss": 0.793, + "step": 14508 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012186365836002567, + "loss": 0.7383, + "step": 14509 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012180671812749644, + "loss": 0.793, + "step": 14510 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012174978935546577, + "loss": 0.7695, + "step": 14511 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012169287204565905, + "loss": 0.8398, + "step": 14512 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012163596619980104, + "loss": 0.832, + "step": 14513 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012157907181961598, + "loss": 0.918, + "step": 14514 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001215221889068282, + "loss": 0.75, + "step": 14515 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012146531746316124, + "loss": 0.8633, + "step": 14516 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012140845749033869, + "loss": 0.8008, + "step": 14517 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012135160899008335, + "loss": 0.8242, + "step": 14518 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012129477196411803, + "loss": 0.875, + "step": 14519 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012123794641416525, + "loss": 0.7969, + "step": 14520 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012118113234194683, + "loss": 0.7891, + "step": 14521 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012112432974918419, + "loss": 0.8477, + "step": 14522 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012106753863759918, + "loss": 0.8438, + "step": 14523 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012101075900891245, + "loss": 0.75, + "step": 14524 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012095399086484449, + "loss": 0.8125, + "step": 14525 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012089723420711567, + "loss": 0.8516, + "step": 14526 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012084048903744616, + "loss": 0.8008, + "step": 14527 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012078375535755526, + "loss": 0.6797, + "step": 14528 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012072703316916206, + "loss": 0.8594, + "step": 14529 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012067032247398557, + "loss": 0.8359, + "step": 14530 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012061362327374454, + "loss": 0.7305, + "step": 14531 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012055693557015679, + "loss": 0.8203, + "step": 14532 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012050025936494042, + "loss": 0.8086, + "step": 14533 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012044359465981259, + "loss": 0.8047, + "step": 14534 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012038694145649066, + "loss": 0.8906, + "step": 14535 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012033029975669147, + "loss": 0.7812, + "step": 14536 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012027366956213137, + "loss": 0.7891, + "step": 14537 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012021705087452628, + "loss": 0.7969, + "step": 14538 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012016044369559204, + "loss": 0.7695, + "step": 14539 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012010384802704422, + "loss": 0.8164, + "step": 14540 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012004726387059767, + "loss": 0.8125, + "step": 14541 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001199906912279668, + "loss": 0.7617, + "step": 14542 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011993413010086652, + "loss": 0.7852, + "step": 14543 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011987758049101061, + "loss": 0.8242, + "step": 14544 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011982104240011244, + "loss": 0.8867, + "step": 14545 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001197645158298855, + "loss": 0.832, + "step": 14546 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011970800078204291, + "loss": 0.8359, + "step": 14547 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011965149725829689, + "loss": 0.793, + "step": 14548 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011959500526036004, + "loss": 0.8477, + "step": 14549 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011953852478994398, + "loss": 0.8086, + "step": 14550 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011948205584876049, + "loss": 0.8789, + "step": 14551 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011942559843852046, + "loss": 0.8594, + "step": 14552 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011936915256093494, + "loss": 0.7227, + "step": 14553 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011931271821771456, + "loss": 0.8164, + "step": 14554 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011925629541056909, + "loss": 0.9531, + "step": 14555 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011919988414120875, + "loss": 0.8047, + "step": 14556 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011914348441134259, + "loss": 0.8086, + "step": 14557 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001190870962226801, + "loss": 0.8281, + "step": 14558 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011903071957692962, + "loss": 0.875, + "step": 14559 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011897435447579991, + "loss": 0.8594, + "step": 14560 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001189180009209987, + "loss": 0.7852, + "step": 14561 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011886165891423384, + "loss": 0.8359, + "step": 14562 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011880532845721287, + "loss": 0.8281, + "step": 14563 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011874900955164258, + "loss": 0.8516, + "step": 14564 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011869270219922934, + "loss": 0.7617, + "step": 14565 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011863640640168, + "loss": 0.8594, + "step": 14566 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001185801221607003, + "loss": 0.7969, + "step": 14567 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001185238494779956, + "loss": 0.7227, + "step": 14568 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011846758835527138, + "loss": 0.7578, + "step": 14569 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011841133879423266, + "loss": 0.6992, + "step": 14570 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001183551007965838, + "loss": 0.8398, + "step": 14571 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011829887436402881, + "loss": 0.8281, + "step": 14572 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011824265949827179, + "loss": 0.75, + "step": 14573 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011818645620101637, + "loss": 0.7695, + "step": 14574 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011813026447396529, + "loss": 0.7812, + "step": 14575 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011807408431882178, + "loss": 0.7773, + "step": 14576 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011801791573728787, + "loss": 0.6914, + "step": 14577 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011796175873106591, + "loss": 0.8125, + "step": 14578 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011790561330185767, + "loss": 0.8438, + "step": 14579 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011784947945136448, + "loss": 0.793, + "step": 14580 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001177933571812872, + "loss": 0.7695, + "step": 14581 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011773724649332673, + "loss": 0.8789, + "step": 14582 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011768114738918345, + "loss": 0.8516, + "step": 14583 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011762505987055721, + "loss": 0.7422, + "step": 14584 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011756898393914744, + "loss": 0.8281, + "step": 14585 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011751291959665395, + "loss": 0.7812, + "step": 14586 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011745686684477541, + "loss": 0.9102, + "step": 14587 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011740082568521021, + "loss": 0.8359, + "step": 14588 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011734479611965682, + "loss": 0.7891, + "step": 14589 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011728877814981315, + "loss": 0.8047, + "step": 14590 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011723277177737646, + "loss": 0.8398, + "step": 14591 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011717677700404417, + "loss": 0.6719, + "step": 14592 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011712079383151314, + "loss": 0.8203, + "step": 14593 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011706482226147975, + "loss": 0.7188, + "step": 14594 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011700886229563995, + "loss": 0.7344, + "step": 14595 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011695291393568974, + "loss": 0.8906, + "step": 14596 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011689697718332453, + "loss": 0.7695, + "step": 14597 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011684105204023927, + "loss": 0.875, + "step": 14598 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011678513850812884, + "loss": 0.8711, + "step": 14599 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011672923658868739, + "loss": 0.7695, + "step": 14600 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011667334628360898, + "loss": 0.7578, + "step": 14601 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011661746759458752, + "loss": 0.9219, + "step": 14602 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011656160052331616, + "loss": 0.8242, + "step": 14603 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011650574507148764, + "loss": 0.7266, + "step": 14604 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011644990124079474, + "loss": 0.7422, + "step": 14605 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011639406903292987, + "loss": 0.8438, + "step": 14606 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001163382484495848, + "loss": 0.8477, + "step": 14607 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011628243949245077, + "loss": 0.9141, + "step": 14608 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011622664216321954, + "loss": 0.7695, + "step": 14609 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011617085646358166, + "loss": 0.9297, + "step": 14610 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011611508239522756, + "loss": 0.8672, + "step": 14611 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011605931995984736, + "loss": 0.8281, + "step": 14612 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011600356915913113, + "loss": 0.7617, + "step": 14613 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011594782999476799, + "loss": 0.875, + "step": 14614 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011589210246844723, + "loss": 0.8008, + "step": 14615 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011583638658185735, + "loss": 0.8516, + "step": 14616 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011578068233668704, + "loss": 0.6875, + "step": 14617 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011572498973462397, + "loss": 0.8242, + "step": 14618 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011566930877735615, + "loss": 0.8438, + "step": 14619 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011561363946657055, + "loss": 0.9062, + "step": 14620 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011555798180395433, + "loss": 0.7734, + "step": 14621 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011550233579119424, + "loss": 0.8516, + "step": 14622 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011544670142997632, + "loss": 0.8359, + "step": 14623 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011539107872198646, + "loss": 0.8555, + "step": 14624 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011533546766891024, + "loss": 0.7266, + "step": 14625 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011527986827243303, + "loss": 0.7734, + "step": 14626 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011522428053423944, + "loss": 0.9102, + "step": 14627 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011516870445601402, + "loss": 0.8008, + "step": 14628 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011511314003944112, + "loss": 0.8008, + "step": 14629 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011505758728620436, + "loss": 0.8438, + "step": 14630 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011500204619798688, + "loss": 0.7578, + "step": 14631 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011494651677647233, + "loss": 0.8516, + "step": 14632 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011489099902334316, + "loss": 0.7656, + "step": 14633 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011483549294028156, + "loss": 0.9219, + "step": 14634 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011477999852896975, + "loss": 0.9258, + "step": 14635 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001147245157910895, + "loss": 0.832, + "step": 14636 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011466904472832195, + "loss": 0.7617, + "step": 14637 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011461358534234795, + "loss": 0.7734, + "step": 14638 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011455813763484823, + "loss": 0.7617, + "step": 14639 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001145027016075032, + "loss": 0.8516, + "step": 14640 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011444727726199244, + "loss": 0.8203, + "step": 14641 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011439186459999579, + "loss": 0.8203, + "step": 14642 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011433646362319216, + "loss": 0.8164, + "step": 14643 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011428107433326052, + "loss": 0.7305, + "step": 14644 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011422569673187949, + "loss": 0.8203, + "step": 14645 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011417033082072703, + "loss": 0.793, + "step": 14646 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011411497660148074, + "loss": 0.7773, + "step": 14647 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011405963407581826, + "loss": 0.832, + "step": 14648 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011400430324541678, + "loss": 0.7773, + "step": 14649 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011394898411195276, + "loss": 0.7656, + "step": 14650 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011389367667710237, + "loss": 0.7969, + "step": 14651 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011383838094254218, + "loss": 0.7812, + "step": 14652 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011378309690994748, + "loss": 0.8633, + "step": 14653 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011372782458099346, + "loss": 0.8477, + "step": 14654 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001136725639573552, + "loss": 0.8438, + "step": 14655 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011361731504070738, + "loss": 0.8438, + "step": 14656 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011356207783272398, + "loss": 0.9297, + "step": 14657 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011350685233507918, + "loss": 0.8008, + "step": 14658 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011345163854944613, + "loss": 0.8633, + "step": 14659 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001133964364774983, + "loss": 0.9023, + "step": 14660 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011334124612090824, + "loss": 0.8086, + "step": 14661 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011328606748134851, + "loss": 0.7891, + "step": 14662 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011323090056049134, + "loss": 0.7812, + "step": 14663 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011317574536000824, + "loss": 0.832, + "step": 14664 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001131206018815708, + "loss": 0.7891, + "step": 14665 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011306547012684987, + "loss": 0.7812, + "step": 14666 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011301035009751632, + "loss": 0.9023, + "step": 14667 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001129552417952402, + "loss": 0.7461, + "step": 14668 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011290014522169173, + "loss": 0.7969, + "step": 14669 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011284506037854031, + "loss": 0.8359, + "step": 14670 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011278998726745527, + "loss": 0.7773, + "step": 14671 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001127349258901057, + "loss": 0.7188, + "step": 14672 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001126798762481599, + "loss": 0.7305, + "step": 14673 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001126248383432859, + "loss": 0.7852, + "step": 14674 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011256981217715201, + "loss": 0.9453, + "step": 14675 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011251479775142548, + "loss": 0.8711, + "step": 14676 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001124597950677732, + "loss": 0.7969, + "step": 14677 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011240480412786213, + "loss": 0.8711, + "step": 14678 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011234982493335888, + "loss": 0.8398, + "step": 14679 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001122948574859291, + "loss": 0.8438, + "step": 14680 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001122399017872388, + "loss": 0.793, + "step": 14681 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001121849578389531, + "loss": 0.8203, + "step": 14682 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011213002564273716, + "loss": 0.8008, + "step": 14683 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011207510520025544, + "loss": 0.7656, + "step": 14684 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011202019651317241, + "loss": 0.8125, + "step": 14685 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011196529958315172, + "loss": 0.8086, + "step": 14686 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011191041441185706, + "loss": 0.832, + "step": 14687 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011185554100095179, + "loss": 0.8086, + "step": 14688 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011180067935209859, + "loss": 0.75, + "step": 14689 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011174582946695987, + "loss": 0.7734, + "step": 14690 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011169099134719785, + "loss": 0.7812, + "step": 14691 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011163616499447444, + "loss": 0.7773, + "step": 14692 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001115813504104508, + "loss": 0.8438, + "step": 14693 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001115265475967881, + "loss": 0.7969, + "step": 14694 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011147175655514724, + "loss": 0.793, + "step": 14695 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011141697728718835, + "loss": 0.9023, + "step": 14696 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001113622097945714, + "loss": 0.7266, + "step": 14697 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011130745407895603, + "loss": 0.8633, + "step": 14698 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011125271014200177, + "loss": 0.7695, + "step": 14699 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011119797798536718, + "loss": 0.793, + "step": 14700 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011114325761071103, + "loss": 0.8164, + "step": 14701 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011108854901969162, + "loss": 0.7812, + "step": 14702 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011103385221396666, + "loss": 0.7812, + "step": 14703 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011097916719519352, + "loss": 0.7422, + "step": 14704 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011092449396502952, + "loss": 0.8086, + "step": 14705 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011086983252513145, + "loss": 0.8008, + "step": 14706 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011081518287715559, + "loss": 0.8828, + "step": 14707 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011076054502275822, + "loss": 0.8594, + "step": 14708 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011070591896359477, + "loss": 0.8945, + "step": 14709 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011065130470132073, + "loss": 0.7852, + "step": 14710 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011059670223759122, + "loss": 0.7656, + "step": 14711 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011054211157406074, + "loss": 0.8398, + "step": 14712 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011048753271238348, + "loss": 0.8398, + "step": 14713 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011043296565421345, + "loss": 0.8438, + "step": 14714 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011037841040120433, + "loss": 0.8594, + "step": 14715 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011032386695500917, + "loss": 0.832, + "step": 14716 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011026933531728067, + "loss": 0.7461, + "step": 14717 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011021481548967172, + "loss": 0.7617, + "step": 14718 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011016030747383421, + "loss": 0.8086, + "step": 14719 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011010581127141982, + "loss": 0.8516, + "step": 14720 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001100513268840801, + "loss": 0.832, + "step": 14721 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010999685431346618, + "loss": 0.8203, + "step": 14722 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010994239356122854, + "loss": 0.875, + "step": 14723 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001098879446290178, + "loss": 0.8125, + "step": 14724 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010983350751848358, + "loss": 0.8086, + "step": 14725 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010977908223127591, + "loss": 0.8633, + "step": 14726 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010972466876904369, + "loss": 0.7773, + "step": 14727 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010967026713343608, + "loss": 0.7539, + "step": 14728 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010961587732610146, + "loss": 0.8086, + "step": 14729 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010956149934868808, + "loss": 0.6797, + "step": 14730 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010950713320284389, + "loss": 0.8594, + "step": 14731 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010945277889021632, + "loss": 0.8867, + "step": 14732 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010939843641245224, + "loss": 0.9102, + "step": 14733 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010934410577119858, + "loss": 0.8164, + "step": 14734 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010928978696810188, + "loss": 0.8086, + "step": 14735 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010923548000480794, + "loss": 0.7578, + "step": 14736 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010918118488296252, + "loss": 0.7812, + "step": 14737 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010912690160421113, + "loss": 0.8477, + "step": 14738 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010907263017019852, + "loss": 0.7734, + "step": 14739 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010901837058256909, + "loss": 0.8516, + "step": 14740 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010896412284296763, + "loss": 0.8008, + "step": 14741 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010890988695303772, + "loss": 0.8008, + "step": 14742 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010885566291442272, + "loss": 0.875, + "step": 14743 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010880145072876596, + "loss": 0.8672, + "step": 14744 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010874725039771044, + "loss": 0.8477, + "step": 14745 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001086930619228984, + "loss": 0.7422, + "step": 14746 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010863888530597183, + "loss": 0.8672, + "step": 14747 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010858472054857255, + "loss": 0.7773, + "step": 14748 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010853056765234209, + "loss": 0.8203, + "step": 14749 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010847642661892127, + "loss": 0.8359, + "step": 14750 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010842229744995086, + "loss": 0.7695, + "step": 14751 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010836818014707101, + "loss": 0.8438, + "step": 14752 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010831407471192173, + "loss": 0.8477, + "step": 14753 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010825998114614272, + "loss": 0.7969, + "step": 14754 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010820589945137304, + "loss": 0.8711, + "step": 14755 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010815182962925152, + "loss": 0.7969, + "step": 14756 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010809777168141666, + "loss": 0.8359, + "step": 14757 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010804372560950676, + "loss": 0.8086, + "step": 14758 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010798969141515951, + "loss": 0.8242, + "step": 14759 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010793566910001201, + "loss": 0.8555, + "step": 14760 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010788165866570187, + "loss": 0.8984, + "step": 14761 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010782766011386546, + "loss": 0.7656, + "step": 14762 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010777367344613909, + "loss": 0.8477, + "step": 14763 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010771969866415876, + "loss": 0.8906, + "step": 14764 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010766573576956029, + "loss": 0.8477, + "step": 14765 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010761178476397865, + "loss": 0.8008, + "step": 14766 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010755784564904891, + "loss": 0.7812, + "step": 14767 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001075039184264055, + "loss": 0.8008, + "step": 14768 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010745000309768271, + "loss": 0.7188, + "step": 14769 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010739609966451419, + "loss": 0.7734, + "step": 14770 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010734220812853358, + "loss": 0.8242, + "step": 14771 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010728832849137372, + "loss": 0.6992, + "step": 14772 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010723446075466753, + "loss": 0.75, + "step": 14773 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010718060492004744, + "loss": 0.8047, + "step": 14774 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010712676098914525, + "loss": 0.8359, + "step": 14775 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010707292896359272, + "loss": 0.793, + "step": 14776 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010701910884502125, + "loss": 0.7266, + "step": 14777 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010696530063506166, + "loss": 0.668, + "step": 14778 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010691150433534435, + "loss": 0.7969, + "step": 14779 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010685771994749971, + "loss": 0.8125, + "step": 14780 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010680394747315763, + "loss": 0.6992, + "step": 14781 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010675018691394761, + "loss": 0.7812, + "step": 14782 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010669643827149833, + "loss": 0.8516, + "step": 14783 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010664270154743921, + "loss": 0.7891, + "step": 14784 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001065889767433983, + "loss": 0.8281, + "step": 14785 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010653526386100354, + "loss": 0.8789, + "step": 14786 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010648156290188277, + "loss": 0.8203, + "step": 14787 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010642787386766339, + "loss": 0.7344, + "step": 14788 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010637419675997212, + "loss": 0.7422, + "step": 14789 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010632053158043581, + "loss": 0.7734, + "step": 14790 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010626687833068043, + "loss": 0.7539, + "step": 14791 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001062132370123321, + "loss": 0.8008, + "step": 14792 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001061596076270161, + "loss": 0.8047, + "step": 14793 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010610599017635775, + "loss": 0.7969, + "step": 14794 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010605238466198169, + "loss": 0.8594, + "step": 14795 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010599879108551236, + "loss": 0.8789, + "step": 14796 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010594520944857405, + "loss": 0.6836, + "step": 14797 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010589163975279026, + "loss": 0.7188, + "step": 14798 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010583808199978428, + "loss": 0.8398, + "step": 14799 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010578453619117912, + "loss": 0.8438, + "step": 14800 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010573100232859756, + "loss": 0.7812, + "step": 14801 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001056774804136616, + "loss": 0.6953, + "step": 14802 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010562397044799326, + "loss": 0.8789, + "step": 14803 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010557047243321422, + "loss": 0.8672, + "step": 14804 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010551698637094548, + "loss": 0.8281, + "step": 14805 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010546351226280771, + "loss": 0.8242, + "step": 14806 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010541005011042154, + "loss": 0.8789, + "step": 14807 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010535659991540708, + "loss": 0.7891, + "step": 14808 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010530316167938386, + "loss": 0.793, + "step": 14809 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010524973540397148, + "loss": 0.8594, + "step": 14810 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010519632109078864, + "loss": 0.6719, + "step": 14811 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010514291874145432, + "loss": 0.8477, + "step": 14812 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001050895283575864, + "loss": 0.8164, + "step": 14813 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010503614994080302, + "loss": 0.8281, + "step": 14814 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010498278349272178, + "loss": 0.8789, + "step": 14815 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010492942901495961, + "loss": 0.8359, + "step": 14816 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010487608650913365, + "loss": 0.7734, + "step": 14817 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010482275597686003, + "loss": 0.9414, + "step": 14818 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010476943741975492, + "loss": 0.8711, + "step": 14819 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010471613083943432, + "loss": 0.7227, + "step": 14820 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010466283623751338, + "loss": 0.8789, + "step": 14821 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010460955361560697, + "loss": 0.8516, + "step": 14822 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010455628297532987, + "loss": 0.707, + "step": 14823 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010450302431829645, + "loss": 0.793, + "step": 14824 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010444977764612057, + "loss": 0.7812, + "step": 14825 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010439654296041545, + "loss": 0.8086, + "step": 14826 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010434332026279485, + "loss": 0.9062, + "step": 14827 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010429010955487128, + "loss": 0.8398, + "step": 14828 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001042369108382571, + "loss": 0.7383, + "step": 14829 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010418372411456451, + "loss": 0.7422, + "step": 14830 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001041305493854054, + "loss": 0.8555, + "step": 14831 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010407738665239086, + "loss": 0.8828, + "step": 14832 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010402423591713218, + "loss": 0.8477, + "step": 14833 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010397109718123971, + "loss": 0.8516, + "step": 14834 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010391797044632401, + "loss": 0.832, + "step": 14835 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010386485571399473, + "loss": 0.7695, + "step": 14836 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001038117529858617, + "loss": 0.7461, + "step": 14837 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010375866226353381, + "loss": 0.8047, + "step": 14838 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010370558354862003, + "loss": 0.793, + "step": 14839 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010365251684272898, + "loss": 0.8672, + "step": 14840 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010359946214746856, + "loss": 0.7305, + "step": 14841 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010354641946444643, + "loss": 0.8359, + "step": 14842 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010349338879527003, + "loss": 0.8203, + "step": 14843 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010344037014154656, + "loss": 0.8125, + "step": 14844 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010338736350488237, + "loss": 0.7148, + "step": 14845 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010333436888688386, + "loss": 0.8281, + "step": 14846 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010328138628915706, + "loss": 0.8867, + "step": 14847 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010322841571330743, + "loss": 0.8203, + "step": 14848 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010317545716093979, + "loss": 0.7891, + "step": 14849 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010312251063365963, + "loss": 0.7422, + "step": 14850 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010306957613307106, + "loss": 0.7852, + "step": 14851 + }, + { + "epoch": 0.8, + "learning_rate": 0.000103016653660778, + "loss": 0.9023, + "step": 14852 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010296374321838436, + "loss": 0.8125, + "step": 14853 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010291084480749363, + "loss": 0.8477, + "step": 14854 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010285795842970852, + "loss": 0.75, + "step": 14855 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010280508408663197, + "loss": 0.7422, + "step": 14856 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010275222177986592, + "loss": 0.8594, + "step": 14857 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010269937151101255, + "loss": 0.7812, + "step": 14858 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010264653328167317, + "loss": 0.8672, + "step": 14859 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010259370709344923, + "loss": 0.8594, + "step": 14860 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010254089294794117, + "loss": 0.793, + "step": 14861 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001024880908467497, + "loss": 0.9102, + "step": 14862 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010243530079147489, + "loss": 0.8047, + "step": 14863 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010238252278371646, + "loss": 0.7227, + "step": 14864 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001023297568250735, + "loss": 0.7695, + "step": 14865 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001022770029171452, + "loss": 0.7734, + "step": 14866 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010222426106153027, + "loss": 0.8047, + "step": 14867 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010217153125982676, + "loss": 0.9258, + "step": 14868 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010211881351363261, + "loss": 0.7969, + "step": 14869 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010206610782454551, + "loss": 0.8516, + "step": 14870 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010201341419416249, + "loss": 0.832, + "step": 14871 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010196073262408018, + "loss": 0.7891, + "step": 14872 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010190806311589517, + "loss": 0.7734, + "step": 14873 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001018554056712036, + "loss": 0.7773, + "step": 14874 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010180276029160101, + "loss": 0.7969, + "step": 14875 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010175012697868285, + "loss": 0.8008, + "step": 14876 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001016975057340439, + "loss": 0.8164, + "step": 14877 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010164489655927905, + "loss": 0.7891, + "step": 14878 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001015922994559822, + "loss": 0.8711, + "step": 14879 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010153971442574745, + "loss": 0.7578, + "step": 14880 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010148714147016813, + "loss": 0.8164, + "step": 14881 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010143458059083748, + "loss": 0.75, + "step": 14882 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010138203178934835, + "loss": 0.9727, + "step": 14883 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010132949506729306, + "loss": 0.8711, + "step": 14884 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010127697042626332, + "loss": 0.9258, + "step": 14885 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010122445786785145, + "loss": 0.8125, + "step": 14886 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010117195739364831, + "loss": 0.9023, + "step": 14887 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010111946900524488, + "loss": 0.8164, + "step": 14888 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010106699270423175, + "loss": 0.8242, + "step": 14889 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010101452849219934, + "loss": 0.9062, + "step": 14890 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010096207637073724, + "loss": 0.7734, + "step": 14891 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010090963634143474, + "loss": 0.8438, + "step": 14892 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010085720840588153, + "loss": 0.8164, + "step": 14893 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010080479256566593, + "loss": 0.8203, + "step": 14894 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001007523888223763, + "loss": 0.7617, + "step": 14895 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001006999971776007, + "loss": 0.793, + "step": 14896 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010064761763292701, + "loss": 0.8828, + "step": 14897 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010059525018994215, + "loss": 0.832, + "step": 14898 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010054289485023327, + "loss": 0.8633, + "step": 14899 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010049055161538673, + "loss": 0.8672, + "step": 14900 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010043822048698886, + "loss": 0.832, + "step": 14901 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010038590146662535, + "loss": 0.8711, + "step": 14902 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010033359455588176, + "loss": 0.8438, + "step": 14903 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010028129975634293, + "loss": 0.8477, + "step": 14904 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010022901706959375, + "loss": 0.8516, + "step": 14905 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010017674649721864, + "loss": 0.8555, + "step": 14906 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010012448804080149, + "loss": 0.7305, + "step": 14907 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010007224170192569, + "loss": 0.7617, + "step": 14908 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010002000748217465, + "loss": 0.6992, + "step": 14909 + }, + { + "epoch": 0.8, + "learning_rate": 9.996778538313145e-05, + "loss": 0.8438, + "step": 14910 + }, + { + "epoch": 0.8, + "learning_rate": 9.991557540637819e-05, + "loss": 0.8359, + "step": 14911 + }, + { + "epoch": 0.8, + "learning_rate": 9.986337755349718e-05, + "loss": 0.7734, + "step": 14912 + }, + { + "epoch": 0.8, + "learning_rate": 9.981119182607035e-05, + "loss": 0.7891, + "step": 14913 + }, + { + "epoch": 0.8, + "learning_rate": 9.975901822567896e-05, + "loss": 0.7773, + "step": 14914 + }, + { + "epoch": 0.8, + "learning_rate": 9.97068567539039e-05, + "loss": 0.7852, + "step": 14915 + }, + { + "epoch": 0.8, + "learning_rate": 9.965470741232596e-05, + "loss": 0.8242, + "step": 14916 + }, + { + "epoch": 0.8, + "learning_rate": 9.960257020252562e-05, + "loss": 0.7109, + "step": 14917 + }, + { + "epoch": 0.8, + "learning_rate": 9.955044512608247e-05, + "loss": 0.7969, + "step": 14918 + }, + { + "epoch": 0.8, + "learning_rate": 9.949833218457633e-05, + "loss": 0.8203, + "step": 14919 + }, + { + "epoch": 0.8, + "learning_rate": 9.94462313795862e-05, + "loss": 0.8438, + "step": 14920 + }, + { + "epoch": 0.8, + "learning_rate": 9.939414271269114e-05, + "loss": 0.8711, + "step": 14921 + }, + { + "epoch": 0.8, + "learning_rate": 9.934206618546932e-05, + "loss": 0.7734, + "step": 14922 + }, + { + "epoch": 0.8, + "learning_rate": 9.929000179949899e-05, + "loss": 0.7227, + "step": 14923 + }, + { + "epoch": 0.8, + "learning_rate": 9.923794955635801e-05, + "loss": 0.9062, + "step": 14924 + }, + { + "epoch": 0.8, + "learning_rate": 9.918590945762341e-05, + "loss": 0.8008, + "step": 14925 + }, + { + "epoch": 0.8, + "learning_rate": 9.913388150487252e-05, + "loss": 0.7891, + "step": 14926 + }, + { + "epoch": 0.8, + "learning_rate": 9.908186569968164e-05, + "loss": 0.8594, + "step": 14927 + }, + { + "epoch": 0.8, + "learning_rate": 9.902986204362713e-05, + "loss": 0.8398, + "step": 14928 + }, + { + "epoch": 0.8, + "learning_rate": 9.89778705382851e-05, + "loss": 0.7812, + "step": 14929 + }, + { + "epoch": 0.8, + "learning_rate": 9.892589118523088e-05, + "loss": 0.8398, + "step": 14930 + }, + { + "epoch": 0.8, + "learning_rate": 9.887392398603945e-05, + "loss": 0.7383, + "step": 14931 + }, + { + "epoch": 0.8, + "learning_rate": 9.882196894228574e-05, + "loss": 0.75, + "step": 14932 + }, + { + "epoch": 0.8, + "learning_rate": 9.877002605554431e-05, + "loss": 0.8359, + "step": 14933 + }, + { + "epoch": 0.8, + "learning_rate": 9.87180953273889e-05, + "loss": 0.8203, + "step": 14934 + }, + { + "epoch": 0.8, + "learning_rate": 9.866617675939338e-05, + "loss": 0.8398, + "step": 14935 + }, + { + "epoch": 0.8, + "learning_rate": 9.861427035313115e-05, + "loss": 0.7539, + "step": 14936 + }, + { + "epoch": 0.8, + "learning_rate": 9.856237611017493e-05, + "loss": 0.8516, + "step": 14937 + }, + { + "epoch": 0.8, + "learning_rate": 9.851049403209727e-05, + "loss": 0.8125, + "step": 14938 + }, + { + "epoch": 0.8, + "learning_rate": 9.845862412047046e-05, + "loss": 0.8008, + "step": 14939 + }, + { + "epoch": 0.8, + "learning_rate": 9.840676637686646e-05, + "loss": 0.8438, + "step": 14940 + }, + { + "epoch": 0.8, + "learning_rate": 9.835492080285646e-05, + "loss": 0.8047, + "step": 14941 + }, + { + "epoch": 0.8, + "learning_rate": 9.830308740001181e-05, + "loss": 0.8906, + "step": 14942 + }, + { + "epoch": 0.8, + "learning_rate": 9.825126616990299e-05, + "loss": 0.8555, + "step": 14943 + }, + { + "epoch": 0.8, + "learning_rate": 9.819945711410061e-05, + "loss": 0.7891, + "step": 14944 + }, + { + "epoch": 0.8, + "learning_rate": 9.814766023417437e-05, + "loss": 0.8125, + "step": 14945 + }, + { + "epoch": 0.8, + "learning_rate": 9.809587553169418e-05, + "loss": 0.8359, + "step": 14946 + }, + { + "epoch": 0.8, + "learning_rate": 9.804410300822902e-05, + "loss": 0.8945, + "step": 14947 + }, + { + "epoch": 0.8, + "learning_rate": 9.799234266534784e-05, + "loss": 0.7852, + "step": 14948 + }, + { + "epoch": 0.8, + "learning_rate": 9.794059450461939e-05, + "loss": 0.9219, + "step": 14949 + }, + { + "epoch": 0.8, + "learning_rate": 9.788885852761153e-05, + "loss": 0.832, + "step": 14950 + }, + { + "epoch": 0.8, + "learning_rate": 9.783713473589195e-05, + "loss": 0.8047, + "step": 14951 + }, + { + "epoch": 0.8, + "learning_rate": 9.77854231310284e-05, + "loss": 0.6992, + "step": 14952 + }, + { + "epoch": 0.8, + "learning_rate": 9.773372371458778e-05, + "loss": 0.8164, + "step": 14953 + }, + { + "epoch": 0.8, + "learning_rate": 9.768203648813656e-05, + "loss": 0.6797, + "step": 14954 + }, + { + "epoch": 0.8, + "learning_rate": 9.763036145324117e-05, + "loss": 0.8008, + "step": 14955 + }, + { + "epoch": 0.8, + "learning_rate": 9.757869861146762e-05, + "loss": 0.7383, + "step": 14956 + }, + { + "epoch": 0.8, + "learning_rate": 9.752704796438144e-05, + "loss": 0.8125, + "step": 14957 + }, + { + "epoch": 0.8, + "learning_rate": 9.747540951354761e-05, + "loss": 0.9453, + "step": 14958 + }, + { + "epoch": 0.8, + "learning_rate": 9.742378326053102e-05, + "loss": 0.9375, + "step": 14959 + }, + { + "epoch": 0.8, + "learning_rate": 9.73721692068964e-05, + "loss": 0.8008, + "step": 14960 + }, + { + "epoch": 0.8, + "learning_rate": 9.732056735420746e-05, + "loss": 0.8008, + "step": 14961 + }, + { + "epoch": 0.8, + "learning_rate": 9.726897770402804e-05, + "loss": 0.8672, + "step": 14962 + }, + { + "epoch": 0.8, + "learning_rate": 9.721740025792164e-05, + "loss": 0.8477, + "step": 14963 + }, + { + "epoch": 0.8, + "learning_rate": 9.71658350174509e-05, + "loss": 0.7539, + "step": 14964 + }, + { + "epoch": 0.8, + "learning_rate": 9.711428198417877e-05, + "loss": 0.8125, + "step": 14965 + }, + { + "epoch": 0.8, + "learning_rate": 9.706274115966713e-05, + "loss": 0.8945, + "step": 14966 + }, + { + "epoch": 0.8, + "learning_rate": 9.701121254547818e-05, + "loss": 0.8242, + "step": 14967 + }, + { + "epoch": 0.8, + "learning_rate": 9.695969614317307e-05, + "loss": 0.707, + "step": 14968 + }, + { + "epoch": 0.8, + "learning_rate": 9.690819195431322e-05, + "loss": 0.9531, + "step": 14969 + }, + { + "epoch": 0.8, + "learning_rate": 9.685669998045909e-05, + "loss": 0.707, + "step": 14970 + }, + { + "epoch": 0.8, + "learning_rate": 9.680522022317123e-05, + "loss": 0.7617, + "step": 14971 + }, + { + "epoch": 0.8, + "learning_rate": 9.675375268400971e-05, + "loss": 0.7617, + "step": 14972 + }, + { + "epoch": 0.8, + "learning_rate": 9.670229736453412e-05, + "loss": 0.7891, + "step": 14973 + }, + { + "epoch": 0.8, + "learning_rate": 9.66508542663035e-05, + "loss": 0.8828, + "step": 14974 + }, + { + "epoch": 0.8, + "learning_rate": 9.659942339087691e-05, + "loss": 0.8477, + "step": 14975 + }, + { + "epoch": 0.8, + "learning_rate": 9.654800473981307e-05, + "loss": 0.8086, + "step": 14976 + }, + { + "epoch": 0.8, + "learning_rate": 9.649659831466979e-05, + "loss": 0.8516, + "step": 14977 + }, + { + "epoch": 0.81, + "learning_rate": 9.644520411700497e-05, + "loss": 0.8828, + "step": 14978 + }, + { + "epoch": 0.81, + "learning_rate": 9.639382214837627e-05, + "loss": 0.7461, + "step": 14979 + }, + { + "epoch": 0.81, + "learning_rate": 9.634245241034045e-05, + "loss": 0.7461, + "step": 14980 + }, + { + "epoch": 0.81, + "learning_rate": 9.62910949044542e-05, + "loss": 0.6953, + "step": 14981 + }, + { + "epoch": 0.81, + "learning_rate": 9.623974963227383e-05, + "loss": 0.8516, + "step": 14982 + }, + { + "epoch": 0.81, + "learning_rate": 9.618841659535548e-05, + "loss": 0.8164, + "step": 14983 + }, + { + "epoch": 0.81, + "learning_rate": 9.613709579525442e-05, + "loss": 0.8516, + "step": 14984 + }, + { + "epoch": 0.81, + "learning_rate": 9.608578723352613e-05, + "loss": 0.8008, + "step": 14985 + }, + { + "epoch": 0.81, + "learning_rate": 9.603449091172511e-05, + "loss": 0.8516, + "step": 14986 + }, + { + "epoch": 0.81, + "learning_rate": 9.598320683140611e-05, + "loss": 0.832, + "step": 14987 + }, + { + "epoch": 0.81, + "learning_rate": 9.59319349941229e-05, + "loss": 0.7695, + "step": 14988 + }, + { + "epoch": 0.81, + "learning_rate": 9.588067540142947e-05, + "loss": 0.8281, + "step": 14989 + }, + { + "epoch": 0.81, + "learning_rate": 9.582942805487893e-05, + "loss": 0.8242, + "step": 14990 + }, + { + "epoch": 0.81, + "learning_rate": 9.57781929560243e-05, + "loss": 0.8125, + "step": 14991 + }, + { + "epoch": 0.81, + "learning_rate": 9.57269701064184e-05, + "loss": 0.793, + "step": 14992 + }, + { + "epoch": 0.81, + "learning_rate": 9.567575950761326e-05, + "loss": 0.8164, + "step": 14993 + }, + { + "epoch": 0.81, + "learning_rate": 9.562456116116047e-05, + "loss": 0.8086, + "step": 14994 + }, + { + "epoch": 0.81, + "learning_rate": 9.557337506861202e-05, + "loss": 0.8242, + "step": 14995 + }, + { + "epoch": 0.81, + "learning_rate": 9.552220123151878e-05, + "loss": 0.8047, + "step": 14996 + }, + { + "epoch": 0.81, + "learning_rate": 9.547103965143133e-05, + "loss": 0.8555, + "step": 14997 + }, + { + "epoch": 0.81, + "learning_rate": 9.541989032990017e-05, + "loss": 0.7344, + "step": 14998 + }, + { + "epoch": 0.81, + "learning_rate": 9.536875326847544e-05, + "loss": 0.8398, + "step": 14999 + }, + { + "epoch": 0.81, + "learning_rate": 9.531762846870662e-05, + "loss": 0.8086, + "step": 15000 + }, + { + "epoch": 0.81, + "learning_rate": 9.526651593214274e-05, + "loss": 0.8594, + "step": 15001 + }, + { + "epoch": 0.81, + "learning_rate": 9.521541566033315e-05, + "loss": 0.7891, + "step": 15002 + }, + { + "epoch": 0.81, + "learning_rate": 9.516432765482608e-05, + "loss": 0.7969, + "step": 15003 + }, + { + "epoch": 0.81, + "learning_rate": 9.511325191716957e-05, + "loss": 0.7812, + "step": 15004 + }, + { + "epoch": 0.81, + "learning_rate": 9.506218844891146e-05, + "loss": 0.8125, + "step": 15005 + }, + { + "epoch": 0.81, + "learning_rate": 9.501113725159938e-05, + "loss": 0.8633, + "step": 15006 + }, + { + "epoch": 0.81, + "learning_rate": 9.496009832677999e-05, + "loss": 0.7695, + "step": 15007 + }, + { + "epoch": 0.81, + "learning_rate": 9.490907167600027e-05, + "loss": 0.7578, + "step": 15008 + }, + { + "epoch": 0.81, + "learning_rate": 9.485805730080615e-05, + "loss": 0.8047, + "step": 15009 + }, + { + "epoch": 0.81, + "learning_rate": 9.480705520274385e-05, + "loss": 0.8555, + "step": 15010 + }, + { + "epoch": 0.81, + "learning_rate": 9.475606538335862e-05, + "loss": 0.8477, + "step": 15011 + }, + { + "epoch": 0.81, + "learning_rate": 9.47050878441959e-05, + "loss": 0.9258, + "step": 15012 + }, + { + "epoch": 0.81, + "learning_rate": 9.465412258680017e-05, + "loss": 0.8398, + "step": 15013 + }, + { + "epoch": 0.81, + "learning_rate": 9.460316961271609e-05, + "loss": 0.793, + "step": 15014 + }, + { + "epoch": 0.81, + "learning_rate": 9.45522289234877e-05, + "loss": 0.8281, + "step": 15015 + }, + { + "epoch": 0.81, + "learning_rate": 9.450130052065858e-05, + "loss": 0.8906, + "step": 15016 + }, + { + "epoch": 0.81, + "learning_rate": 9.44503844057718e-05, + "loss": 0.8594, + "step": 15017 + }, + { + "epoch": 0.81, + "learning_rate": 9.439948058037079e-05, + "loss": 0.8438, + "step": 15018 + }, + { + "epoch": 0.81, + "learning_rate": 9.43485890459978e-05, + "loss": 0.7188, + "step": 15019 + }, + { + "epoch": 0.81, + "learning_rate": 9.429770980419495e-05, + "loss": 0.8086, + "step": 15020 + }, + { + "epoch": 0.81, + "learning_rate": 9.424684285650415e-05, + "loss": 0.8281, + "step": 15021 + }, + { + "epoch": 0.81, + "learning_rate": 9.419598820446695e-05, + "loss": 0.793, + "step": 15022 + }, + { + "epoch": 0.81, + "learning_rate": 9.414514584962436e-05, + "loss": 0.832, + "step": 15023 + }, + { + "epoch": 0.81, + "learning_rate": 9.40943157935168e-05, + "loss": 0.8242, + "step": 15024 + }, + { + "epoch": 0.81, + "learning_rate": 9.40434980376848e-05, + "loss": 0.875, + "step": 15025 + }, + { + "epoch": 0.81, + "learning_rate": 9.399269258366849e-05, + "loss": 0.832, + "step": 15026 + }, + { + "epoch": 0.81, + "learning_rate": 9.394189943300708e-05, + "loss": 0.8359, + "step": 15027 + }, + { + "epoch": 0.81, + "learning_rate": 9.389111858724003e-05, + "loss": 0.7773, + "step": 15028 + }, + { + "epoch": 0.81, + "learning_rate": 9.384035004790604e-05, + "loss": 0.8242, + "step": 15029 + }, + { + "epoch": 0.81, + "learning_rate": 9.378959381654351e-05, + "loss": 0.8125, + "step": 15030 + }, + { + "epoch": 0.81, + "learning_rate": 9.373884989469079e-05, + "loss": 0.7305, + "step": 15031 + }, + { + "epoch": 0.81, + "learning_rate": 9.36881182838854e-05, + "loss": 0.8438, + "step": 15032 + }, + { + "epoch": 0.81, + "learning_rate": 9.363739898566453e-05, + "loss": 0.8906, + "step": 15033 + }, + { + "epoch": 0.81, + "learning_rate": 9.358669200156534e-05, + "loss": 0.7812, + "step": 15034 + }, + { + "epoch": 0.81, + "learning_rate": 9.353599733312446e-05, + "loss": 0.8633, + "step": 15035 + }, + { + "epoch": 0.81, + "learning_rate": 9.34853149818779e-05, + "loss": 0.8398, + "step": 15036 + }, + { + "epoch": 0.81, + "learning_rate": 9.343464494936166e-05, + "loss": 0.8516, + "step": 15037 + }, + { + "epoch": 0.81, + "learning_rate": 9.338398723711122e-05, + "loss": 0.8984, + "step": 15038 + }, + { + "epoch": 0.81, + "learning_rate": 9.33333418466617e-05, + "loss": 0.8789, + "step": 15039 + }, + { + "epoch": 0.81, + "learning_rate": 9.328270877954759e-05, + "loss": 0.7695, + "step": 15040 + }, + { + "epoch": 0.81, + "learning_rate": 9.323208803730338e-05, + "loss": 0.8828, + "step": 15041 + }, + { + "epoch": 0.81, + "learning_rate": 9.318147962146317e-05, + "loss": 0.75, + "step": 15042 + }, + { + "epoch": 0.81, + "learning_rate": 9.313088353356036e-05, + "loss": 0.7969, + "step": 15043 + }, + { + "epoch": 0.81, + "learning_rate": 9.308029977512822e-05, + "loss": 0.8516, + "step": 15044 + }, + { + "epoch": 0.81, + "learning_rate": 9.30297283476998e-05, + "loss": 0.7734, + "step": 15045 + }, + { + "epoch": 0.81, + "learning_rate": 9.297916925280747e-05, + "loss": 0.7578, + "step": 15046 + }, + { + "epoch": 0.81, + "learning_rate": 9.292862249198313e-05, + "loss": 0.7773, + "step": 15047 + }, + { + "epoch": 0.81, + "learning_rate": 9.287808806675868e-05, + "loss": 0.75, + "step": 15048 + }, + { + "epoch": 0.81, + "learning_rate": 9.28275659786656e-05, + "loss": 0.75, + "step": 15049 + }, + { + "epoch": 0.81, + "learning_rate": 9.277705622923466e-05, + "loss": 0.8164, + "step": 15050 + }, + { + "epoch": 0.81, + "learning_rate": 9.27265588199967e-05, + "loss": 0.7773, + "step": 15051 + }, + { + "epoch": 0.81, + "learning_rate": 9.267607375248167e-05, + "loss": 0.6875, + "step": 15052 + }, + { + "epoch": 0.81, + "learning_rate": 9.262560102821971e-05, + "loss": 0.8359, + "step": 15053 + }, + { + "epoch": 0.81, + "learning_rate": 9.257514064874006e-05, + "loss": 0.8438, + "step": 15054 + }, + { + "epoch": 0.81, + "learning_rate": 9.252469261557206e-05, + "loss": 0.8164, + "step": 15055 + }, + { + "epoch": 0.81, + "learning_rate": 9.247425693024425e-05, + "loss": 0.8008, + "step": 15056 + }, + { + "epoch": 0.81, + "learning_rate": 9.242383359428513e-05, + "loss": 0.8828, + "step": 15057 + }, + { + "epoch": 0.81, + "learning_rate": 9.237342260922277e-05, + "loss": 0.8203, + "step": 15058 + }, + { + "epoch": 0.81, + "learning_rate": 9.232302397658465e-05, + "loss": 0.6914, + "step": 15059 + }, + { + "epoch": 0.81, + "learning_rate": 9.227263769789784e-05, + "loss": 0.8945, + "step": 15060 + }, + { + "epoch": 0.81, + "learning_rate": 9.222226377468967e-05, + "loss": 0.832, + "step": 15061 + }, + { + "epoch": 0.81, + "learning_rate": 9.217190220848637e-05, + "loss": 0.8516, + "step": 15062 + }, + { + "epoch": 0.81, + "learning_rate": 9.212155300081393e-05, + "loss": 0.8125, + "step": 15063 + }, + { + "epoch": 0.81, + "learning_rate": 9.207121615319818e-05, + "loss": 0.7617, + "step": 15064 + }, + { + "epoch": 0.81, + "learning_rate": 9.202089166716476e-05, + "loss": 0.8711, + "step": 15065 + }, + { + "epoch": 0.81, + "learning_rate": 9.197057954423843e-05, + "loss": 0.8164, + "step": 15066 + }, + { + "epoch": 0.81, + "learning_rate": 9.19202797859437e-05, + "loss": 0.7578, + "step": 15067 + }, + { + "epoch": 0.81, + "learning_rate": 9.1869992393805e-05, + "loss": 0.7422, + "step": 15068 + }, + { + "epoch": 0.81, + "learning_rate": 9.181971736934624e-05, + "loss": 0.7578, + "step": 15069 + }, + { + "epoch": 0.81, + "learning_rate": 9.17694547140907e-05, + "loss": 0.8398, + "step": 15070 + }, + { + "epoch": 0.81, + "learning_rate": 9.171920442956183e-05, + "loss": 0.8477, + "step": 15071 + }, + { + "epoch": 0.81, + "learning_rate": 9.166896651728201e-05, + "loss": 0.7734, + "step": 15072 + }, + { + "epoch": 0.81, + "learning_rate": 9.161874097877381e-05, + "loss": 0.7188, + "step": 15073 + }, + { + "epoch": 0.81, + "learning_rate": 9.156852781555935e-05, + "loss": 0.7422, + "step": 15074 + }, + { + "epoch": 0.81, + "learning_rate": 9.151832702915996e-05, + "loss": 0.7773, + "step": 15075 + }, + { + "epoch": 0.81, + "learning_rate": 9.146813862109721e-05, + "loss": 0.7695, + "step": 15076 + }, + { + "epoch": 0.81, + "learning_rate": 9.14179625928917e-05, + "loss": 0.7695, + "step": 15077 + }, + { + "epoch": 0.81, + "learning_rate": 9.136779894606411e-05, + "loss": 0.7148, + "step": 15078 + }, + { + "epoch": 0.81, + "learning_rate": 9.131764768213441e-05, + "loss": 0.7656, + "step": 15079 + }, + { + "epoch": 0.81, + "learning_rate": 9.126750880262235e-05, + "loss": 0.8203, + "step": 15080 + }, + { + "epoch": 0.81, + "learning_rate": 9.121738230904758e-05, + "loss": 0.8359, + "step": 15081 + }, + { + "epoch": 0.81, + "learning_rate": 9.116726820292892e-05, + "loss": 0.8008, + "step": 15082 + }, + { + "epoch": 0.81, + "learning_rate": 9.111716648578477e-05, + "loss": 0.8164, + "step": 15083 + }, + { + "epoch": 0.81, + "learning_rate": 9.106707715913364e-05, + "loss": 0.8242, + "step": 15084 + }, + { + "epoch": 0.81, + "learning_rate": 9.101700022449344e-05, + "loss": 0.8359, + "step": 15085 + }, + { + "epoch": 0.81, + "learning_rate": 9.096693568338144e-05, + "loss": 0.7812, + "step": 15086 + }, + { + "epoch": 0.81, + "learning_rate": 9.091688353731487e-05, + "loss": 0.8047, + "step": 15087 + }, + { + "epoch": 0.81, + "learning_rate": 9.086684378781063e-05, + "loss": 0.7656, + "step": 15088 + }, + { + "epoch": 0.81, + "learning_rate": 9.081681643638495e-05, + "loss": 0.7188, + "step": 15089 + }, + { + "epoch": 0.81, + "learning_rate": 9.076680148455368e-05, + "loss": 0.7656, + "step": 15090 + }, + { + "epoch": 0.81, + "learning_rate": 9.071679893383261e-05, + "loss": 0.7695, + "step": 15091 + }, + { + "epoch": 0.81, + "learning_rate": 9.066680878573702e-05, + "loss": 0.8047, + "step": 15092 + }, + { + "epoch": 0.81, + "learning_rate": 9.061683104178164e-05, + "loss": 0.6914, + "step": 15093 + }, + { + "epoch": 0.81, + "learning_rate": 9.056686570348111e-05, + "loss": 0.8125, + "step": 15094 + }, + { + "epoch": 0.81, + "learning_rate": 9.051691277234936e-05, + "loss": 0.8086, + "step": 15095 + }, + { + "epoch": 0.81, + "learning_rate": 9.046697224990036e-05, + "loss": 0.8008, + "step": 15096 + }, + { + "epoch": 0.81, + "learning_rate": 9.041704413764718e-05, + "loss": 0.8242, + "step": 15097 + }, + { + "epoch": 0.81, + "learning_rate": 9.03671284371031e-05, + "loss": 0.9414, + "step": 15098 + }, + { + "epoch": 0.81, + "learning_rate": 9.031722514978048e-05, + "loss": 0.8945, + "step": 15099 + }, + { + "epoch": 0.81, + "learning_rate": 9.026733427719164e-05, + "loss": 0.8047, + "step": 15100 + }, + { + "epoch": 0.81, + "learning_rate": 9.021745582084862e-05, + "loss": 0.7344, + "step": 15101 + }, + { + "epoch": 0.81, + "learning_rate": 9.016758978226275e-05, + "loss": 0.8281, + "step": 15102 + }, + { + "epoch": 0.81, + "learning_rate": 9.011773616294483e-05, + "loss": 0.8281, + "step": 15103 + }, + { + "epoch": 0.81, + "learning_rate": 9.00678949644062e-05, + "loss": 0.8438, + "step": 15104 + }, + { + "epoch": 0.81, + "learning_rate": 9.001806618815689e-05, + "loss": 0.832, + "step": 15105 + }, + { + "epoch": 0.81, + "learning_rate": 8.996824983570673e-05, + "loss": 0.7461, + "step": 15106 + }, + { + "epoch": 0.81, + "learning_rate": 8.991844590856551e-05, + "loss": 0.7227, + "step": 15107 + }, + { + "epoch": 0.81, + "learning_rate": 8.986865440824255e-05, + "loss": 0.7773, + "step": 15108 + }, + { + "epoch": 0.81, + "learning_rate": 8.981887533624638e-05, + "loss": 0.7461, + "step": 15109 + }, + { + "epoch": 0.81, + "learning_rate": 8.976910869408572e-05, + "loss": 0.8477, + "step": 15110 + }, + { + "epoch": 0.81, + "learning_rate": 8.971935448326868e-05, + "loss": 0.9219, + "step": 15111 + }, + { + "epoch": 0.81, + "learning_rate": 8.966961270530294e-05, + "loss": 0.875, + "step": 15112 + }, + { + "epoch": 0.81, + "learning_rate": 8.961988336169564e-05, + "loss": 0.7617, + "step": 15113 + }, + { + "epoch": 0.81, + "learning_rate": 8.957016645395388e-05, + "loss": 0.8828, + "step": 15114 + }, + { + "epoch": 0.81, + "learning_rate": 8.952046198358437e-05, + "loss": 0.8359, + "step": 15115 + }, + { + "epoch": 0.81, + "learning_rate": 8.947076995209314e-05, + "loss": 0.8047, + "step": 15116 + }, + { + "epoch": 0.81, + "learning_rate": 8.942109036098617e-05, + "loss": 0.8867, + "step": 15117 + }, + { + "epoch": 0.81, + "learning_rate": 8.937142321176873e-05, + "loss": 0.8281, + "step": 15118 + }, + { + "epoch": 0.81, + "learning_rate": 8.932176850594614e-05, + "loss": 0.8477, + "step": 15119 + }, + { + "epoch": 0.81, + "learning_rate": 8.927212624502284e-05, + "loss": 0.8242, + "step": 15120 + }, + { + "epoch": 0.81, + "learning_rate": 8.92224964305034e-05, + "loss": 0.8242, + "step": 15121 + }, + { + "epoch": 0.81, + "learning_rate": 8.917287906389148e-05, + "loss": 0.7617, + "step": 15122 + }, + { + "epoch": 0.81, + "learning_rate": 8.912327414669086e-05, + "loss": 0.8828, + "step": 15123 + }, + { + "epoch": 0.81, + "learning_rate": 8.907368168040481e-05, + "loss": 0.6875, + "step": 15124 + }, + { + "epoch": 0.81, + "learning_rate": 8.902410166653596e-05, + "loss": 0.7969, + "step": 15125 + }, + { + "epoch": 0.81, + "learning_rate": 8.897453410658663e-05, + "loss": 0.7773, + "step": 15126 + }, + { + "epoch": 0.81, + "learning_rate": 8.892497900205926e-05, + "loss": 0.8125, + "step": 15127 + }, + { + "epoch": 0.81, + "learning_rate": 8.887543635445533e-05, + "loss": 0.8359, + "step": 15128 + }, + { + "epoch": 0.81, + "learning_rate": 8.882590616527603e-05, + "loss": 0.7305, + "step": 15129 + }, + { + "epoch": 0.81, + "learning_rate": 8.877638843602242e-05, + "loss": 0.7617, + "step": 15130 + }, + { + "epoch": 0.81, + "learning_rate": 8.872688316819516e-05, + "loss": 0.8359, + "step": 15131 + }, + { + "epoch": 0.81, + "learning_rate": 8.867739036329425e-05, + "loss": 0.8633, + "step": 15132 + }, + { + "epoch": 0.81, + "learning_rate": 8.862791002281945e-05, + "loss": 0.7266, + "step": 15133 + }, + { + "epoch": 0.81, + "learning_rate": 8.85784421482702e-05, + "loss": 0.8984, + "step": 15134 + }, + { + "epoch": 0.81, + "learning_rate": 8.852898674114579e-05, + "loss": 0.7852, + "step": 15135 + }, + { + "epoch": 0.81, + "learning_rate": 8.847954380294448e-05, + "loss": 0.8164, + "step": 15136 + }, + { + "epoch": 0.81, + "learning_rate": 8.843011333516493e-05, + "loss": 0.7852, + "step": 15137 + }, + { + "epoch": 0.81, + "learning_rate": 8.838069533930476e-05, + "loss": 0.832, + "step": 15138 + }, + { + "epoch": 0.81, + "learning_rate": 8.833128981686161e-05, + "loss": 0.7852, + "step": 15139 + }, + { + "epoch": 0.81, + "learning_rate": 8.828189676933273e-05, + "loss": 0.7422, + "step": 15140 + }, + { + "epoch": 0.81, + "learning_rate": 8.823251619821482e-05, + "loss": 0.7773, + "step": 15141 + }, + { + "epoch": 0.81, + "learning_rate": 8.818314810500411e-05, + "loss": 0.9609, + "step": 15142 + }, + { + "epoch": 0.81, + "learning_rate": 8.813379249119679e-05, + "loss": 0.8047, + "step": 15143 + }, + { + "epoch": 0.81, + "learning_rate": 8.808444935828853e-05, + "loss": 0.8203, + "step": 15144 + }, + { + "epoch": 0.81, + "learning_rate": 8.803511870777453e-05, + "loss": 0.7461, + "step": 15145 + }, + { + "epoch": 0.81, + "learning_rate": 8.798580054114941e-05, + "loss": 0.8438, + "step": 15146 + }, + { + "epoch": 0.81, + "learning_rate": 8.793649485990818e-05, + "loss": 0.8047, + "step": 15147 + }, + { + "epoch": 0.81, + "learning_rate": 8.78872016655447e-05, + "loss": 0.7461, + "step": 15148 + }, + { + "epoch": 0.81, + "learning_rate": 8.783792095955262e-05, + "loss": 0.7734, + "step": 15149 + }, + { + "epoch": 0.81, + "learning_rate": 8.77886527434254e-05, + "loss": 0.7383, + "step": 15150 + }, + { + "epoch": 0.81, + "learning_rate": 8.773939701865613e-05, + "loss": 0.9023, + "step": 15151 + }, + { + "epoch": 0.81, + "learning_rate": 8.769015378673722e-05, + "loss": 0.832, + "step": 15152 + }, + { + "epoch": 0.81, + "learning_rate": 8.764092304916105e-05, + "loss": 0.832, + "step": 15153 + }, + { + "epoch": 0.81, + "learning_rate": 8.759170480741951e-05, + "loss": 0.8281, + "step": 15154 + }, + { + "epoch": 0.81, + "learning_rate": 8.754249906300404e-05, + "loss": 0.8125, + "step": 15155 + }, + { + "epoch": 0.81, + "learning_rate": 8.74933058174055e-05, + "loss": 0.7852, + "step": 15156 + }, + { + "epoch": 0.81, + "learning_rate": 8.744412507211485e-05, + "loss": 0.7734, + "step": 15157 + }, + { + "epoch": 0.81, + "learning_rate": 8.739495682862247e-05, + "loss": 0.8516, + "step": 15158 + }, + { + "epoch": 0.81, + "learning_rate": 8.734580108841816e-05, + "loss": 0.8906, + "step": 15159 + }, + { + "epoch": 0.81, + "learning_rate": 8.729665785299162e-05, + "loss": 0.7461, + "step": 15160 + }, + { + "epoch": 0.81, + "learning_rate": 8.724752712383194e-05, + "loss": 0.7773, + "step": 15161 + }, + { + "epoch": 0.81, + "learning_rate": 8.719840890242809e-05, + "loss": 0.7422, + "step": 15162 + }, + { + "epoch": 0.81, + "learning_rate": 8.714930319026832e-05, + "loss": 0.832, + "step": 15163 + }, + { + "epoch": 0.82, + "learning_rate": 8.710020998884088e-05, + "loss": 0.7852, + "step": 15164 + }, + { + "epoch": 0.82, + "learning_rate": 8.705112929963333e-05, + "loss": 0.8594, + "step": 15165 + }, + { + "epoch": 0.82, + "learning_rate": 8.700206112413294e-05, + "loss": 0.7617, + "step": 15166 + }, + { + "epoch": 0.82, + "learning_rate": 8.695300546382689e-05, + "loss": 0.8438, + "step": 15167 + }, + { + "epoch": 0.82, + "learning_rate": 8.690396232020153e-05, + "loss": 0.8398, + "step": 15168 + }, + { + "epoch": 0.82, + "learning_rate": 8.685493169474279e-05, + "loss": 0.793, + "step": 15169 + }, + { + "epoch": 0.82, + "learning_rate": 8.680591358893702e-05, + "loss": 0.7812, + "step": 15170 + }, + { + "epoch": 0.82, + "learning_rate": 8.675690800426927e-05, + "loss": 0.7812, + "step": 15171 + }, + { + "epoch": 0.82, + "learning_rate": 8.670791494222457e-05, + "loss": 0.7344, + "step": 15172 + }, + { + "epoch": 0.82, + "learning_rate": 8.665893440428762e-05, + "loss": 0.8477, + "step": 15173 + }, + { + "epoch": 0.82, + "learning_rate": 8.66099663919428e-05, + "loss": 0.8281, + "step": 15174 + }, + { + "epoch": 0.82, + "learning_rate": 8.656101090667389e-05, + "loss": 0.8047, + "step": 15175 + }, + { + "epoch": 0.82, + "learning_rate": 8.651206794996435e-05, + "loss": 0.7695, + "step": 15176 + }, + { + "epoch": 0.82, + "learning_rate": 8.646313752329737e-05, + "loss": 0.9102, + "step": 15177 + }, + { + "epoch": 0.82, + "learning_rate": 8.641421962815582e-05, + "loss": 0.7656, + "step": 15178 + }, + { + "epoch": 0.82, + "learning_rate": 8.636531426602185e-05, + "loss": 0.7812, + "step": 15179 + }, + { + "epoch": 0.82, + "learning_rate": 8.63164214383777e-05, + "loss": 0.8086, + "step": 15180 + }, + { + "epoch": 0.82, + "learning_rate": 8.626754114670471e-05, + "loss": 0.8008, + "step": 15181 + }, + { + "epoch": 0.82, + "learning_rate": 8.621867339248424e-05, + "loss": 0.8398, + "step": 15182 + }, + { + "epoch": 0.82, + "learning_rate": 8.616981817719732e-05, + "loss": 0.8047, + "step": 15183 + }, + { + "epoch": 0.82, + "learning_rate": 8.612097550232412e-05, + "loss": 0.8555, + "step": 15184 + }, + { + "epoch": 0.82, + "learning_rate": 8.607214536934499e-05, + "loss": 0.7969, + "step": 15185 + }, + { + "epoch": 0.82, + "learning_rate": 8.602332777973943e-05, + "loss": 0.8164, + "step": 15186 + }, + { + "epoch": 0.82, + "learning_rate": 8.597452273498696e-05, + "loss": 0.8008, + "step": 15187 + }, + { + "epoch": 0.82, + "learning_rate": 8.592573023656635e-05, + "loss": 0.7656, + "step": 15188 + }, + { + "epoch": 0.82, + "learning_rate": 8.587695028595626e-05, + "loss": 0.75, + "step": 15189 + }, + { + "epoch": 0.82, + "learning_rate": 8.582818288463507e-05, + "loss": 0.793, + "step": 15190 + }, + { + "epoch": 0.82, + "learning_rate": 8.577942803408034e-05, + "loss": 0.7656, + "step": 15191 + }, + { + "epoch": 0.82, + "learning_rate": 8.573068573576936e-05, + "loss": 0.8125, + "step": 15192 + }, + { + "epoch": 0.82, + "learning_rate": 8.568195599117962e-05, + "loss": 0.8203, + "step": 15193 + }, + { + "epoch": 0.82, + "learning_rate": 8.563323880178759e-05, + "loss": 0.8281, + "step": 15194 + }, + { + "epoch": 0.82, + "learning_rate": 8.558453416906942e-05, + "loss": 0.7227, + "step": 15195 + }, + { + "epoch": 0.82, + "learning_rate": 8.553584209450116e-05, + "loss": 0.7422, + "step": 15196 + }, + { + "epoch": 0.82, + "learning_rate": 8.548716257955841e-05, + "loss": 0.8555, + "step": 15197 + }, + { + "epoch": 0.82, + "learning_rate": 8.543849562571627e-05, + "loss": 0.8398, + "step": 15198 + }, + { + "epoch": 0.82, + "learning_rate": 8.538984123444927e-05, + "loss": 0.7383, + "step": 15199 + }, + { + "epoch": 0.82, + "learning_rate": 8.534119940723207e-05, + "loss": 0.8086, + "step": 15200 + }, + { + "epoch": 0.82, + "learning_rate": 8.52925701455387e-05, + "loss": 0.6875, + "step": 15201 + }, + { + "epoch": 0.82, + "learning_rate": 8.524395345084257e-05, + "loss": 0.832, + "step": 15202 + }, + { + "epoch": 0.82, + "learning_rate": 8.519534932461714e-05, + "loss": 0.7344, + "step": 15203 + }, + { + "epoch": 0.82, + "learning_rate": 8.514675776833509e-05, + "loss": 0.8555, + "step": 15204 + }, + { + "epoch": 0.82, + "learning_rate": 8.509817878346898e-05, + "loss": 0.8125, + "step": 15205 + }, + { + "epoch": 0.82, + "learning_rate": 8.504961237149106e-05, + "loss": 0.7461, + "step": 15206 + }, + { + "epoch": 0.82, + "learning_rate": 8.50010585338729e-05, + "loss": 0.7852, + "step": 15207 + }, + { + "epoch": 0.82, + "learning_rate": 8.495251727208576e-05, + "loss": 0.7734, + "step": 15208 + }, + { + "epoch": 0.82, + "learning_rate": 8.490398858760068e-05, + "loss": 0.7969, + "step": 15209 + }, + { + "epoch": 0.82, + "learning_rate": 8.485547248188836e-05, + "loss": 0.8008, + "step": 15210 + }, + { + "epoch": 0.82, + "learning_rate": 8.480696895641893e-05, + "loss": 0.875, + "step": 15211 + }, + { + "epoch": 0.82, + "learning_rate": 8.47584780126619e-05, + "loss": 0.7656, + "step": 15212 + }, + { + "epoch": 0.82, + "learning_rate": 8.470999965208725e-05, + "loss": 0.7891, + "step": 15213 + }, + { + "epoch": 0.82, + "learning_rate": 8.46615338761637e-05, + "loss": 0.8359, + "step": 15214 + }, + { + "epoch": 0.82, + "learning_rate": 8.461308068635992e-05, + "loss": 0.8242, + "step": 15215 + }, + { + "epoch": 0.82, + "learning_rate": 8.456464008414427e-05, + "loss": 0.9102, + "step": 15216 + }, + { + "epoch": 0.82, + "learning_rate": 8.451621207098476e-05, + "loss": 0.7852, + "step": 15217 + }, + { + "epoch": 0.82, + "learning_rate": 8.446779664834864e-05, + "loss": 0.8125, + "step": 15218 + }, + { + "epoch": 0.82, + "learning_rate": 8.441939381770342e-05, + "loss": 0.8516, + "step": 15219 + }, + { + "epoch": 0.82, + "learning_rate": 8.437100358051546e-05, + "loss": 0.8125, + "step": 15220 + }, + { + "epoch": 0.82, + "learning_rate": 8.432262593825157e-05, + "loss": 0.8047, + "step": 15221 + }, + { + "epoch": 0.82, + "learning_rate": 8.427426089237739e-05, + "loss": 0.7031, + "step": 15222 + }, + { + "epoch": 0.82, + "learning_rate": 8.422590844435863e-05, + "loss": 0.75, + "step": 15223 + }, + { + "epoch": 0.82, + "learning_rate": 8.417756859566073e-05, + "loss": 0.8516, + "step": 15224 + }, + { + "epoch": 0.82, + "learning_rate": 8.41292413477483e-05, + "loss": 0.8242, + "step": 15225 + }, + { + "epoch": 0.82, + "learning_rate": 8.408092670208606e-05, + "loss": 0.8047, + "step": 15226 + }, + { + "epoch": 0.82, + "learning_rate": 8.403262466013778e-05, + "loss": 0.7422, + "step": 15227 + }, + { + "epoch": 0.82, + "learning_rate": 8.398433522336746e-05, + "loss": 0.6836, + "step": 15228 + }, + { + "epoch": 0.82, + "learning_rate": 8.393605839323826e-05, + "loss": 0.793, + "step": 15229 + }, + { + "epoch": 0.82, + "learning_rate": 8.388779417121323e-05, + "loss": 0.7539, + "step": 15230 + }, + { + "epoch": 0.82, + "learning_rate": 8.383954255875481e-05, + "loss": 0.832, + "step": 15231 + }, + { + "epoch": 0.82, + "learning_rate": 8.379130355732523e-05, + "loss": 0.7266, + "step": 15232 + }, + { + "epoch": 0.82, + "learning_rate": 8.374307716838647e-05, + "loss": 0.918, + "step": 15233 + }, + { + "epoch": 0.82, + "learning_rate": 8.369486339339977e-05, + "loss": 0.75, + "step": 15234 + }, + { + "epoch": 0.82, + "learning_rate": 8.364666223382594e-05, + "loss": 0.7656, + "step": 15235 + }, + { + "epoch": 0.82, + "learning_rate": 8.359847369112617e-05, + "loss": 0.6602, + "step": 15236 + }, + { + "epoch": 0.82, + "learning_rate": 8.355029776676042e-05, + "loss": 0.9648, + "step": 15237 + }, + { + "epoch": 0.82, + "learning_rate": 8.350213446218852e-05, + "loss": 0.7852, + "step": 15238 + }, + { + "epoch": 0.82, + "learning_rate": 8.345398377887003e-05, + "loss": 0.8438, + "step": 15239 + }, + { + "epoch": 0.82, + "learning_rate": 8.340584571826421e-05, + "loss": 0.7305, + "step": 15240 + }, + { + "epoch": 0.82, + "learning_rate": 8.33577202818297e-05, + "loss": 0.6797, + "step": 15241 + }, + { + "epoch": 0.82, + "learning_rate": 8.330960747102479e-05, + "loss": 0.7695, + "step": 15242 + }, + { + "epoch": 0.82, + "learning_rate": 8.326150728730747e-05, + "loss": 0.8633, + "step": 15243 + }, + { + "epoch": 0.82, + "learning_rate": 8.321341973213553e-05, + "loss": 0.8398, + "step": 15244 + }, + { + "epoch": 0.82, + "learning_rate": 8.316534480696592e-05, + "loss": 0.7578, + "step": 15245 + }, + { + "epoch": 0.82, + "learning_rate": 8.311728251325568e-05, + "loss": 0.8203, + "step": 15246 + }, + { + "epoch": 0.82, + "learning_rate": 8.306923285246104e-05, + "loss": 0.7539, + "step": 15247 + }, + { + "epoch": 0.82, + "learning_rate": 8.302119582603818e-05, + "loss": 0.7617, + "step": 15248 + }, + { + "epoch": 0.82, + "learning_rate": 8.297317143544292e-05, + "loss": 0.75, + "step": 15249 + }, + { + "epoch": 0.82, + "learning_rate": 8.292515968213043e-05, + "loss": 0.7852, + "step": 15250 + }, + { + "epoch": 0.82, + "learning_rate": 8.287716056755545e-05, + "loss": 0.75, + "step": 15251 + }, + { + "epoch": 0.82, + "learning_rate": 8.28291740931727e-05, + "loss": 0.8281, + "step": 15252 + }, + { + "epoch": 0.82, + "learning_rate": 8.278120026043645e-05, + "loss": 0.8828, + "step": 15253 + }, + { + "epoch": 0.82, + "learning_rate": 8.273323907080027e-05, + "loss": 0.8633, + "step": 15254 + }, + { + "epoch": 0.82, + "learning_rate": 8.268529052571739e-05, + "loss": 0.832, + "step": 15255 + }, + { + "epoch": 0.82, + "learning_rate": 8.263735462664123e-05, + "loss": 0.7383, + "step": 15256 + }, + { + "epoch": 0.82, + "learning_rate": 8.258943137502417e-05, + "loss": 0.75, + "step": 15257 + }, + { + "epoch": 0.82, + "learning_rate": 8.254152077231836e-05, + "loss": 0.8281, + "step": 15258 + }, + { + "epoch": 0.82, + "learning_rate": 8.249362281997569e-05, + "loss": 0.8242, + "step": 15259 + }, + { + "epoch": 0.82, + "learning_rate": 8.244573751944784e-05, + "loss": 0.9102, + "step": 15260 + }, + { + "epoch": 0.82, + "learning_rate": 8.239786487218559e-05, + "loss": 0.7734, + "step": 15261 + }, + { + "epoch": 0.82, + "learning_rate": 8.235000487963978e-05, + "loss": 0.8438, + "step": 15262 + }, + { + "epoch": 0.82, + "learning_rate": 8.230215754326087e-05, + "loss": 0.8359, + "step": 15263 + }, + { + "epoch": 0.82, + "learning_rate": 8.225432286449863e-05, + "loss": 0.7188, + "step": 15264 + }, + { + "epoch": 0.82, + "learning_rate": 8.220650084480253e-05, + "loss": 0.8047, + "step": 15265 + }, + { + "epoch": 0.82, + "learning_rate": 8.215869148562177e-05, + "loss": 0.8242, + "step": 15266 + }, + { + "epoch": 0.82, + "learning_rate": 8.211089478840533e-05, + "loss": 0.7852, + "step": 15267 + }, + { + "epoch": 0.82, + "learning_rate": 8.206311075460132e-05, + "loss": 0.7266, + "step": 15268 + }, + { + "epoch": 0.82, + "learning_rate": 8.201533938565808e-05, + "loss": 0.8008, + "step": 15269 + }, + { + "epoch": 0.82, + "learning_rate": 8.19675806830229e-05, + "loss": 0.9023, + "step": 15270 + }, + { + "epoch": 0.82, + "learning_rate": 8.191983464814317e-05, + "loss": 0.7969, + "step": 15271 + }, + { + "epoch": 0.82, + "learning_rate": 8.187210128246586e-05, + "loss": 0.8516, + "step": 15272 + }, + { + "epoch": 0.82, + "learning_rate": 8.182438058743741e-05, + "loss": 0.8906, + "step": 15273 + }, + { + "epoch": 0.82, + "learning_rate": 8.177667256450372e-05, + "loss": 0.793, + "step": 15274 + }, + { + "epoch": 0.82, + "learning_rate": 8.172897721511063e-05, + "loss": 0.8008, + "step": 15275 + }, + { + "epoch": 0.82, + "learning_rate": 8.168129454070361e-05, + "loss": 0.7188, + "step": 15276 + }, + { + "epoch": 0.82, + "learning_rate": 8.163362454272744e-05, + "loss": 0.7344, + "step": 15277 + }, + { + "epoch": 0.82, + "learning_rate": 8.158596722262645e-05, + "loss": 0.7461, + "step": 15278 + }, + { + "epoch": 0.82, + "learning_rate": 8.153832258184529e-05, + "loss": 0.6836, + "step": 15279 + }, + { + "epoch": 0.82, + "learning_rate": 8.149069062182757e-05, + "loss": 0.8047, + "step": 15280 + }, + { + "epoch": 0.82, + "learning_rate": 8.144307134401652e-05, + "loss": 0.8242, + "step": 15281 + }, + { + "epoch": 0.82, + "learning_rate": 8.139546474985526e-05, + "loss": 0.7852, + "step": 15282 + }, + { + "epoch": 0.82, + "learning_rate": 8.13478708407866e-05, + "loss": 0.75, + "step": 15283 + }, + { + "epoch": 0.82, + "learning_rate": 8.130028961825247e-05, + "loss": 0.7188, + "step": 15284 + }, + { + "epoch": 0.82, + "learning_rate": 8.125272108369507e-05, + "loss": 0.9102, + "step": 15285 + }, + { + "epoch": 0.82, + "learning_rate": 8.120516523855565e-05, + "loss": 0.9141, + "step": 15286 + }, + { + "epoch": 0.82, + "learning_rate": 8.115762208427546e-05, + "loss": 0.7695, + "step": 15287 + }, + { + "epoch": 0.82, + "learning_rate": 8.111009162229505e-05, + "loss": 0.832, + "step": 15288 + }, + { + "epoch": 0.82, + "learning_rate": 8.106257385405491e-05, + "loss": 0.8672, + "step": 15289 + }, + { + "epoch": 0.82, + "learning_rate": 8.10150687809948e-05, + "loss": 0.8008, + "step": 15290 + }, + { + "epoch": 0.82, + "learning_rate": 8.096757640455443e-05, + "loss": 0.8008, + "step": 15291 + }, + { + "epoch": 0.82, + "learning_rate": 8.092009672617301e-05, + "loss": 0.7969, + "step": 15292 + }, + { + "epoch": 0.82, + "learning_rate": 8.087262974728932e-05, + "loss": 0.8398, + "step": 15293 + }, + { + "epoch": 0.82, + "learning_rate": 8.082517546934154e-05, + "loss": 0.793, + "step": 15294 + }, + { + "epoch": 0.82, + "learning_rate": 8.077773389376786e-05, + "loss": 0.8047, + "step": 15295 + }, + { + "epoch": 0.82, + "learning_rate": 8.0730305022006e-05, + "loss": 0.7695, + "step": 15296 + }, + { + "epoch": 0.82, + "learning_rate": 8.068288885549302e-05, + "loss": 0.7617, + "step": 15297 + }, + { + "epoch": 0.82, + "learning_rate": 8.06354853956659e-05, + "loss": 0.8359, + "step": 15298 + }, + { + "epoch": 0.82, + "learning_rate": 8.058809464396121e-05, + "loss": 0.8711, + "step": 15299 + }, + { + "epoch": 0.82, + "learning_rate": 8.054071660181494e-05, + "loss": 0.8477, + "step": 15300 + }, + { + "epoch": 0.82, + "learning_rate": 8.049335127066254e-05, + "loss": 0.8164, + "step": 15301 + }, + { + "epoch": 0.82, + "learning_rate": 8.044599865193992e-05, + "loss": 0.75, + "step": 15302 + }, + { + "epoch": 0.82, + "learning_rate": 8.039865874708158e-05, + "loss": 0.8047, + "step": 15303 + }, + { + "epoch": 0.82, + "learning_rate": 8.035133155752216e-05, + "loss": 0.8242, + "step": 15304 + }, + { + "epoch": 0.82, + "learning_rate": 8.030401708469581e-05, + "loss": 0.8594, + "step": 15305 + }, + { + "epoch": 0.82, + "learning_rate": 8.025671533003648e-05, + "loss": 0.8359, + "step": 15306 + }, + { + "epoch": 0.82, + "learning_rate": 8.020942629497751e-05, + "loss": 0.8281, + "step": 15307 + }, + { + "epoch": 0.82, + "learning_rate": 8.016214998095167e-05, + "loss": 0.707, + "step": 15308 + }, + { + "epoch": 0.82, + "learning_rate": 8.011488638939179e-05, + "loss": 0.8945, + "step": 15309 + }, + { + "epoch": 0.82, + "learning_rate": 8.006763552173024e-05, + "loss": 0.7188, + "step": 15310 + }, + { + "epoch": 0.82, + "learning_rate": 8.002039737939859e-05, + "loss": 0.7266, + "step": 15311 + }, + { + "epoch": 0.82, + "learning_rate": 7.997317196382858e-05, + "loss": 0.7305, + "step": 15312 + }, + { + "epoch": 0.82, + "learning_rate": 7.992595927645107e-05, + "loss": 0.8672, + "step": 15313 + }, + { + "epoch": 0.82, + "learning_rate": 7.987875931869681e-05, + "loss": 0.75, + "step": 15314 + }, + { + "epoch": 0.82, + "learning_rate": 7.983157209199632e-05, + "loss": 0.8164, + "step": 15315 + }, + { + "epoch": 0.82, + "learning_rate": 7.978439759777933e-05, + "loss": 0.6719, + "step": 15316 + }, + { + "epoch": 0.82, + "learning_rate": 7.973723583747533e-05, + "loss": 0.8242, + "step": 15317 + }, + { + "epoch": 0.82, + "learning_rate": 7.96900868125136e-05, + "loss": 0.8516, + "step": 15318 + }, + { + "epoch": 0.82, + "learning_rate": 7.964295052432297e-05, + "loss": 0.7891, + "step": 15319 + }, + { + "epoch": 0.82, + "learning_rate": 7.95958269743317e-05, + "loss": 0.8047, + "step": 15320 + }, + { + "epoch": 0.82, + "learning_rate": 7.954871616396758e-05, + "loss": 0.7305, + "step": 15321 + }, + { + "epoch": 0.82, + "learning_rate": 7.950161809465879e-05, + "loss": 0.7578, + "step": 15322 + }, + { + "epoch": 0.82, + "learning_rate": 7.94545327678321e-05, + "loss": 0.7266, + "step": 15323 + }, + { + "epoch": 0.82, + "learning_rate": 7.940746018491446e-05, + "loss": 0.793, + "step": 15324 + }, + { + "epoch": 0.82, + "learning_rate": 7.936040034733227e-05, + "loss": 0.75, + "step": 15325 + }, + { + "epoch": 0.82, + "learning_rate": 7.931335325651184e-05, + "loss": 0.8164, + "step": 15326 + }, + { + "epoch": 0.82, + "learning_rate": 7.926631891387854e-05, + "loss": 0.8398, + "step": 15327 + }, + { + "epoch": 0.82, + "learning_rate": 7.921929732085797e-05, + "loss": 0.75, + "step": 15328 + }, + { + "epoch": 0.82, + "learning_rate": 7.917228847887469e-05, + "loss": 0.8867, + "step": 15329 + }, + { + "epoch": 0.82, + "learning_rate": 7.912529238935356e-05, + "loss": 0.7539, + "step": 15330 + }, + { + "epoch": 0.82, + "learning_rate": 7.907830905371844e-05, + "loss": 0.7266, + "step": 15331 + }, + { + "epoch": 0.82, + "learning_rate": 7.903133847339322e-05, + "loss": 0.7578, + "step": 15332 + }, + { + "epoch": 0.82, + "learning_rate": 7.898438064980134e-05, + "loss": 0.8086, + "step": 15333 + }, + { + "epoch": 0.82, + "learning_rate": 7.893743558436561e-05, + "loss": 0.832, + "step": 15334 + }, + { + "epoch": 0.82, + "learning_rate": 7.889050327850878e-05, + "loss": 0.8359, + "step": 15335 + }, + { + "epoch": 0.82, + "learning_rate": 7.88435837336528e-05, + "loss": 0.832, + "step": 15336 + }, + { + "epoch": 0.82, + "learning_rate": 7.879667695121983e-05, + "loss": 0.7109, + "step": 15337 + }, + { + "epoch": 0.82, + "learning_rate": 7.874978293263096e-05, + "loss": 0.7852, + "step": 15338 + }, + { + "epoch": 0.82, + "learning_rate": 7.870290167930755e-05, + "loss": 0.8047, + "step": 15339 + }, + { + "epoch": 0.82, + "learning_rate": 7.865603319267001e-05, + "loss": 0.8281, + "step": 15340 + }, + { + "epoch": 0.82, + "learning_rate": 7.860917747413865e-05, + "loss": 0.8398, + "step": 15341 + }, + { + "epoch": 0.82, + "learning_rate": 7.85623345251335e-05, + "loss": 0.875, + "step": 15342 + }, + { + "epoch": 0.82, + "learning_rate": 7.8515504347074e-05, + "loss": 0.8711, + "step": 15343 + }, + { + "epoch": 0.82, + "learning_rate": 7.846868694137892e-05, + "loss": 0.7734, + "step": 15344 + }, + { + "epoch": 0.82, + "learning_rate": 7.842188230946762e-05, + "loss": 0.8242, + "step": 15345 + }, + { + "epoch": 0.82, + "learning_rate": 7.837509045275798e-05, + "loss": 0.8281, + "step": 15346 + }, + { + "epoch": 0.82, + "learning_rate": 7.832831137266799e-05, + "loss": 0.7344, + "step": 15347 + }, + { + "epoch": 0.82, + "learning_rate": 7.82815450706153e-05, + "loss": 0.6797, + "step": 15348 + }, + { + "epoch": 0.82, + "learning_rate": 7.82347915480171e-05, + "loss": 0.8008, + "step": 15349 + }, + { + "epoch": 0.83, + "learning_rate": 7.81880508062901e-05, + "loss": 0.7891, + "step": 15350 + }, + { + "epoch": 0.83, + "learning_rate": 7.814132284685083e-05, + "loss": 0.7383, + "step": 15351 + }, + { + "epoch": 0.83, + "learning_rate": 7.809460767111503e-05, + "loss": 0.6406, + "step": 15352 + }, + { + "epoch": 0.83, + "learning_rate": 7.804790528049866e-05, + "loss": 0.832, + "step": 15353 + }, + { + "epoch": 0.83, + "learning_rate": 7.800121567641665e-05, + "loss": 0.8203, + "step": 15354 + }, + { + "epoch": 0.83, + "learning_rate": 7.795453886028415e-05, + "loss": 0.9023, + "step": 15355 + }, + { + "epoch": 0.83, + "learning_rate": 7.79078748335153e-05, + "loss": 0.8008, + "step": 15356 + }, + { + "epoch": 0.83, + "learning_rate": 7.78612235975244e-05, + "loss": 0.7578, + "step": 15357 + }, + { + "epoch": 0.83, + "learning_rate": 7.781458515372514e-05, + "loss": 0.668, + "step": 15358 + }, + { + "epoch": 0.83, + "learning_rate": 7.776795950353072e-05, + "loss": 0.7617, + "step": 15359 + }, + { + "epoch": 0.83, + "learning_rate": 7.772134664835401e-05, + "loss": 0.7305, + "step": 15360 + }, + { + "epoch": 0.83, + "learning_rate": 7.76747465896076e-05, + "loss": 0.7773, + "step": 15361 + }, + { + "epoch": 0.83, + "learning_rate": 7.762815932870371e-05, + "loss": 0.8516, + "step": 15362 + }, + { + "epoch": 0.83, + "learning_rate": 7.75815848670539e-05, + "loss": 0.7734, + "step": 15363 + }, + { + "epoch": 0.83, + "learning_rate": 7.753502320606964e-05, + "loss": 0.8086, + "step": 15364 + }, + { + "epoch": 0.83, + "learning_rate": 7.7488474347162e-05, + "loss": 0.707, + "step": 15365 + }, + { + "epoch": 0.83, + "learning_rate": 7.744193829174146e-05, + "loss": 0.8086, + "step": 15366 + }, + { + "epoch": 0.83, + "learning_rate": 7.739541504121811e-05, + "loss": 0.7773, + "step": 15367 + }, + { + "epoch": 0.83, + "learning_rate": 7.734890459700183e-05, + "loss": 0.8477, + "step": 15368 + }, + { + "epoch": 0.83, + "learning_rate": 7.730240696050223e-05, + "loss": 0.8008, + "step": 15369 + }, + { + "epoch": 0.83, + "learning_rate": 7.725592213312804e-05, + "loss": 0.8555, + "step": 15370 + }, + { + "epoch": 0.83, + "learning_rate": 7.720945011628805e-05, + "loss": 0.7695, + "step": 15371 + }, + { + "epoch": 0.83, + "learning_rate": 7.716299091139062e-05, + "loss": 0.8125, + "step": 15372 + }, + { + "epoch": 0.83, + "learning_rate": 7.71165445198435e-05, + "loss": 0.8203, + "step": 15373 + }, + { + "epoch": 0.83, + "learning_rate": 7.707011094305405e-05, + "loss": 0.7461, + "step": 15374 + }, + { + "epoch": 0.83, + "learning_rate": 7.702369018242949e-05, + "loss": 0.8086, + "step": 15375 + }, + { + "epoch": 0.83, + "learning_rate": 7.697728223937661e-05, + "loss": 0.8438, + "step": 15376 + }, + { + "epoch": 0.83, + "learning_rate": 7.693088711530156e-05, + "loss": 0.9336, + "step": 15377 + }, + { + "epoch": 0.83, + "learning_rate": 7.68845048116104e-05, + "loss": 0.8359, + "step": 15378 + }, + { + "epoch": 0.83, + "learning_rate": 7.683813532970851e-05, + "loss": 0.7305, + "step": 15379 + }, + { + "epoch": 0.83, + "learning_rate": 7.679177867100112e-05, + "loss": 0.8945, + "step": 15380 + }, + { + "epoch": 0.83, + "learning_rate": 7.674543483689311e-05, + "loss": 0.8945, + "step": 15381 + }, + { + "epoch": 0.83, + "learning_rate": 7.669910382878875e-05, + "loss": 0.8203, + "step": 15382 + }, + { + "epoch": 0.83, + "learning_rate": 7.665278564809186e-05, + "loss": 0.7188, + "step": 15383 + }, + { + "epoch": 0.83, + "learning_rate": 7.66064802962062e-05, + "loss": 0.7578, + "step": 15384 + }, + { + "epoch": 0.83, + "learning_rate": 7.656018777453506e-05, + "loss": 0.8789, + "step": 15385 + }, + { + "epoch": 0.83, + "learning_rate": 7.651390808448116e-05, + "loss": 0.8086, + "step": 15386 + }, + { + "epoch": 0.83, + "learning_rate": 7.646764122744665e-05, + "loss": 0.7422, + "step": 15387 + }, + { + "epoch": 0.83, + "learning_rate": 7.64213872048341e-05, + "loss": 0.7461, + "step": 15388 + }, + { + "epoch": 0.83, + "learning_rate": 7.63751460180449e-05, + "loss": 0.8008, + "step": 15389 + }, + { + "epoch": 0.83, + "learning_rate": 7.632891766848016e-05, + "loss": 0.8828, + "step": 15390 + }, + { + "epoch": 0.83, + "learning_rate": 7.62827021575409e-05, + "loss": 0.8477, + "step": 15391 + }, + { + "epoch": 0.83, + "learning_rate": 7.623649948662775e-05, + "loss": 0.8672, + "step": 15392 + }, + { + "epoch": 0.83, + "learning_rate": 7.619030965714052e-05, + "loss": 0.7188, + "step": 15393 + }, + { + "epoch": 0.83, + "learning_rate": 7.614413267047909e-05, + "loss": 0.6875, + "step": 15394 + }, + { + "epoch": 0.83, + "learning_rate": 7.609796852804268e-05, + "loss": 0.8359, + "step": 15395 + }, + { + "epoch": 0.83, + "learning_rate": 7.605181723123034e-05, + "loss": 0.7617, + "step": 15396 + }, + { + "epoch": 0.83, + "learning_rate": 7.600567878144044e-05, + "loss": 0.832, + "step": 15397 + }, + { + "epoch": 0.83, + "learning_rate": 7.595955318007131e-05, + "loss": 0.7109, + "step": 15398 + }, + { + "epoch": 0.83, + "learning_rate": 7.59134404285205e-05, + "loss": 0.7734, + "step": 15399 + }, + { + "epoch": 0.83, + "learning_rate": 7.586734052818551e-05, + "loss": 0.7695, + "step": 15400 + }, + { + "epoch": 0.83, + "learning_rate": 7.582125348046338e-05, + "loss": 0.6797, + "step": 15401 + }, + { + "epoch": 0.83, + "learning_rate": 7.577517928675065e-05, + "loss": 0.8242, + "step": 15402 + }, + { + "epoch": 0.83, + "learning_rate": 7.572911794844334e-05, + "loss": 0.7734, + "step": 15403 + }, + { + "epoch": 0.83, + "learning_rate": 7.568306946693737e-05, + "loss": 0.7188, + "step": 15404 + }, + { + "epoch": 0.83, + "learning_rate": 7.563703384362841e-05, + "loss": 0.7227, + "step": 15405 + }, + { + "epoch": 0.83, + "learning_rate": 7.559101107991107e-05, + "loss": 0.7812, + "step": 15406 + }, + { + "epoch": 0.83, + "learning_rate": 7.554500117718022e-05, + "loss": 0.8203, + "step": 15407 + }, + { + "epoch": 0.83, + "learning_rate": 7.549900413683025e-05, + "loss": 0.9062, + "step": 15408 + }, + { + "epoch": 0.83, + "learning_rate": 7.545301996025483e-05, + "loss": 0.7695, + "step": 15409 + }, + { + "epoch": 0.83, + "learning_rate": 7.540704864884718e-05, + "loss": 0.8789, + "step": 15410 + }, + { + "epoch": 0.83, + "learning_rate": 7.536109020400095e-05, + "loss": 0.8164, + "step": 15411 + }, + { + "epoch": 0.83, + "learning_rate": 7.531514462710853e-05, + "loss": 0.8242, + "step": 15412 + }, + { + "epoch": 0.83, + "learning_rate": 7.526921191956215e-05, + "loss": 0.7812, + "step": 15413 + }, + { + "epoch": 0.83, + "learning_rate": 7.522329208275374e-05, + "loss": 0.8594, + "step": 15414 + }, + { + "epoch": 0.83, + "learning_rate": 7.517738511807509e-05, + "loss": 0.7422, + "step": 15415 + }, + { + "epoch": 0.83, + "learning_rate": 7.513149102691708e-05, + "loss": 0.8281, + "step": 15416 + }, + { + "epoch": 0.83, + "learning_rate": 7.508560981067042e-05, + "loss": 0.6602, + "step": 15417 + }, + { + "epoch": 0.83, + "learning_rate": 7.503974147072551e-05, + "loss": 0.6719, + "step": 15418 + }, + { + "epoch": 0.83, + "learning_rate": 7.499388600847246e-05, + "loss": 0.8164, + "step": 15419 + }, + { + "epoch": 0.83, + "learning_rate": 7.494804342530066e-05, + "loss": 0.8281, + "step": 15420 + }, + { + "epoch": 0.83, + "learning_rate": 7.490221372259943e-05, + "loss": 0.8281, + "step": 15421 + }, + { + "epoch": 0.83, + "learning_rate": 7.485639690175738e-05, + "loss": 0.8516, + "step": 15422 + }, + { + "epoch": 0.83, + "learning_rate": 7.481059296416304e-05, + "loss": 0.7617, + "step": 15423 + }, + { + "epoch": 0.83, + "learning_rate": 7.476480191120449e-05, + "loss": 0.8086, + "step": 15424 + }, + { + "epoch": 0.83, + "learning_rate": 7.471902374426931e-05, + "loss": 0.9062, + "step": 15425 + }, + { + "epoch": 0.83, + "learning_rate": 7.467325846474449e-05, + "loss": 0.8398, + "step": 15426 + }, + { + "epoch": 0.83, + "learning_rate": 7.462750607401709e-05, + "loss": 0.6953, + "step": 15427 + }, + { + "epoch": 0.83, + "learning_rate": 7.458176657347365e-05, + "loss": 0.7891, + "step": 15428 + }, + { + "epoch": 0.83, + "learning_rate": 7.453603996450009e-05, + "loss": 0.8438, + "step": 15429 + }, + { + "epoch": 0.83, + "learning_rate": 7.449032624848184e-05, + "loss": 0.75, + "step": 15430 + }, + { + "epoch": 0.83, + "learning_rate": 7.444462542680469e-05, + "loss": 0.8359, + "step": 15431 + }, + { + "epoch": 0.83, + "learning_rate": 7.439893750085325e-05, + "loss": 0.8867, + "step": 15432 + }, + { + "epoch": 0.83, + "learning_rate": 7.435326247201191e-05, + "loss": 0.832, + "step": 15433 + }, + { + "epoch": 0.83, + "learning_rate": 7.430760034166484e-05, + "loss": 0.7422, + "step": 15434 + }, + { + "epoch": 0.83, + "learning_rate": 7.426195111119593e-05, + "loss": 0.8359, + "step": 15435 + }, + { + "epoch": 0.83, + "learning_rate": 7.421631478198831e-05, + "loss": 0.8359, + "step": 15436 + }, + { + "epoch": 0.83, + "learning_rate": 7.417069135542503e-05, + "loss": 0.7891, + "step": 15437 + }, + { + "epoch": 0.83, + "learning_rate": 7.412508083288849e-05, + "loss": 0.7578, + "step": 15438 + }, + { + "epoch": 0.83, + "learning_rate": 7.407948321576096e-05, + "loss": 0.8867, + "step": 15439 + }, + { + "epoch": 0.83, + "learning_rate": 7.403389850542413e-05, + "loss": 0.832, + "step": 15440 + }, + { + "epoch": 0.83, + "learning_rate": 7.398832670325944e-05, + "loss": 0.7305, + "step": 15441 + }, + { + "epoch": 0.83, + "learning_rate": 7.394276781064779e-05, + "loss": 0.8008, + "step": 15442 + }, + { + "epoch": 0.83, + "learning_rate": 7.389722182896974e-05, + "loss": 0.8242, + "step": 15443 + }, + { + "epoch": 0.83, + "learning_rate": 7.385168875960563e-05, + "loss": 0.8594, + "step": 15444 + }, + { + "epoch": 0.83, + "learning_rate": 7.380616860393507e-05, + "loss": 0.8359, + "step": 15445 + }, + { + "epoch": 0.83, + "learning_rate": 7.376066136333753e-05, + "loss": 0.7539, + "step": 15446 + }, + { + "epoch": 0.83, + "learning_rate": 7.371516703919223e-05, + "loss": 0.793, + "step": 15447 + }, + { + "epoch": 0.83, + "learning_rate": 7.366968563287762e-05, + "loss": 0.918, + "step": 15448 + }, + { + "epoch": 0.83, + "learning_rate": 7.362421714577183e-05, + "loss": 0.7578, + "step": 15449 + }, + { + "epoch": 0.83, + "learning_rate": 7.357876157925286e-05, + "loss": 0.7578, + "step": 15450 + }, + { + "epoch": 0.83, + "learning_rate": 7.353331893469822e-05, + "loss": 0.8398, + "step": 15451 + }, + { + "epoch": 0.83, + "learning_rate": 7.348788921348486e-05, + "loss": 0.8008, + "step": 15452 + }, + { + "epoch": 0.83, + "learning_rate": 7.344247241698926e-05, + "loss": 0.8438, + "step": 15453 + }, + { + "epoch": 0.83, + "learning_rate": 7.339706854658811e-05, + "loss": 0.8281, + "step": 15454 + }, + { + "epoch": 0.83, + "learning_rate": 7.335167760365707e-05, + "loss": 0.7773, + "step": 15455 + }, + { + "epoch": 0.83, + "learning_rate": 7.330629958957158e-05, + "loss": 0.8711, + "step": 15456 + }, + { + "epoch": 0.83, + "learning_rate": 7.326093450570681e-05, + "loss": 0.6992, + "step": 15457 + }, + { + "epoch": 0.83, + "learning_rate": 7.321558235343762e-05, + "loss": 0.9219, + "step": 15458 + }, + { + "epoch": 0.83, + "learning_rate": 7.317024313413806e-05, + "loss": 0.832, + "step": 15459 + }, + { + "epoch": 0.83, + "learning_rate": 7.312491684918232e-05, + "loss": 0.9141, + "step": 15460 + }, + { + "epoch": 0.83, + "learning_rate": 7.307960349994363e-05, + "loss": 0.7266, + "step": 15461 + }, + { + "epoch": 0.83, + "learning_rate": 7.303430308779547e-05, + "loss": 0.7227, + "step": 15462 + }, + { + "epoch": 0.83, + "learning_rate": 7.298901561411031e-05, + "loss": 0.7812, + "step": 15463 + }, + { + "epoch": 0.83, + "learning_rate": 7.294374108026075e-05, + "loss": 0.7578, + "step": 15464 + }, + { + "epoch": 0.83, + "learning_rate": 7.289847948761852e-05, + "loss": 0.8008, + "step": 15465 + }, + { + "epoch": 0.83, + "learning_rate": 7.285323083755524e-05, + "loss": 0.8516, + "step": 15466 + }, + { + "epoch": 0.83, + "learning_rate": 7.280799513144237e-05, + "loss": 0.8164, + "step": 15467 + }, + { + "epoch": 0.83, + "learning_rate": 7.276277237065043e-05, + "loss": 0.7812, + "step": 15468 + }, + { + "epoch": 0.83, + "learning_rate": 7.271756255654977e-05, + "loss": 0.7578, + "step": 15469 + }, + { + "epoch": 0.83, + "learning_rate": 7.267236569051056e-05, + "loss": 0.8789, + "step": 15470 + }, + { + "epoch": 0.83, + "learning_rate": 7.262718177390243e-05, + "loss": 0.8086, + "step": 15471 + }, + { + "epoch": 0.83, + "learning_rate": 7.25820108080944e-05, + "loss": 0.7773, + "step": 15472 + }, + { + "epoch": 0.83, + "learning_rate": 7.253685279445544e-05, + "loss": 0.7578, + "step": 15473 + }, + { + "epoch": 0.83, + "learning_rate": 7.24917077343541e-05, + "loss": 0.7852, + "step": 15474 + }, + { + "epoch": 0.83, + "learning_rate": 7.244657562915829e-05, + "loss": 0.8594, + "step": 15475 + }, + { + "epoch": 0.83, + "learning_rate": 7.240145648023561e-05, + "loss": 0.7656, + "step": 15476 + }, + { + "epoch": 0.83, + "learning_rate": 7.235635028895332e-05, + "loss": 0.8164, + "step": 15477 + }, + { + "epoch": 0.83, + "learning_rate": 7.231125705667845e-05, + "loss": 0.6992, + "step": 15478 + }, + { + "epoch": 0.83, + "learning_rate": 7.226617678477732e-05, + "loss": 0.7734, + "step": 15479 + }, + { + "epoch": 0.83, + "learning_rate": 7.222110947461607e-05, + "loss": 0.7383, + "step": 15480 + }, + { + "epoch": 0.83, + "learning_rate": 7.217605512756038e-05, + "loss": 0.7734, + "step": 15481 + }, + { + "epoch": 0.83, + "learning_rate": 7.213101374497555e-05, + "loss": 0.8008, + "step": 15482 + }, + { + "epoch": 0.83, + "learning_rate": 7.208598532822641e-05, + "loss": 0.7734, + "step": 15483 + }, + { + "epoch": 0.83, + "learning_rate": 7.204096987867753e-05, + "loss": 0.8633, + "step": 15484 + }, + { + "epoch": 0.83, + "learning_rate": 7.19959673976931e-05, + "loss": 0.9375, + "step": 15485 + }, + { + "epoch": 0.83, + "learning_rate": 7.195097788663668e-05, + "loss": 0.7852, + "step": 15486 + }, + { + "epoch": 0.83, + "learning_rate": 7.19060013468718e-05, + "loss": 0.7109, + "step": 15487 + }, + { + "epoch": 0.83, + "learning_rate": 7.186103777976122e-05, + "loss": 0.6641, + "step": 15488 + }, + { + "epoch": 0.83, + "learning_rate": 7.181608718666749e-05, + "loss": 0.7422, + "step": 15489 + }, + { + "epoch": 0.83, + "learning_rate": 7.177114956895298e-05, + "loss": 0.7383, + "step": 15490 + }, + { + "epoch": 0.83, + "learning_rate": 7.172622492797926e-05, + "loss": 0.8867, + "step": 15491 + }, + { + "epoch": 0.83, + "learning_rate": 7.168131326510763e-05, + "loss": 0.7344, + "step": 15492 + }, + { + "epoch": 0.83, + "learning_rate": 7.163641458169912e-05, + "loss": 0.8242, + "step": 15493 + }, + { + "epoch": 0.83, + "learning_rate": 7.159152887911447e-05, + "loss": 0.7656, + "step": 15494 + }, + { + "epoch": 0.83, + "learning_rate": 7.154665615871376e-05, + "loss": 0.8398, + "step": 15495 + }, + { + "epoch": 0.83, + "learning_rate": 7.15017964218565e-05, + "loss": 0.8242, + "step": 15496 + }, + { + "epoch": 0.83, + "learning_rate": 7.145694966990257e-05, + "loss": 0.7383, + "step": 15497 + }, + { + "epoch": 0.83, + "learning_rate": 7.141211590421076e-05, + "loss": 0.8086, + "step": 15498 + }, + { + "epoch": 0.83, + "learning_rate": 7.136729512613955e-05, + "loss": 0.7734, + "step": 15499 + }, + { + "epoch": 0.83, + "learning_rate": 7.132248733704727e-05, + "loss": 0.75, + "step": 15500 + }, + { + "epoch": 0.83, + "learning_rate": 7.127769253829187e-05, + "loss": 0.75, + "step": 15501 + }, + { + "epoch": 0.83, + "learning_rate": 7.12329107312305e-05, + "loss": 0.7188, + "step": 15502 + }, + { + "epoch": 0.83, + "learning_rate": 7.118814191722051e-05, + "loss": 0.875, + "step": 15503 + }, + { + "epoch": 0.83, + "learning_rate": 7.114338609761828e-05, + "loss": 0.7617, + "step": 15504 + }, + { + "epoch": 0.83, + "learning_rate": 7.109864327378023e-05, + "loss": 0.8359, + "step": 15505 + }, + { + "epoch": 0.83, + "learning_rate": 7.105391344706207e-05, + "loss": 0.7891, + "step": 15506 + }, + { + "epoch": 0.83, + "learning_rate": 7.100919661881949e-05, + "loss": 0.7188, + "step": 15507 + }, + { + "epoch": 0.83, + "learning_rate": 7.096449279040723e-05, + "loss": 0.8281, + "step": 15508 + }, + { + "epoch": 0.83, + "learning_rate": 7.091980196318015e-05, + "loss": 0.8242, + "step": 15509 + }, + { + "epoch": 0.83, + "learning_rate": 7.087512413849267e-05, + "loss": 0.8164, + "step": 15510 + }, + { + "epoch": 0.83, + "learning_rate": 7.08304593176985e-05, + "loss": 0.7422, + "step": 15511 + }, + { + "epoch": 0.83, + "learning_rate": 7.078580750215102e-05, + "loss": 0.7461, + "step": 15512 + }, + { + "epoch": 0.83, + "learning_rate": 7.074116869320346e-05, + "loss": 0.8438, + "step": 15513 + }, + { + "epoch": 0.83, + "learning_rate": 7.069654289220868e-05, + "loss": 0.8047, + "step": 15514 + }, + { + "epoch": 0.83, + "learning_rate": 7.065193010051868e-05, + "loss": 0.8047, + "step": 15515 + }, + { + "epoch": 0.83, + "learning_rate": 7.060733031948552e-05, + "loss": 0.8086, + "step": 15516 + }, + { + "epoch": 0.83, + "learning_rate": 7.056274355046083e-05, + "loss": 0.832, + "step": 15517 + }, + { + "epoch": 0.83, + "learning_rate": 7.051816979479563e-05, + "loss": 0.7656, + "step": 15518 + }, + { + "epoch": 0.83, + "learning_rate": 7.047360905384048e-05, + "loss": 0.8477, + "step": 15519 + }, + { + "epoch": 0.83, + "learning_rate": 7.042906132894606e-05, + "loss": 0.7773, + "step": 15520 + }, + { + "epoch": 0.83, + "learning_rate": 7.038452662146216e-05, + "loss": 0.8203, + "step": 15521 + }, + { + "epoch": 0.83, + "learning_rate": 7.034000493273817e-05, + "loss": 0.8438, + "step": 15522 + }, + { + "epoch": 0.83, + "learning_rate": 7.029549626412345e-05, + "loss": 0.7812, + "step": 15523 + }, + { + "epoch": 0.83, + "learning_rate": 7.025100061696677e-05, + "loss": 0.8477, + "step": 15524 + }, + { + "epoch": 0.83, + "learning_rate": 7.020651799261623e-05, + "loss": 0.7578, + "step": 15525 + }, + { + "epoch": 0.83, + "learning_rate": 7.016204839242018e-05, + "loss": 0.8047, + "step": 15526 + }, + { + "epoch": 0.83, + "learning_rate": 7.011759181772587e-05, + "loss": 0.8672, + "step": 15527 + }, + { + "epoch": 0.83, + "learning_rate": 7.00731482698807e-05, + "loss": 0.793, + "step": 15528 + }, + { + "epoch": 0.83, + "learning_rate": 7.002871775023128e-05, + "loss": 0.7578, + "step": 15529 + }, + { + "epoch": 0.83, + "learning_rate": 6.99843002601242e-05, + "loss": 0.7852, + "step": 15530 + }, + { + "epoch": 0.83, + "learning_rate": 6.993989580090521e-05, + "loss": 0.8555, + "step": 15531 + }, + { + "epoch": 0.83, + "learning_rate": 6.989550437392006e-05, + "loss": 0.7852, + "step": 15532 + }, + { + "epoch": 0.83, + "learning_rate": 6.985112598051402e-05, + "loss": 0.832, + "step": 15533 + }, + { + "epoch": 0.83, + "learning_rate": 6.980676062203185e-05, + "loss": 0.7852, + "step": 15534 + }, + { + "epoch": 0.83, + "learning_rate": 6.976240829981784e-05, + "loss": 0.7734, + "step": 15535 + }, + { + "epoch": 0.83, + "learning_rate": 6.971806901521604e-05, + "loss": 0.6797, + "step": 15536 + }, + { + "epoch": 0.84, + "learning_rate": 6.967374276957033e-05, + "loss": 0.7656, + "step": 15537 + }, + { + "epoch": 0.84, + "learning_rate": 6.962942956422358e-05, + "loss": 0.8125, + "step": 15538 + }, + { + "epoch": 0.84, + "learning_rate": 6.958512940051887e-05, + "loss": 0.7656, + "step": 15539 + }, + { + "epoch": 0.84, + "learning_rate": 6.954084227979862e-05, + "loss": 0.8164, + "step": 15540 + }, + { + "epoch": 0.84, + "learning_rate": 6.949656820340483e-05, + "loss": 0.75, + "step": 15541 + }, + { + "epoch": 0.84, + "learning_rate": 6.945230717267908e-05, + "loss": 0.7461, + "step": 15542 + }, + { + "epoch": 0.84, + "learning_rate": 6.940805918896265e-05, + "loss": 0.8477, + "step": 15543 + }, + { + "epoch": 0.84, + "learning_rate": 6.936382425359655e-05, + "loss": 0.8711, + "step": 15544 + }, + { + "epoch": 0.84, + "learning_rate": 6.931960236792106e-05, + "loss": 0.7969, + "step": 15545 + }, + { + "epoch": 0.84, + "learning_rate": 6.927539353327639e-05, + "loss": 0.7383, + "step": 15546 + }, + { + "epoch": 0.84, + "learning_rate": 6.923119775100206e-05, + "loss": 0.8594, + "step": 15547 + }, + { + "epoch": 0.84, + "learning_rate": 6.918701502243752e-05, + "loss": 0.8281, + "step": 15548 + }, + { + "epoch": 0.84, + "learning_rate": 6.914284534892146e-05, + "loss": 0.7656, + "step": 15549 + }, + { + "epoch": 0.84, + "learning_rate": 6.90986887317926e-05, + "loss": 0.7656, + "step": 15550 + }, + { + "epoch": 0.84, + "learning_rate": 6.905454517238874e-05, + "loss": 0.8281, + "step": 15551 + }, + { + "epoch": 0.84, + "learning_rate": 6.901041467204778e-05, + "loss": 0.8633, + "step": 15552 + }, + { + "epoch": 0.84, + "learning_rate": 6.896629723210707e-05, + "loss": 0.7969, + "step": 15553 + }, + { + "epoch": 0.84, + "learning_rate": 6.892219285390345e-05, + "loss": 0.7852, + "step": 15554 + }, + { + "epoch": 0.84, + "learning_rate": 6.88781015387731e-05, + "loss": 0.7891, + "step": 15555 + }, + { + "epoch": 0.84, + "learning_rate": 6.883402328805272e-05, + "loss": 0.8203, + "step": 15556 + }, + { + "epoch": 0.84, + "learning_rate": 6.87899581030777e-05, + "loss": 0.7695, + "step": 15557 + }, + { + "epoch": 0.84, + "learning_rate": 6.874590598518327e-05, + "loss": 0.793, + "step": 15558 + }, + { + "epoch": 0.84, + "learning_rate": 6.870186693570452e-05, + "loss": 0.6602, + "step": 15559 + }, + { + "epoch": 0.84, + "learning_rate": 6.865784095597599e-05, + "loss": 0.9219, + "step": 15560 + }, + { + "epoch": 0.84, + "learning_rate": 6.86138280473318e-05, + "loss": 0.6797, + "step": 15561 + }, + { + "epoch": 0.84, + "learning_rate": 6.856982821110536e-05, + "loss": 0.7891, + "step": 15562 + }, + { + "epoch": 0.84, + "learning_rate": 6.852584144863055e-05, + "loss": 0.7539, + "step": 15563 + }, + { + "epoch": 0.84, + "learning_rate": 6.84818677612401e-05, + "loss": 0.8359, + "step": 15564 + }, + { + "epoch": 0.84, + "learning_rate": 6.843790715026637e-05, + "loss": 0.8711, + "step": 15565 + }, + { + "epoch": 0.84, + "learning_rate": 6.839395961704164e-05, + "loss": 0.7812, + "step": 15566 + }, + { + "epoch": 0.84, + "learning_rate": 6.835002516289779e-05, + "loss": 0.7305, + "step": 15567 + }, + { + "epoch": 0.84, + "learning_rate": 6.830610378916596e-05, + "loss": 0.8984, + "step": 15568 + }, + { + "epoch": 0.84, + "learning_rate": 6.826219549717733e-05, + "loss": 0.7383, + "step": 15569 + }, + { + "epoch": 0.84, + "learning_rate": 6.821830028826221e-05, + "loss": 0.7617, + "step": 15570 + }, + { + "epoch": 0.84, + "learning_rate": 6.817441816375102e-05, + "loss": 0.7188, + "step": 15571 + }, + { + "epoch": 0.84, + "learning_rate": 6.813054912497336e-05, + "loss": 0.7891, + "step": 15572 + }, + { + "epoch": 0.84, + "learning_rate": 6.808669317325877e-05, + "loss": 0.7734, + "step": 15573 + }, + { + "epoch": 0.84, + "learning_rate": 6.804285030993601e-05, + "loss": 0.7422, + "step": 15574 + }, + { + "epoch": 0.84, + "learning_rate": 6.799902053633378e-05, + "loss": 0.7891, + "step": 15575 + }, + { + "epoch": 0.84, + "learning_rate": 6.79552038537804e-05, + "loss": 0.8047, + "step": 15576 + }, + { + "epoch": 0.84, + "learning_rate": 6.791140026360355e-05, + "loss": 0.7773, + "step": 15577 + }, + { + "epoch": 0.84, + "learning_rate": 6.786760976713042e-05, + "loss": 0.7578, + "step": 15578 + }, + { + "epoch": 0.84, + "learning_rate": 6.782383236568823e-05, + "loss": 0.8203, + "step": 15579 + }, + { + "epoch": 0.84, + "learning_rate": 6.778006806060361e-05, + "loss": 0.7578, + "step": 15580 + }, + { + "epoch": 0.84, + "learning_rate": 6.773631685320264e-05, + "loss": 0.7539, + "step": 15581 + }, + { + "epoch": 0.84, + "learning_rate": 6.769257874481111e-05, + "loss": 0.8203, + "step": 15582 + }, + { + "epoch": 0.84, + "learning_rate": 6.764885373675467e-05, + "loss": 0.7578, + "step": 15583 + }, + { + "epoch": 0.84, + "learning_rate": 6.76051418303581e-05, + "loss": 0.7812, + "step": 15584 + }, + { + "epoch": 0.84, + "learning_rate": 6.756144302694595e-05, + "loss": 0.8125, + "step": 15585 + }, + { + "epoch": 0.84, + "learning_rate": 6.751775732784259e-05, + "loss": 0.8164, + "step": 15586 + }, + { + "epoch": 0.84, + "learning_rate": 6.747408473437189e-05, + "loss": 0.918, + "step": 15587 + }, + { + "epoch": 0.84, + "learning_rate": 6.743042524785708e-05, + "loss": 0.875, + "step": 15588 + }, + { + "epoch": 0.84, + "learning_rate": 6.738677886962141e-05, + "loss": 0.8398, + "step": 15589 + }, + { + "epoch": 0.84, + "learning_rate": 6.73431456009873e-05, + "loss": 0.7812, + "step": 15590 + }, + { + "epoch": 0.84, + "learning_rate": 6.729952544327717e-05, + "loss": 0.7461, + "step": 15591 + }, + { + "epoch": 0.84, + "learning_rate": 6.725591839781264e-05, + "loss": 0.6836, + "step": 15592 + }, + { + "epoch": 0.84, + "learning_rate": 6.721232446591524e-05, + "loss": 0.6719, + "step": 15593 + }, + { + "epoch": 0.84, + "learning_rate": 6.716874364890618e-05, + "loss": 0.8203, + "step": 15594 + }, + { + "epoch": 0.84, + "learning_rate": 6.712517594810585e-05, + "loss": 0.8125, + "step": 15595 + }, + { + "epoch": 0.84, + "learning_rate": 6.70816213648347e-05, + "loss": 0.8203, + "step": 15596 + }, + { + "epoch": 0.84, + "learning_rate": 6.703807990041238e-05, + "loss": 0.7266, + "step": 15597 + }, + { + "epoch": 0.84, + "learning_rate": 6.699455155615841e-05, + "loss": 0.8008, + "step": 15598 + }, + { + "epoch": 0.84, + "learning_rate": 6.6951036333392e-05, + "loss": 0.7812, + "step": 15599 + }, + { + "epoch": 0.84, + "learning_rate": 6.690753423343165e-05, + "loss": 0.8398, + "step": 15600 + }, + { + "epoch": 0.84, + "learning_rate": 6.686404525759554e-05, + "loss": 0.7617, + "step": 15601 + }, + { + "epoch": 0.84, + "learning_rate": 6.68205694072016e-05, + "loss": 0.8438, + "step": 15602 + }, + { + "epoch": 0.84, + "learning_rate": 6.677710668356751e-05, + "loss": 0.7891, + "step": 15603 + }, + { + "epoch": 0.84, + "learning_rate": 6.673365708800994e-05, + "loss": 0.8242, + "step": 15604 + }, + { + "epoch": 0.84, + "learning_rate": 6.669022062184582e-05, + "loss": 0.8008, + "step": 15605 + }, + { + "epoch": 0.84, + "learning_rate": 6.664679728639145e-05, + "loss": 0.9062, + "step": 15606 + }, + { + "epoch": 0.84, + "learning_rate": 6.660338708296259e-05, + "loss": 0.7852, + "step": 15607 + }, + { + "epoch": 0.84, + "learning_rate": 6.65599900128746e-05, + "loss": 0.6719, + "step": 15608 + }, + { + "epoch": 0.84, + "learning_rate": 6.651660607744275e-05, + "loss": 0.7266, + "step": 15609 + }, + { + "epoch": 0.84, + "learning_rate": 6.647323527798172e-05, + "loss": 0.7656, + "step": 15610 + }, + { + "epoch": 0.84, + "learning_rate": 6.642987761580555e-05, + "loss": 0.7422, + "step": 15611 + }, + { + "epoch": 0.84, + "learning_rate": 6.638653309222848e-05, + "loss": 0.8242, + "step": 15612 + }, + { + "epoch": 0.84, + "learning_rate": 6.634320170856361e-05, + "loss": 0.8047, + "step": 15613 + }, + { + "epoch": 0.84, + "learning_rate": 6.629988346612436e-05, + "loss": 0.8867, + "step": 15614 + }, + { + "epoch": 0.84, + "learning_rate": 6.625657836622317e-05, + "loss": 0.7578, + "step": 15615 + }, + { + "epoch": 0.84, + "learning_rate": 6.621328641017249e-05, + "loss": 0.8984, + "step": 15616 + }, + { + "epoch": 0.84, + "learning_rate": 6.617000759928405e-05, + "loss": 0.8477, + "step": 15617 + }, + { + "epoch": 0.84, + "learning_rate": 6.61267419348694e-05, + "loss": 0.8789, + "step": 15618 + }, + { + "epoch": 0.84, + "learning_rate": 6.608348941823983e-05, + "loss": 0.8203, + "step": 15619 + }, + { + "epoch": 0.84, + "learning_rate": 6.604025005070579e-05, + "loss": 0.7578, + "step": 15620 + }, + { + "epoch": 0.84, + "learning_rate": 6.599702383357747e-05, + "loss": 0.8008, + "step": 15621 + }, + { + "epoch": 0.84, + "learning_rate": 6.595381076816521e-05, + "loss": 0.7695, + "step": 15622 + }, + { + "epoch": 0.84, + "learning_rate": 6.591061085577815e-05, + "loss": 0.7695, + "step": 15623 + }, + { + "epoch": 0.84, + "learning_rate": 6.586742409772544e-05, + "loss": 0.7617, + "step": 15624 + }, + { + "epoch": 0.84, + "learning_rate": 6.582425049531576e-05, + "loss": 0.8828, + "step": 15625 + }, + { + "epoch": 0.84, + "learning_rate": 6.578109004985767e-05, + "loss": 0.7891, + "step": 15626 + }, + { + "epoch": 0.84, + "learning_rate": 6.573794276265882e-05, + "loss": 0.8633, + "step": 15627 + }, + { + "epoch": 0.84, + "learning_rate": 6.56948086350267e-05, + "loss": 0.8086, + "step": 15628 + }, + { + "epoch": 0.84, + "learning_rate": 6.565168766826845e-05, + "loss": 0.7188, + "step": 15629 + }, + { + "epoch": 0.84, + "learning_rate": 6.56085798636909e-05, + "loss": 0.793, + "step": 15630 + }, + { + "epoch": 0.84, + "learning_rate": 6.556548522260019e-05, + "loss": 0.8711, + "step": 15631 + }, + { + "epoch": 0.84, + "learning_rate": 6.552240374630236e-05, + "loss": 0.6719, + "step": 15632 + }, + { + "epoch": 0.84, + "learning_rate": 6.547933543610295e-05, + "loss": 0.793, + "step": 15633 + }, + { + "epoch": 0.84, + "learning_rate": 6.543628029330689e-05, + "loss": 0.7383, + "step": 15634 + }, + { + "epoch": 0.84, + "learning_rate": 6.539323831921906e-05, + "loss": 0.7891, + "step": 15635 + }, + { + "epoch": 0.84, + "learning_rate": 6.535020951514364e-05, + "loss": 0.8789, + "step": 15636 + }, + { + "epoch": 0.84, + "learning_rate": 6.53071938823847e-05, + "loss": 0.8281, + "step": 15637 + }, + { + "epoch": 0.84, + "learning_rate": 6.52641914222456e-05, + "loss": 0.7461, + "step": 15638 + }, + { + "epoch": 0.84, + "learning_rate": 6.522120213602961e-05, + "loss": 0.8125, + "step": 15639 + }, + { + "epoch": 0.84, + "learning_rate": 6.517822602503925e-05, + "loss": 0.7617, + "step": 15640 + }, + { + "epoch": 0.84, + "learning_rate": 6.513526309057693e-05, + "loss": 0.8672, + "step": 15641 + }, + { + "epoch": 0.84, + "learning_rate": 6.509231333394472e-05, + "loss": 0.832, + "step": 15642 + }, + { + "epoch": 0.84, + "learning_rate": 6.504937675644402e-05, + "loss": 0.793, + "step": 15643 + }, + { + "epoch": 0.84, + "learning_rate": 6.500645335937583e-05, + "loss": 0.7656, + "step": 15644 + }, + { + "epoch": 0.84, + "learning_rate": 6.49635431440409e-05, + "loss": 0.8516, + "step": 15645 + }, + { + "epoch": 0.84, + "learning_rate": 6.492064611173982e-05, + "loss": 0.7109, + "step": 15646 + }, + { + "epoch": 0.84, + "learning_rate": 6.487776226377217e-05, + "loss": 0.7539, + "step": 15647 + }, + { + "epoch": 0.84, + "learning_rate": 6.483489160143763e-05, + "loss": 0.832, + "step": 15648 + }, + { + "epoch": 0.84, + "learning_rate": 6.47920341260354e-05, + "loss": 0.7656, + "step": 15649 + }, + { + "epoch": 0.84, + "learning_rate": 6.474918983886412e-05, + "loss": 0.8359, + "step": 15650 + }, + { + "epoch": 0.84, + "learning_rate": 6.470635874122199e-05, + "loss": 0.8438, + "step": 15651 + }, + { + "epoch": 0.84, + "learning_rate": 6.466354083440706e-05, + "loss": 0.8125, + "step": 15652 + }, + { + "epoch": 0.84, + "learning_rate": 6.462073611971691e-05, + "loss": 0.8828, + "step": 15653 + }, + { + "epoch": 0.84, + "learning_rate": 6.457794459844851e-05, + "loss": 0.7852, + "step": 15654 + }, + { + "epoch": 0.84, + "learning_rate": 6.453516627189881e-05, + "loss": 0.8906, + "step": 15655 + }, + { + "epoch": 0.84, + "learning_rate": 6.449240114136384e-05, + "loss": 0.7695, + "step": 15656 + }, + { + "epoch": 0.84, + "learning_rate": 6.444964920813978e-05, + "loss": 0.793, + "step": 15657 + }, + { + "epoch": 0.84, + "learning_rate": 6.440691047352198e-05, + "loss": 0.8086, + "step": 15658 + }, + { + "epoch": 0.84, + "learning_rate": 6.436418493880569e-05, + "loss": 0.7656, + "step": 15659 + }, + { + "epoch": 0.84, + "learning_rate": 6.432147260528548e-05, + "loss": 0.8906, + "step": 15660 + }, + { + "epoch": 0.84, + "learning_rate": 6.427877347425582e-05, + "loss": 0.8594, + "step": 15661 + }, + { + "epoch": 0.84, + "learning_rate": 6.423608754701066e-05, + "loss": 0.7969, + "step": 15662 + }, + { + "epoch": 0.84, + "learning_rate": 6.419341482484342e-05, + "loss": 0.8594, + "step": 15663 + }, + { + "epoch": 0.84, + "learning_rate": 6.415075530904706e-05, + "loss": 0.7188, + "step": 15664 + }, + { + "epoch": 0.84, + "learning_rate": 6.410810900091474e-05, + "loss": 0.7852, + "step": 15665 + }, + { + "epoch": 0.84, + "learning_rate": 6.40654759017385e-05, + "loss": 0.7344, + "step": 15666 + }, + { + "epoch": 0.84, + "learning_rate": 6.402285601281021e-05, + "loss": 0.7188, + "step": 15667 + }, + { + "epoch": 0.84, + "learning_rate": 6.398024933542151e-05, + "loss": 0.7188, + "step": 15668 + }, + { + "epoch": 0.84, + "learning_rate": 6.393765587086358e-05, + "loss": 0.7227, + "step": 15669 + }, + { + "epoch": 0.84, + "learning_rate": 6.389507562042707e-05, + "loss": 0.8906, + "step": 15670 + }, + { + "epoch": 0.84, + "learning_rate": 6.385250858540203e-05, + "loss": 0.8125, + "step": 15671 + }, + { + "epoch": 0.84, + "learning_rate": 6.38099547670789e-05, + "loss": 0.8359, + "step": 15672 + }, + { + "epoch": 0.84, + "learning_rate": 6.376741416674692e-05, + "loss": 0.7891, + "step": 15673 + }, + { + "epoch": 0.84, + "learning_rate": 6.372488678569516e-05, + "loss": 0.7656, + "step": 15674 + }, + { + "epoch": 0.84, + "learning_rate": 6.368237262521237e-05, + "loss": 0.7891, + "step": 15675 + }, + { + "epoch": 0.84, + "learning_rate": 6.363987168658703e-05, + "loss": 0.8438, + "step": 15676 + }, + { + "epoch": 0.84, + "learning_rate": 6.359738397110682e-05, + "loss": 0.8125, + "step": 15677 + }, + { + "epoch": 0.84, + "learning_rate": 6.355490948005954e-05, + "loss": 0.9375, + "step": 15678 + }, + { + "epoch": 0.84, + "learning_rate": 6.351244821473196e-05, + "loss": 0.7891, + "step": 15679 + }, + { + "epoch": 0.84, + "learning_rate": 6.347000017641113e-05, + "loss": 0.7891, + "step": 15680 + }, + { + "epoch": 0.84, + "learning_rate": 6.34275653663831e-05, + "loss": 0.9844, + "step": 15681 + }, + { + "epoch": 0.84, + "learning_rate": 6.338514378593397e-05, + "loss": 0.75, + "step": 15682 + }, + { + "epoch": 0.84, + "learning_rate": 6.334273543634911e-05, + "loss": 0.7852, + "step": 15683 + }, + { + "epoch": 0.84, + "learning_rate": 6.330034031891368e-05, + "loss": 0.8984, + "step": 15684 + }, + { + "epoch": 0.84, + "learning_rate": 6.325795843491255e-05, + "loss": 0.7969, + "step": 15685 + }, + { + "epoch": 0.84, + "learning_rate": 6.321558978562985e-05, + "loss": 0.7852, + "step": 15686 + }, + { + "epoch": 0.84, + "learning_rate": 6.317323437234934e-05, + "loss": 0.8633, + "step": 15687 + }, + { + "epoch": 0.84, + "learning_rate": 6.313089219635498e-05, + "loss": 0.8281, + "step": 15688 + }, + { + "epoch": 0.84, + "learning_rate": 6.308856325892959e-05, + "loss": 0.793, + "step": 15689 + }, + { + "epoch": 0.84, + "learning_rate": 6.304624756135574e-05, + "loss": 0.8477, + "step": 15690 + }, + { + "epoch": 0.84, + "learning_rate": 6.300394510491597e-05, + "loss": 0.793, + "step": 15691 + }, + { + "epoch": 0.84, + "learning_rate": 6.29616558908922e-05, + "loss": 0.8789, + "step": 15692 + }, + { + "epoch": 0.84, + "learning_rate": 6.291937992056584e-05, + "loss": 0.8125, + "step": 15693 + }, + { + "epoch": 0.84, + "learning_rate": 6.287711719521789e-05, + "loss": 0.8242, + "step": 15694 + }, + { + "epoch": 0.84, + "learning_rate": 6.283486771612912e-05, + "loss": 0.8047, + "step": 15695 + }, + { + "epoch": 0.84, + "learning_rate": 6.279263148458003e-05, + "loss": 0.6602, + "step": 15696 + }, + { + "epoch": 0.84, + "learning_rate": 6.27504085018502e-05, + "loss": 0.8477, + "step": 15697 + }, + { + "epoch": 0.84, + "learning_rate": 6.270819876921941e-05, + "loss": 0.9023, + "step": 15698 + }, + { + "epoch": 0.84, + "learning_rate": 6.266600228796649e-05, + "loss": 0.8086, + "step": 15699 + }, + { + "epoch": 0.84, + "learning_rate": 6.262381905937025e-05, + "loss": 0.8828, + "step": 15700 + }, + { + "epoch": 0.84, + "learning_rate": 6.258164908470909e-05, + "loss": 0.7891, + "step": 15701 + }, + { + "epoch": 0.84, + "learning_rate": 6.253949236526086e-05, + "loss": 0.7852, + "step": 15702 + }, + { + "epoch": 0.84, + "learning_rate": 6.24973489023028e-05, + "loss": 0.7266, + "step": 15703 + }, + { + "epoch": 0.84, + "learning_rate": 6.245521869711224e-05, + "loss": 0.8203, + "step": 15704 + }, + { + "epoch": 0.84, + "learning_rate": 6.241310175096588e-05, + "loss": 0.8516, + "step": 15705 + }, + { + "epoch": 0.84, + "learning_rate": 6.23709980651398e-05, + "loss": 0.875, + "step": 15706 + }, + { + "epoch": 0.84, + "learning_rate": 6.232890764091004e-05, + "loss": 0.793, + "step": 15707 + }, + { + "epoch": 0.84, + "learning_rate": 6.228683047955213e-05, + "loss": 0.8203, + "step": 15708 + }, + { + "epoch": 0.84, + "learning_rate": 6.224476658234113e-05, + "loss": 0.7578, + "step": 15709 + }, + { + "epoch": 0.84, + "learning_rate": 6.220271595055149e-05, + "loss": 0.8477, + "step": 15710 + }, + { + "epoch": 0.84, + "learning_rate": 6.21606785854576e-05, + "loss": 0.832, + "step": 15711 + }, + { + "epoch": 0.84, + "learning_rate": 6.211865448833354e-05, + "loss": 0.7578, + "step": 15712 + }, + { + "epoch": 0.84, + "learning_rate": 6.207664366045246e-05, + "loss": 0.8242, + "step": 15713 + }, + { + "epoch": 0.84, + "learning_rate": 6.203464610308757e-05, + "loss": 0.7852, + "step": 15714 + }, + { + "epoch": 0.84, + "learning_rate": 6.199266181751168e-05, + "loss": 0.8008, + "step": 15715 + }, + { + "epoch": 0.84, + "learning_rate": 6.195069080499688e-05, + "loss": 0.7656, + "step": 15716 + }, + { + "epoch": 0.84, + "learning_rate": 6.190873306681494e-05, + "loss": 0.8398, + "step": 15717 + }, + { + "epoch": 0.84, + "learning_rate": 6.186678860423745e-05, + "loss": 0.7578, + "step": 15718 + }, + { + "epoch": 0.84, + "learning_rate": 6.182485741853555e-05, + "loss": 0.8125, + "step": 15719 + }, + { + "epoch": 0.84, + "learning_rate": 6.178293951097969e-05, + "loss": 0.8555, + "step": 15720 + }, + { + "epoch": 0.84, + "learning_rate": 6.174103488284033e-05, + "loss": 0.8047, + "step": 15721 + }, + { + "epoch": 0.84, + "learning_rate": 6.169914353538709e-05, + "loss": 0.8047, + "step": 15722 + }, + { + "epoch": 0.85, + "learning_rate": 6.165726546988965e-05, + "loss": 0.8125, + "step": 15723 + }, + { + "epoch": 0.85, + "learning_rate": 6.161540068761685e-05, + "loss": 0.8242, + "step": 15724 + }, + { + "epoch": 0.85, + "learning_rate": 6.157354918983749e-05, + "loss": 0.9258, + "step": 15725 + }, + { + "epoch": 0.85, + "learning_rate": 6.153171097781962e-05, + "loss": 0.668, + "step": 15726 + }, + { + "epoch": 0.85, + "learning_rate": 6.148988605283124e-05, + "loss": 0.7383, + "step": 15727 + }, + { + "epoch": 0.85, + "learning_rate": 6.144807441613981e-05, + "loss": 0.75, + "step": 15728 + }, + { + "epoch": 0.85, + "learning_rate": 6.140627606901234e-05, + "loss": 0.7617, + "step": 15729 + }, + { + "epoch": 0.85, + "learning_rate": 6.136449101271512e-05, + "loss": 0.7148, + "step": 15730 + }, + { + "epoch": 0.85, + "learning_rate": 6.13227192485149e-05, + "loss": 0.832, + "step": 15731 + }, + { + "epoch": 0.85, + "learning_rate": 6.128096077767725e-05, + "loss": 0.7109, + "step": 15732 + }, + { + "epoch": 0.85, + "learning_rate": 6.123921560146751e-05, + "loss": 0.7773, + "step": 15733 + }, + { + "epoch": 0.85, + "learning_rate": 6.119748372115074e-05, + "loss": 0.6875, + "step": 15734 + }, + { + "epoch": 0.85, + "learning_rate": 6.115576513799176e-05, + "loss": 0.8164, + "step": 15735 + }, + { + "epoch": 0.85, + "learning_rate": 6.111405985325458e-05, + "loss": 0.8242, + "step": 15736 + }, + { + "epoch": 0.85, + "learning_rate": 6.107236786820291e-05, + "loss": 0.9258, + "step": 15737 + }, + { + "epoch": 0.85, + "learning_rate": 6.1030689184100316e-05, + "loss": 0.8594, + "step": 15738 + }, + { + "epoch": 0.85, + "learning_rate": 6.0989023802209886e-05, + "loss": 0.8125, + "step": 15739 + }, + { + "epoch": 0.85, + "learning_rate": 6.0947371723793996e-05, + "loss": 0.7617, + "step": 15740 + }, + { + "epoch": 0.85, + "learning_rate": 6.0905732950114976e-05, + "loss": 0.8789, + "step": 15741 + }, + { + "epoch": 0.85, + "learning_rate": 6.086410748243465e-05, + "loss": 0.7227, + "step": 15742 + }, + { + "epoch": 0.85, + "learning_rate": 6.082249532201423e-05, + "loss": 0.8438, + "step": 15743 + }, + { + "epoch": 0.85, + "learning_rate": 6.078089647011498e-05, + "loss": 0.8359, + "step": 15744 + }, + { + "epoch": 0.85, + "learning_rate": 6.073931092799717e-05, + "loss": 0.8242, + "step": 15745 + }, + { + "epoch": 0.85, + "learning_rate": 6.0697738696921244e-05, + "loss": 0.875, + "step": 15746 + }, + { + "epoch": 0.85, + "learning_rate": 6.0656179778146746e-05, + "loss": 0.8164, + "step": 15747 + }, + { + "epoch": 0.85, + "learning_rate": 6.061463417293323e-05, + "loss": 0.7383, + "step": 15748 + }, + { + "epoch": 0.85, + "learning_rate": 6.057310188253956e-05, + "loss": 0.7891, + "step": 15749 + }, + { + "epoch": 0.85, + "learning_rate": 6.05315829082243e-05, + "loss": 0.793, + "step": 15750 + }, + { + "epoch": 0.85, + "learning_rate": 6.049007725124578e-05, + "loss": 0.9219, + "step": 15751 + }, + { + "epoch": 0.85, + "learning_rate": 6.0448584912861596e-05, + "loss": 0.8438, + "step": 15752 + }, + { + "epoch": 0.85, + "learning_rate": 6.040710589432902e-05, + "loss": 0.8359, + "step": 15753 + }, + { + "epoch": 0.85, + "learning_rate": 6.03656401969051e-05, + "loss": 0.8398, + "step": 15754 + }, + { + "epoch": 0.85, + "learning_rate": 6.0324187821846563e-05, + "loss": 0.8477, + "step": 15755 + }, + { + "epoch": 0.85, + "learning_rate": 6.028274877040923e-05, + "loss": 0.8594, + "step": 15756 + }, + { + "epoch": 0.85, + "learning_rate": 6.024132304384905e-05, + "loss": 0.8047, + "step": 15757 + }, + { + "epoch": 0.85, + "learning_rate": 6.019991064342134e-05, + "loss": 0.8047, + "step": 15758 + }, + { + "epoch": 0.85, + "learning_rate": 6.015851157038105e-05, + "loss": 0.8633, + "step": 15759 + }, + { + "epoch": 0.85, + "learning_rate": 6.0117125825982555e-05, + "loss": 0.8359, + "step": 15760 + }, + { + "epoch": 0.85, + "learning_rate": 6.007575341148003e-05, + "loss": 0.7891, + "step": 15761 + }, + { + "epoch": 0.85, + "learning_rate": 6.0034394328127404e-05, + "loss": 0.7422, + "step": 15762 + }, + { + "epoch": 0.85, + "learning_rate": 5.999304857717769e-05, + "loss": 0.8516, + "step": 15763 + }, + { + "epoch": 0.85, + "learning_rate": 5.995171615988404e-05, + "loss": 0.8633, + "step": 15764 + }, + { + "epoch": 0.85, + "learning_rate": 5.991039707749879e-05, + "loss": 0.8477, + "step": 15765 + }, + { + "epoch": 0.85, + "learning_rate": 5.9869091331274106e-05, + "loss": 0.7422, + "step": 15766 + }, + { + "epoch": 0.85, + "learning_rate": 5.982779892246182e-05, + "loss": 0.7188, + "step": 15767 + }, + { + "epoch": 0.85, + "learning_rate": 5.978651985231309e-05, + "loss": 0.8008, + "step": 15768 + }, + { + "epoch": 0.85, + "learning_rate": 5.9745254122078704e-05, + "loss": 0.8789, + "step": 15769 + }, + { + "epoch": 0.85, + "learning_rate": 5.970400173300933e-05, + "loss": 0.8008, + "step": 15770 + }, + { + "epoch": 0.85, + "learning_rate": 5.966276268635506e-05, + "loss": 0.8047, + "step": 15771 + }, + { + "epoch": 0.85, + "learning_rate": 5.962153698336548e-05, + "loss": 0.7812, + "step": 15772 + }, + { + "epoch": 0.85, + "learning_rate": 5.9580324625289725e-05, + "loss": 0.707, + "step": 15773 + }, + { + "epoch": 0.85, + "learning_rate": 5.953912561337704e-05, + "loss": 0.8711, + "step": 15774 + }, + { + "epoch": 0.85, + "learning_rate": 5.949793994887565e-05, + "loss": 0.7812, + "step": 15775 + }, + { + "epoch": 0.85, + "learning_rate": 5.9456767633033535e-05, + "loss": 0.8398, + "step": 15776 + }, + { + "epoch": 0.85, + "learning_rate": 5.94156086670985e-05, + "loss": 0.8594, + "step": 15777 + }, + { + "epoch": 0.85, + "learning_rate": 5.937446305231781e-05, + "loss": 0.9141, + "step": 15778 + }, + { + "epoch": 0.85, + "learning_rate": 5.933333078993819e-05, + "loss": 0.7266, + "step": 15779 + }, + { + "epoch": 0.85, + "learning_rate": 5.929221188120615e-05, + "loss": 0.7852, + "step": 15780 + }, + { + "epoch": 0.85, + "learning_rate": 5.92511063273678e-05, + "loss": 0.8477, + "step": 15781 + }, + { + "epoch": 0.85, + "learning_rate": 5.921001412966875e-05, + "loss": 0.7656, + "step": 15782 + }, + { + "epoch": 0.85, + "learning_rate": 5.9168935289354076e-05, + "loss": 0.832, + "step": 15783 + }, + { + "epoch": 0.85, + "learning_rate": 5.912786980766871e-05, + "loss": 0.7266, + "step": 15784 + }, + { + "epoch": 0.85, + "learning_rate": 5.9086817685857165e-05, + "loss": 0.7461, + "step": 15785 + }, + { + "epoch": 0.85, + "learning_rate": 5.9045778925163215e-05, + "loss": 0.8203, + "step": 15786 + }, + { + "epoch": 0.85, + "learning_rate": 5.900475352683077e-05, + "loss": 0.7734, + "step": 15787 + }, + { + "epoch": 0.85, + "learning_rate": 5.896374149210276e-05, + "loss": 0.8867, + "step": 15788 + }, + { + "epoch": 0.85, + "learning_rate": 5.8922742822222206e-05, + "loss": 0.8438, + "step": 15789 + }, + { + "epoch": 0.85, + "learning_rate": 5.888175751843133e-05, + "loss": 0.7812, + "step": 15790 + }, + { + "epoch": 0.85, + "learning_rate": 5.8840785581972255e-05, + "loss": 0.75, + "step": 15791 + }, + { + "epoch": 0.85, + "learning_rate": 5.8799827014086374e-05, + "loss": 0.7383, + "step": 15792 + }, + { + "epoch": 0.85, + "learning_rate": 5.875888181601502e-05, + "loss": 0.7852, + "step": 15793 + }, + { + "epoch": 0.85, + "learning_rate": 5.871794998899904e-05, + "loss": 0.8164, + "step": 15794 + }, + { + "epoch": 0.85, + "learning_rate": 5.867703153427872e-05, + "loss": 0.7422, + "step": 15795 + }, + { + "epoch": 0.85, + "learning_rate": 5.86361264530938e-05, + "loss": 0.7773, + "step": 15796 + }, + { + "epoch": 0.85, + "learning_rate": 5.859523474668427e-05, + "loss": 0.7148, + "step": 15797 + }, + { + "epoch": 0.85, + "learning_rate": 5.855435641628898e-05, + "loss": 0.8086, + "step": 15798 + }, + { + "epoch": 0.85, + "learning_rate": 5.8513491463146716e-05, + "loss": 0.8047, + "step": 15799 + }, + { + "epoch": 0.85, + "learning_rate": 5.8472639888495827e-05, + "loss": 0.875, + "step": 15800 + }, + { + "epoch": 0.85, + "learning_rate": 5.8431801693574424e-05, + "loss": 0.8086, + "step": 15801 + }, + { + "epoch": 0.85, + "learning_rate": 5.839097687961986e-05, + "loss": 0.7812, + "step": 15802 + }, + { + "epoch": 0.85, + "learning_rate": 5.8350165447869254e-05, + "loss": 0.8906, + "step": 15803 + }, + { + "epoch": 0.85, + "learning_rate": 5.8309367399559276e-05, + "loss": 0.7852, + "step": 15804 + }, + { + "epoch": 0.85, + "learning_rate": 5.82685827359265e-05, + "loss": 0.8008, + "step": 15805 + }, + { + "epoch": 0.85, + "learning_rate": 5.822781145820655e-05, + "loss": 0.9062, + "step": 15806 + }, + { + "epoch": 0.85, + "learning_rate": 5.818705356763521e-05, + "loss": 0.8203, + "step": 15807 + }, + { + "epoch": 0.85, + "learning_rate": 5.814630906544727e-05, + "loss": 0.8242, + "step": 15808 + }, + { + "epoch": 0.85, + "learning_rate": 5.810557795287752e-05, + "loss": 0.8047, + "step": 15809 + }, + { + "epoch": 0.85, + "learning_rate": 5.806486023116048e-05, + "loss": 0.7852, + "step": 15810 + }, + { + "epoch": 0.85, + "learning_rate": 5.802415590152982e-05, + "loss": 0.7773, + "step": 15811 + }, + { + "epoch": 0.85, + "learning_rate": 5.798346496521889e-05, + "loss": 0.7656, + "step": 15812 + }, + { + "epoch": 0.85, + "learning_rate": 5.7942787423460984e-05, + "loss": 0.8008, + "step": 15813 + }, + { + "epoch": 0.85, + "learning_rate": 5.7902123277488785e-05, + "loss": 0.8438, + "step": 15814 + }, + { + "epoch": 0.85, + "learning_rate": 5.786147252853441e-05, + "loss": 0.793, + "step": 15815 + }, + { + "epoch": 0.85, + "learning_rate": 5.78208351778296e-05, + "loss": 0.7617, + "step": 15816 + }, + { + "epoch": 0.85, + "learning_rate": 5.778021122660615e-05, + "loss": 0.6797, + "step": 15817 + }, + { + "epoch": 0.85, + "learning_rate": 5.773960067609491e-05, + "loss": 0.8047, + "step": 15818 + }, + { + "epoch": 0.85, + "learning_rate": 5.7699003527526386e-05, + "loss": 0.8125, + "step": 15819 + }, + { + "epoch": 0.85, + "learning_rate": 5.7658419782130934e-05, + "loss": 0.8477, + "step": 15820 + }, + { + "epoch": 0.85, + "learning_rate": 5.7617849441138516e-05, + "loss": 0.7578, + "step": 15821 + }, + { + "epoch": 0.85, + "learning_rate": 5.7577292505778253e-05, + "loss": 0.8203, + "step": 15822 + }, + { + "epoch": 0.85, + "learning_rate": 5.753674897727934e-05, + "loss": 0.8398, + "step": 15823 + }, + { + "epoch": 0.85, + "learning_rate": 5.74962188568704e-05, + "loss": 0.8164, + "step": 15824 + }, + { + "epoch": 0.85, + "learning_rate": 5.7455702145779607e-05, + "loss": 0.7891, + "step": 15825 + }, + { + "epoch": 0.85, + "learning_rate": 5.7415198845234554e-05, + "loss": 0.7656, + "step": 15826 + }, + { + "epoch": 0.85, + "learning_rate": 5.737470895646285e-05, + "loss": 0.8008, + "step": 15827 + }, + { + "epoch": 0.85, + "learning_rate": 5.7334232480691474e-05, + "loss": 0.875, + "step": 15828 + }, + { + "epoch": 0.85, + "learning_rate": 5.7293769419146826e-05, + "loss": 0.8438, + "step": 15829 + }, + { + "epoch": 0.85, + "learning_rate": 5.725331977305526e-05, + "loss": 0.7383, + "step": 15830 + }, + { + "epoch": 0.85, + "learning_rate": 5.7212883543642404e-05, + "loss": 0.7227, + "step": 15831 + }, + { + "epoch": 0.85, + "learning_rate": 5.7172460732133666e-05, + "loss": 0.8281, + "step": 15832 + }, + { + "epoch": 0.85, + "learning_rate": 5.713205133975391e-05, + "loss": 0.7148, + "step": 15833 + }, + { + "epoch": 0.85, + "learning_rate": 5.709165536772781e-05, + "loss": 0.7461, + "step": 15834 + }, + { + "epoch": 0.85, + "learning_rate": 5.70512728172794e-05, + "loss": 0.8789, + "step": 15835 + }, + { + "epoch": 0.85, + "learning_rate": 5.701090368963235e-05, + "loss": 0.8008, + "step": 15836 + }, + { + "epoch": 0.85, + "learning_rate": 5.6970547986010144e-05, + "loss": 0.875, + "step": 15837 + }, + { + "epoch": 0.85, + "learning_rate": 5.693020570763563e-05, + "loss": 0.9102, + "step": 15838 + }, + { + "epoch": 0.85, + "learning_rate": 5.68898768557311e-05, + "loss": 0.7227, + "step": 15839 + }, + { + "epoch": 0.85, + "learning_rate": 5.6849561431519036e-05, + "loss": 0.7734, + "step": 15840 + }, + { + "epoch": 0.85, + "learning_rate": 5.68092594362209e-05, + "loss": 0.7969, + "step": 15841 + }, + { + "epoch": 0.85, + "learning_rate": 5.676897087105792e-05, + "loss": 0.8438, + "step": 15842 + }, + { + "epoch": 0.85, + "learning_rate": 5.672869573725104e-05, + "loss": 0.875, + "step": 15843 + }, + { + "epoch": 0.85, + "learning_rate": 5.6688434036020805e-05, + "loss": 0.7695, + "step": 15844 + }, + { + "epoch": 0.85, + "learning_rate": 5.664818576858727e-05, + "loss": 0.7969, + "step": 15845 + }, + { + "epoch": 0.85, + "learning_rate": 5.6607950936169884e-05, + "loss": 0.9141, + "step": 15846 + }, + { + "epoch": 0.85, + "learning_rate": 5.656772953998807e-05, + "loss": 0.8867, + "step": 15847 + }, + { + "epoch": 0.85, + "learning_rate": 5.652752158126079e-05, + "loss": 0.7969, + "step": 15848 + }, + { + "epoch": 0.85, + "learning_rate": 5.648732706120613e-05, + "loss": 0.8242, + "step": 15849 + }, + { + "epoch": 0.85, + "learning_rate": 5.6447145981042504e-05, + "loss": 0.7891, + "step": 15850 + }, + { + "epoch": 0.85, + "learning_rate": 5.640697834198721e-05, + "loss": 0.8594, + "step": 15851 + }, + { + "epoch": 0.85, + "learning_rate": 5.636682414525757e-05, + "loss": 0.8047, + "step": 15852 + }, + { + "epoch": 0.85, + "learning_rate": 5.6326683392070535e-05, + "loss": 0.7305, + "step": 15853 + }, + { + "epoch": 0.85, + "learning_rate": 5.628655608364225e-05, + "loss": 0.8672, + "step": 15854 + }, + { + "epoch": 0.85, + "learning_rate": 5.624644222118891e-05, + "loss": 0.7461, + "step": 15855 + }, + { + "epoch": 0.85, + "learning_rate": 5.620634180592599e-05, + "loss": 0.8242, + "step": 15856 + }, + { + "epoch": 0.85, + "learning_rate": 5.616625483906873e-05, + "loss": 0.8867, + "step": 15857 + }, + { + "epoch": 0.85, + "learning_rate": 5.612618132183178e-05, + "loss": 0.8359, + "step": 15858 + }, + { + "epoch": 0.85, + "learning_rate": 5.608612125542956e-05, + "loss": 0.7773, + "step": 15859 + }, + { + "epoch": 0.85, + "learning_rate": 5.6046074641076196e-05, + "loss": 0.8242, + "step": 15860 + }, + { + "epoch": 0.85, + "learning_rate": 5.600604147998501e-05, + "loss": 0.8164, + "step": 15861 + }, + { + "epoch": 0.85, + "learning_rate": 5.5966021773369016e-05, + "loss": 0.7891, + "step": 15862 + }, + { + "epoch": 0.85, + "learning_rate": 5.5926015522441364e-05, + "loss": 0.7578, + "step": 15863 + }, + { + "epoch": 0.85, + "learning_rate": 5.588602272841409e-05, + "loss": 0.8594, + "step": 15864 + }, + { + "epoch": 0.85, + "learning_rate": 5.58460433924991e-05, + "loss": 0.7969, + "step": 15865 + }, + { + "epoch": 0.85, + "learning_rate": 5.580607751590794e-05, + "loss": 0.8008, + "step": 15866 + }, + { + "epoch": 0.85, + "learning_rate": 5.57661250998518e-05, + "loss": 0.7695, + "step": 15867 + }, + { + "epoch": 0.85, + "learning_rate": 5.572618614554131e-05, + "loss": 0.8125, + "step": 15868 + }, + { + "epoch": 0.85, + "learning_rate": 5.568626065418664e-05, + "loss": 0.8242, + "step": 15869 + }, + { + "epoch": 0.85, + "learning_rate": 5.564634862699774e-05, + "loss": 0.8203, + "step": 15870 + }, + { + "epoch": 0.85, + "learning_rate": 5.560645006518422e-05, + "loss": 0.75, + "step": 15871 + }, + { + "epoch": 0.85, + "learning_rate": 5.556656496995494e-05, + "loss": 0.8672, + "step": 15872 + }, + { + "epoch": 0.85, + "learning_rate": 5.552669334251864e-05, + "loss": 0.8438, + "step": 15873 + }, + { + "epoch": 0.85, + "learning_rate": 5.548683518408348e-05, + "loss": 0.7617, + "step": 15874 + }, + { + "epoch": 0.85, + "learning_rate": 5.544699049585738e-05, + "loss": 0.8359, + "step": 15875 + }, + { + "epoch": 0.85, + "learning_rate": 5.540715927904782e-05, + "loss": 0.6641, + "step": 15876 + }, + { + "epoch": 0.85, + "learning_rate": 5.5367341534861726e-05, + "loss": 0.7148, + "step": 15877 + }, + { + "epoch": 0.85, + "learning_rate": 5.5327537264505624e-05, + "loss": 0.7461, + "step": 15878 + }, + { + "epoch": 0.85, + "learning_rate": 5.528774646918583e-05, + "loss": 0.7695, + "step": 15879 + }, + { + "epoch": 0.85, + "learning_rate": 5.524796915010821e-05, + "loss": 0.7109, + "step": 15880 + }, + { + "epoch": 0.85, + "learning_rate": 5.5208205308478034e-05, + "loss": 0.7578, + "step": 15881 + }, + { + "epoch": 0.85, + "learning_rate": 5.51684549455001e-05, + "loss": 0.7266, + "step": 15882 + }, + { + "epoch": 0.85, + "learning_rate": 5.5128718062379344e-05, + "loss": 0.7656, + "step": 15883 + }, + { + "epoch": 0.85, + "learning_rate": 5.5088994660319744e-05, + "loss": 0.7852, + "step": 15884 + }, + { + "epoch": 0.85, + "learning_rate": 5.504928474052501e-05, + "loss": 0.8281, + "step": 15885 + }, + { + "epoch": 0.85, + "learning_rate": 5.5009588304198496e-05, + "loss": 0.7383, + "step": 15886 + }, + { + "epoch": 0.85, + "learning_rate": 5.496990535254326e-05, + "loss": 0.8398, + "step": 15887 + }, + { + "epoch": 0.85, + "learning_rate": 5.4930235886761605e-05, + "loss": 0.7969, + "step": 15888 + }, + { + "epoch": 0.85, + "learning_rate": 5.4890579908055906e-05, + "loss": 0.7891, + "step": 15889 + }, + { + "epoch": 0.85, + "learning_rate": 5.4850937417627645e-05, + "loss": 0.7891, + "step": 15890 + }, + { + "epoch": 0.85, + "learning_rate": 5.4811308416678306e-05, + "loss": 0.8555, + "step": 15891 + }, + { + "epoch": 0.85, + "learning_rate": 5.477169290640854e-05, + "loss": 0.8867, + "step": 15892 + }, + { + "epoch": 0.85, + "learning_rate": 5.4732090888019005e-05, + "loss": 0.793, + "step": 15893 + }, + { + "epoch": 0.85, + "learning_rate": 5.469250236270984e-05, + "loss": 0.7305, + "step": 15894 + }, + { + "epoch": 0.85, + "learning_rate": 5.465292733168048e-05, + "loss": 0.7617, + "step": 15895 + }, + { + "epoch": 0.85, + "learning_rate": 5.461336579613041e-05, + "loss": 0.7734, + "step": 15896 + }, + { + "epoch": 0.85, + "learning_rate": 5.457381775725828e-05, + "loss": 0.793, + "step": 15897 + }, + { + "epoch": 0.85, + "learning_rate": 5.4534283216262694e-05, + "loss": 0.7969, + "step": 15898 + }, + { + "epoch": 0.85, + "learning_rate": 5.449476217434152e-05, + "loss": 0.8281, + "step": 15899 + }, + { + "epoch": 0.85, + "learning_rate": 5.4455254632692576e-05, + "loss": 0.8086, + "step": 15900 + }, + { + "epoch": 0.85, + "learning_rate": 5.44157605925128e-05, + "loss": 0.7969, + "step": 15901 + }, + { + "epoch": 0.85, + "learning_rate": 5.437628005499912e-05, + "loss": 0.7656, + "step": 15902 + }, + { + "epoch": 0.85, + "learning_rate": 5.4336813021348076e-05, + "loss": 0.7344, + "step": 15903 + }, + { + "epoch": 0.85, + "learning_rate": 5.429735949275555e-05, + "loss": 0.8242, + "step": 15904 + }, + { + "epoch": 0.85, + "learning_rate": 5.42579194704168e-05, + "loss": 0.8203, + "step": 15905 + }, + { + "epoch": 0.85, + "learning_rate": 5.4218492955527555e-05, + "loss": 0.8242, + "step": 15906 + }, + { + "epoch": 0.85, + "learning_rate": 5.417907994928223e-05, + "loss": 0.7695, + "step": 15907 + }, + { + "epoch": 0.85, + "learning_rate": 5.4139680452875094e-05, + "loss": 0.8008, + "step": 15908 + }, + { + "epoch": 0.86, + "learning_rate": 5.4100294467500257e-05, + "loss": 0.7773, + "step": 15909 + }, + { + "epoch": 0.86, + "learning_rate": 5.4060921994351255e-05, + "loss": 0.8633, + "step": 15910 + }, + { + "epoch": 0.86, + "learning_rate": 5.402156303462119e-05, + "loss": 0.7266, + "step": 15911 + }, + { + "epoch": 0.86, + "learning_rate": 5.398221758950256e-05, + "loss": 0.7734, + "step": 15912 + }, + { + "epoch": 0.86, + "learning_rate": 5.394288566018785e-05, + "loss": 0.8359, + "step": 15913 + }, + { + "epoch": 0.86, + "learning_rate": 5.390356724786905e-05, + "loss": 0.7695, + "step": 15914 + }, + { + "epoch": 0.86, + "learning_rate": 5.386426235373737e-05, + "loss": 0.8242, + "step": 15915 + }, + { + "epoch": 0.86, + "learning_rate": 5.382497097898409e-05, + "loss": 0.8672, + "step": 15916 + }, + { + "epoch": 0.86, + "learning_rate": 5.3785693124799696e-05, + "loss": 0.8164, + "step": 15917 + }, + { + "epoch": 0.86, + "learning_rate": 5.374642879237457e-05, + "loss": 0.8281, + "step": 15918 + }, + { + "epoch": 0.86, + "learning_rate": 5.3707177982898594e-05, + "loss": 0.668, + "step": 15919 + }, + { + "epoch": 0.86, + "learning_rate": 5.366794069756109e-05, + "loss": 0.8164, + "step": 15920 + }, + { + "epoch": 0.86, + "learning_rate": 5.3628716937551e-05, + "loss": 0.7305, + "step": 15921 + }, + { + "epoch": 0.86, + "learning_rate": 5.358950670405705e-05, + "loss": 0.8203, + "step": 15922 + }, + { + "epoch": 0.86, + "learning_rate": 5.355030999826749e-05, + "loss": 0.8047, + "step": 15923 + }, + { + "epoch": 0.86, + "learning_rate": 5.3511126821370107e-05, + "loss": 0.7578, + "step": 15924 + }, + { + "epoch": 0.86, + "learning_rate": 5.347195717455194e-05, + "loss": 0.832, + "step": 15925 + }, + { + "epoch": 0.86, + "learning_rate": 5.343280105900045e-05, + "loss": 0.9219, + "step": 15926 + }, + { + "epoch": 0.86, + "learning_rate": 5.3393658475901994e-05, + "loss": 0.8672, + "step": 15927 + }, + { + "epoch": 0.86, + "learning_rate": 5.335452942644259e-05, + "loss": 0.7969, + "step": 15928 + }, + { + "epoch": 0.86, + "learning_rate": 5.331541391180811e-05, + "loss": 0.7852, + "step": 15929 + }, + { + "epoch": 0.86, + "learning_rate": 5.3276311933183943e-05, + "loss": 0.7656, + "step": 15930 + }, + { + "epoch": 0.86, + "learning_rate": 5.3237223491754805e-05, + "loss": 0.9766, + "step": 15931 + }, + { + "epoch": 0.86, + "learning_rate": 5.319814858870537e-05, + "loss": 0.8164, + "step": 15932 + }, + { + "epoch": 0.86, + "learning_rate": 5.315908722521978e-05, + "loss": 0.8789, + "step": 15933 + }, + { + "epoch": 0.86, + "learning_rate": 5.312003940248161e-05, + "loss": 0.9883, + "step": 15934 + }, + { + "epoch": 0.86, + "learning_rate": 5.3081005121674076e-05, + "loss": 0.8398, + "step": 15935 + }, + { + "epoch": 0.86, + "learning_rate": 5.304198438398017e-05, + "loss": 0.7461, + "step": 15936 + }, + { + "epoch": 0.86, + "learning_rate": 5.3002977190582336e-05, + "loss": 0.7578, + "step": 15937 + }, + { + "epoch": 0.86, + "learning_rate": 5.2963983542662517e-05, + "loss": 0.707, + "step": 15938 + }, + { + "epoch": 0.86, + "learning_rate": 5.292500344140255e-05, + "loss": 0.8633, + "step": 15939 + }, + { + "epoch": 0.86, + "learning_rate": 5.288603688798343e-05, + "loss": 0.875, + "step": 15940 + }, + { + "epoch": 0.86, + "learning_rate": 5.2847083883586055e-05, + "loss": 0.7422, + "step": 15941 + }, + { + "epoch": 0.86, + "learning_rate": 5.280814442939097e-05, + "loss": 0.7734, + "step": 15942 + }, + { + "epoch": 0.86, + "learning_rate": 5.276921852657807e-05, + "loss": 0.7539, + "step": 15943 + }, + { + "epoch": 0.86, + "learning_rate": 5.273030617632679e-05, + "loss": 0.8125, + "step": 15944 + }, + { + "epoch": 0.86, + "learning_rate": 5.2691407379816425e-05, + "loss": 0.7266, + "step": 15945 + }, + { + "epoch": 0.86, + "learning_rate": 5.2652522138225854e-05, + "loss": 0.8359, + "step": 15946 + }, + { + "epoch": 0.86, + "learning_rate": 5.261365045273325e-05, + "loss": 0.7383, + "step": 15947 + }, + { + "epoch": 0.86, + "learning_rate": 5.2574792324516504e-05, + "loss": 0.793, + "step": 15948 + }, + { + "epoch": 0.86, + "learning_rate": 5.25359477547534e-05, + "loss": 0.875, + "step": 15949 + }, + { + "epoch": 0.86, + "learning_rate": 5.249711674462093e-05, + "loss": 0.9062, + "step": 15950 + }, + { + "epoch": 0.86, + "learning_rate": 5.245829929529572e-05, + "loss": 0.8086, + "step": 15951 + }, + { + "epoch": 0.86, + "learning_rate": 5.24194954079541e-05, + "loss": 0.8516, + "step": 15952 + }, + { + "epoch": 0.86, + "learning_rate": 5.238070508377207e-05, + "loss": 0.7461, + "step": 15953 + }, + { + "epoch": 0.86, + "learning_rate": 5.2341928323924926e-05, + "loss": 0.7891, + "step": 15954 + }, + { + "epoch": 0.86, + "learning_rate": 5.230316512958794e-05, + "loss": 0.7539, + "step": 15955 + }, + { + "epoch": 0.86, + "learning_rate": 5.226441550193556e-05, + "loss": 0.7891, + "step": 15956 + }, + { + "epoch": 0.86, + "learning_rate": 5.222567944214218e-05, + "loss": 0.7266, + "step": 15957 + }, + { + "epoch": 0.86, + "learning_rate": 5.218695695138154e-05, + "loss": 0.7305, + "step": 15958 + }, + { + "epoch": 0.86, + "learning_rate": 5.214824803082713e-05, + "loss": 0.8867, + "step": 15959 + }, + { + "epoch": 0.86, + "learning_rate": 5.210955268165179e-05, + "loss": 0.8047, + "step": 15960 + }, + { + "epoch": 0.86, + "learning_rate": 5.2070870905028264e-05, + "loss": 0.7109, + "step": 15961 + }, + { + "epoch": 0.86, + "learning_rate": 5.203220270212883e-05, + "loss": 0.8477, + "step": 15962 + }, + { + "epoch": 0.86, + "learning_rate": 5.1993548074125094e-05, + "loss": 0.8516, + "step": 15963 + }, + { + "epoch": 0.86, + "learning_rate": 5.195490702218841e-05, + "loss": 0.7578, + "step": 15964 + }, + { + "epoch": 0.86, + "learning_rate": 5.191627954748979e-05, + "loss": 0.7812, + "step": 15965 + }, + { + "epoch": 0.86, + "learning_rate": 5.1877665651199835e-05, + "loss": 0.7422, + "step": 15966 + }, + { + "epoch": 0.86, + "learning_rate": 5.1839065334488514e-05, + "loss": 0.8086, + "step": 15967 + }, + { + "epoch": 0.86, + "learning_rate": 5.180047859852566e-05, + "loss": 0.8242, + "step": 15968 + }, + { + "epoch": 0.86, + "learning_rate": 5.176190544448062e-05, + "loss": 0.8438, + "step": 15969 + }, + { + "epoch": 0.86, + "learning_rate": 5.1723345873522286e-05, + "loss": 0.7852, + "step": 15970 + }, + { + "epoch": 0.86, + "learning_rate": 5.168479988681879e-05, + "loss": 0.9023, + "step": 15971 + }, + { + "epoch": 0.86, + "learning_rate": 5.1646267485538744e-05, + "loss": 0.8516, + "step": 15972 + }, + { + "epoch": 0.86, + "learning_rate": 5.1607748670849554e-05, + "loss": 0.8125, + "step": 15973 + }, + { + "epoch": 0.86, + "learning_rate": 5.156924344391833e-05, + "loss": 0.8164, + "step": 15974 + }, + { + "epoch": 0.86, + "learning_rate": 5.153075180591204e-05, + "loss": 0.8633, + "step": 15975 + }, + { + "epoch": 0.86, + "learning_rate": 5.1492273757997256e-05, + "loss": 0.7734, + "step": 15976 + }, + { + "epoch": 0.86, + "learning_rate": 5.145380930133981e-05, + "loss": 0.7734, + "step": 15977 + }, + { + "epoch": 0.86, + "learning_rate": 5.141535843710521e-05, + "loss": 0.7734, + "step": 15978 + }, + { + "epoch": 0.86, + "learning_rate": 5.137692116645881e-05, + "loss": 0.7148, + "step": 15979 + }, + { + "epoch": 0.86, + "learning_rate": 5.133849749056541e-05, + "loss": 0.8516, + "step": 15980 + }, + { + "epoch": 0.86, + "learning_rate": 5.130008741058917e-05, + "loss": 0.6797, + "step": 15981 + }, + { + "epoch": 0.86, + "learning_rate": 5.126169092769434e-05, + "loss": 0.7891, + "step": 15982 + }, + { + "epoch": 0.86, + "learning_rate": 5.122330804304415e-05, + "loss": 0.8984, + "step": 15983 + }, + { + "epoch": 0.86, + "learning_rate": 5.118493875780189e-05, + "loss": 0.7109, + "step": 15984 + }, + { + "epoch": 0.86, + "learning_rate": 5.114658307313036e-05, + "loss": 0.8477, + "step": 15985 + }, + { + "epoch": 0.86, + "learning_rate": 5.1108240990191736e-05, + "loss": 0.6953, + "step": 15986 + }, + { + "epoch": 0.86, + "learning_rate": 5.106991251014786e-05, + "loss": 0.7148, + "step": 15987 + }, + { + "epoch": 0.86, + "learning_rate": 5.1031597634160265e-05, + "loss": 0.8281, + "step": 15988 + }, + { + "epoch": 0.86, + "learning_rate": 5.099329636339017e-05, + "loss": 0.668, + "step": 15989 + }, + { + "epoch": 0.86, + "learning_rate": 5.0955008698998054e-05, + "loss": 0.8711, + "step": 15990 + }, + { + "epoch": 0.86, + "learning_rate": 5.0916734642144025e-05, + "loss": 0.875, + "step": 15991 + }, + { + "epoch": 0.86, + "learning_rate": 5.087847419398828e-05, + "loss": 0.6797, + "step": 15992 + }, + { + "epoch": 0.86, + "learning_rate": 5.084022735569005e-05, + "loss": 0.8164, + "step": 15993 + }, + { + "epoch": 0.86, + "learning_rate": 5.0801994128408246e-05, + "loss": 0.918, + "step": 15994 + }, + { + "epoch": 0.86, + "learning_rate": 5.076377451330155e-05, + "loss": 0.8281, + "step": 15995 + }, + { + "epoch": 0.86, + "learning_rate": 5.0725568511528266e-05, + "loss": 0.8672, + "step": 15996 + }, + { + "epoch": 0.86, + "learning_rate": 5.0687376124245896e-05, + "loss": 0.7617, + "step": 15997 + }, + { + "epoch": 0.86, + "learning_rate": 5.064919735261203e-05, + "loss": 0.8359, + "step": 15998 + }, + { + "epoch": 0.86, + "learning_rate": 5.0611032197783455e-05, + "loss": 0.8281, + "step": 15999 + }, + { + "epoch": 0.86, + "learning_rate": 5.057288066091681e-05, + "loss": 0.8242, + "step": 16000 + }, + { + "epoch": 0.86, + "learning_rate": 5.0534742743168104e-05, + "loss": 0.7539, + "step": 16001 + }, + { + "epoch": 0.86, + "learning_rate": 5.0496618445693087e-05, + "loss": 0.8281, + "step": 16002 + }, + { + "epoch": 0.86, + "learning_rate": 5.045850776964717e-05, + "loss": 0.7812, + "step": 16003 + }, + { + "epoch": 0.86, + "learning_rate": 5.042041071618503e-05, + "loss": 0.8164, + "step": 16004 + }, + { + "epoch": 0.86, + "learning_rate": 5.0382327286461306e-05, + "loss": 0.8398, + "step": 16005 + }, + { + "epoch": 0.86, + "learning_rate": 5.034425748162991e-05, + "loss": 0.832, + "step": 16006 + }, + { + "epoch": 0.86, + "learning_rate": 5.0306201302844635e-05, + "loss": 0.8164, + "step": 16007 + }, + { + "epoch": 0.86, + "learning_rate": 5.026815875125851e-05, + "loss": 0.8594, + "step": 16008 + }, + { + "epoch": 0.86, + "learning_rate": 5.023012982802455e-05, + "loss": 0.8281, + "step": 16009 + }, + { + "epoch": 0.86, + "learning_rate": 5.0192114534294954e-05, + "loss": 0.7188, + "step": 16010 + }, + { + "epoch": 0.86, + "learning_rate": 5.0154112871221844e-05, + "loss": 0.7461, + "step": 16011 + }, + { + "epoch": 0.86, + "learning_rate": 5.0116124839956866e-05, + "loss": 0.7656, + "step": 16012 + }, + { + "epoch": 0.86, + "learning_rate": 5.007815044165109e-05, + "loss": 0.7305, + "step": 16013 + }, + { + "epoch": 0.86, + "learning_rate": 5.004018967745505e-05, + "loss": 0.8398, + "step": 16014 + }, + { + "epoch": 0.86, + "learning_rate": 5.000224254851954e-05, + "loss": 0.7539, + "step": 16015 + }, + { + "epoch": 0.86, + "learning_rate": 4.9964309055994194e-05, + "loss": 0.7266, + "step": 16016 + }, + { + "epoch": 0.86, + "learning_rate": 4.992638920102849e-05, + "loss": 0.707, + "step": 16017 + }, + { + "epoch": 0.86, + "learning_rate": 4.9888482984771556e-05, + "loss": 0.8008, + "step": 16018 + }, + { + "epoch": 0.86, + "learning_rate": 4.9850590408372256e-05, + "loss": 0.8867, + "step": 16019 + }, + { + "epoch": 0.86, + "learning_rate": 4.981271147297861e-05, + "loss": 0.7422, + "step": 16020 + }, + { + "epoch": 0.86, + "learning_rate": 4.9774846179738707e-05, + "loss": 0.8594, + "step": 16021 + }, + { + "epoch": 0.86, + "learning_rate": 4.973699452979974e-05, + "loss": 0.7227, + "step": 16022 + }, + { + "epoch": 0.86, + "learning_rate": 4.9699156524308956e-05, + "loss": 0.7383, + "step": 16023 + }, + { + "epoch": 0.86, + "learning_rate": 4.966133216441282e-05, + "loss": 0.8164, + "step": 16024 + }, + { + "epoch": 0.86, + "learning_rate": 4.9623521451257705e-05, + "loss": 0.7812, + "step": 16025 + }, + { + "epoch": 0.86, + "learning_rate": 4.958572438598913e-05, + "loss": 0.8477, + "step": 16026 + }, + { + "epoch": 0.86, + "learning_rate": 4.954794096975268e-05, + "loss": 0.7539, + "step": 16027 + }, + { + "epoch": 0.86, + "learning_rate": 4.951017120369333e-05, + "loss": 0.7461, + "step": 16028 + }, + { + "epoch": 0.86, + "learning_rate": 4.9472415088955615e-05, + "loss": 0.8047, + "step": 16029 + }, + { + "epoch": 0.86, + "learning_rate": 4.94346726266835e-05, + "loss": 0.7852, + "step": 16030 + }, + { + "epoch": 0.86, + "learning_rate": 4.939694381802079e-05, + "loss": 0.9258, + "step": 16031 + }, + { + "epoch": 0.86, + "learning_rate": 4.935922866411097e-05, + "loss": 0.8438, + "step": 16032 + }, + { + "epoch": 0.86, + "learning_rate": 4.932152716609667e-05, + "loss": 0.8242, + "step": 16033 + }, + { + "epoch": 0.86, + "learning_rate": 4.9283839325120536e-05, + "loss": 0.8672, + "step": 16034 + }, + { + "epoch": 0.86, + "learning_rate": 4.924616514232466e-05, + "loss": 0.9062, + "step": 16035 + }, + { + "epoch": 0.86, + "learning_rate": 4.920850461885057e-05, + "loss": 0.7656, + "step": 16036 + }, + { + "epoch": 0.86, + "learning_rate": 4.917085775583951e-05, + "loss": 0.8516, + "step": 16037 + }, + { + "epoch": 0.86, + "learning_rate": 4.9133224554432366e-05, + "loss": 0.8633, + "step": 16038 + }, + { + "epoch": 0.86, + "learning_rate": 4.90956050157696e-05, + "loss": 0.9688, + "step": 16039 + }, + { + "epoch": 0.86, + "learning_rate": 4.905799914099101e-05, + "loss": 0.7109, + "step": 16040 + }, + { + "epoch": 0.86, + "learning_rate": 4.902040693123633e-05, + "loss": 0.8125, + "step": 16041 + }, + { + "epoch": 0.86, + "learning_rate": 4.898282838764478e-05, + "loss": 0.7812, + "step": 16042 + }, + { + "epoch": 0.86, + "learning_rate": 4.8945263511355084e-05, + "loss": 0.7773, + "step": 16043 + }, + { + "epoch": 0.86, + "learning_rate": 4.890771230350538e-05, + "loss": 0.8594, + "step": 16044 + }, + { + "epoch": 0.86, + "learning_rate": 4.887017476523381e-05, + "loss": 0.8047, + "step": 16045 + }, + { + "epoch": 0.86, + "learning_rate": 4.8832650897677854e-05, + "loss": 0.7148, + "step": 16046 + }, + { + "epoch": 0.86, + "learning_rate": 4.8795140701974495e-05, + "loss": 0.7773, + "step": 16047 + }, + { + "epoch": 0.86, + "learning_rate": 4.8757644179260594e-05, + "loss": 0.7539, + "step": 16048 + }, + { + "epoch": 0.86, + "learning_rate": 4.872016133067225e-05, + "loss": 0.9023, + "step": 16049 + }, + { + "epoch": 0.86, + "learning_rate": 4.8682692157345334e-05, + "loss": 0.7656, + "step": 16050 + }, + { + "epoch": 0.86, + "learning_rate": 4.864523666041548e-05, + "loss": 0.793, + "step": 16051 + }, + { + "epoch": 0.86, + "learning_rate": 4.860779484101757e-05, + "loss": 0.8477, + "step": 16052 + }, + { + "epoch": 0.86, + "learning_rate": 4.857036670028609e-05, + "loss": 0.7734, + "step": 16053 + }, + { + "epoch": 0.86, + "learning_rate": 4.853295223935539e-05, + "loss": 0.7656, + "step": 16054 + }, + { + "epoch": 0.86, + "learning_rate": 4.84955514593593e-05, + "loss": 0.7344, + "step": 16055 + }, + { + "epoch": 0.86, + "learning_rate": 4.845816436143108e-05, + "loss": 0.7891, + "step": 16056 + }, + { + "epoch": 0.86, + "learning_rate": 4.842079094670354e-05, + "loss": 0.7773, + "step": 16057 + }, + { + "epoch": 0.86, + "learning_rate": 4.838343121630956e-05, + "loss": 0.8047, + "step": 16058 + }, + { + "epoch": 0.86, + "learning_rate": 4.834608517138112e-05, + "loss": 0.7852, + "step": 16059 + }, + { + "epoch": 0.86, + "learning_rate": 4.8308752813049805e-05, + "loss": 0.7969, + "step": 16060 + }, + { + "epoch": 0.86, + "learning_rate": 4.827143414244695e-05, + "loss": 0.8203, + "step": 16061 + }, + { + "epoch": 0.86, + "learning_rate": 4.823412916070363e-05, + "loss": 0.8203, + "step": 16062 + }, + { + "epoch": 0.86, + "learning_rate": 4.8196837868950074e-05, + "loss": 0.7344, + "step": 16063 + }, + { + "epoch": 0.86, + "learning_rate": 4.815956026831647e-05, + "loss": 0.875, + "step": 16064 + }, + { + "epoch": 0.86, + "learning_rate": 4.812229635993231e-05, + "loss": 0.8789, + "step": 16065 + }, + { + "epoch": 0.86, + "learning_rate": 4.8085046144927036e-05, + "loss": 0.7812, + "step": 16066 + }, + { + "epoch": 0.86, + "learning_rate": 4.8047809624429174e-05, + "loss": 0.8945, + "step": 16067 + }, + { + "epoch": 0.86, + "learning_rate": 4.801058679956738e-05, + "loss": 0.8516, + "step": 16068 + }, + { + "epoch": 0.86, + "learning_rate": 4.7973377671469364e-05, + "loss": 0.8398, + "step": 16069 + }, + { + "epoch": 0.86, + "learning_rate": 4.7936182241262896e-05, + "loss": 0.7891, + "step": 16070 + }, + { + "epoch": 0.86, + "learning_rate": 4.789900051007512e-05, + "loss": 0.8125, + "step": 16071 + }, + { + "epoch": 0.86, + "learning_rate": 4.78618324790327e-05, + "loss": 0.8359, + "step": 16072 + }, + { + "epoch": 0.86, + "learning_rate": 4.782467814926184e-05, + "loss": 0.8359, + "step": 16073 + }, + { + "epoch": 0.86, + "learning_rate": 4.7787537521888526e-05, + "loss": 0.7734, + "step": 16074 + }, + { + "epoch": 0.86, + "learning_rate": 4.7750410598038366e-05, + "loss": 0.9062, + "step": 16075 + }, + { + "epoch": 0.86, + "learning_rate": 4.771329737883623e-05, + "loss": 0.957, + "step": 16076 + }, + { + "epoch": 0.86, + "learning_rate": 4.7676197865406835e-05, + "loss": 0.8242, + "step": 16077 + }, + { + "epoch": 0.86, + "learning_rate": 4.763911205887456e-05, + "loss": 0.7578, + "step": 16078 + }, + { + "epoch": 0.86, + "learning_rate": 4.7602039960363105e-05, + "loss": 0.7656, + "step": 16079 + }, + { + "epoch": 0.86, + "learning_rate": 4.75649815709957e-05, + "loss": 0.7617, + "step": 16080 + }, + { + "epoch": 0.86, + "learning_rate": 4.752793689189566e-05, + "loss": 0.8477, + "step": 16081 + }, + { + "epoch": 0.86, + "learning_rate": 4.749090592418548e-05, + "loss": 0.7734, + "step": 16082 + }, + { + "epoch": 0.86, + "learning_rate": 4.745388866898715e-05, + "loss": 0.7109, + "step": 16083 + }, + { + "epoch": 0.86, + "learning_rate": 4.741688512742248e-05, + "loss": 0.8047, + "step": 16084 + }, + { + "epoch": 0.86, + "learning_rate": 4.737989530061293e-05, + "loss": 0.8242, + "step": 16085 + }, + { + "epoch": 0.86, + "learning_rate": 4.734291918967937e-05, + "loss": 0.7461, + "step": 16086 + }, + { + "epoch": 0.86, + "learning_rate": 4.7305956795742125e-05, + "loss": 0.8438, + "step": 16087 + }, + { + "epoch": 0.86, + "learning_rate": 4.7269008119921406e-05, + "loss": 0.7695, + "step": 16088 + }, + { + "epoch": 0.86, + "learning_rate": 4.7232073163337e-05, + "loss": 0.7695, + "step": 16089 + }, + { + "epoch": 0.86, + "learning_rate": 4.7195151927107936e-05, + "loss": 0.7031, + "step": 16090 + }, + { + "epoch": 0.86, + "learning_rate": 4.715824441235322e-05, + "loss": 0.7148, + "step": 16091 + }, + { + "epoch": 0.86, + "learning_rate": 4.712135062019113e-05, + "loss": 0.8164, + "step": 16092 + }, + { + "epoch": 0.86, + "learning_rate": 4.70844705517397e-05, + "loss": 0.7734, + "step": 16093 + }, + { + "epoch": 0.86, + "learning_rate": 4.704760420811666e-05, + "loss": 0.8086, + "step": 16094 + }, + { + "epoch": 0.87, + "learning_rate": 4.701075159043905e-05, + "loss": 0.8086, + "step": 16095 + }, + { + "epoch": 0.87, + "learning_rate": 4.6973912699823595e-05, + "loss": 0.7461, + "step": 16096 + }, + { + "epoch": 0.87, + "learning_rate": 4.693708753738668e-05, + "loss": 0.7656, + "step": 16097 + }, + { + "epoch": 0.87, + "learning_rate": 4.6900276104244355e-05, + "loss": 0.6719, + "step": 16098 + }, + { + "epoch": 0.87, + "learning_rate": 4.686347840151195e-05, + "loss": 0.9141, + "step": 16099 + }, + { + "epoch": 0.87, + "learning_rate": 4.682669443030446e-05, + "loss": 0.8086, + "step": 16100 + }, + { + "epoch": 0.87, + "learning_rate": 4.678992419173694e-05, + "loss": 0.7461, + "step": 16101 + }, + { + "epoch": 0.87, + "learning_rate": 4.675316768692339e-05, + "loss": 0.8164, + "step": 16102 + }, + { + "epoch": 0.87, + "learning_rate": 4.6716424916977585e-05, + "loss": 0.7266, + "step": 16103 + }, + { + "epoch": 0.87, + "learning_rate": 4.6679695883013077e-05, + "loss": 0.7383, + "step": 16104 + }, + { + "epoch": 0.87, + "learning_rate": 4.6642980586142916e-05, + "loss": 0.8594, + "step": 16105 + }, + { + "epoch": 0.87, + "learning_rate": 4.66062790274796e-05, + "loss": 0.7578, + "step": 16106 + }, + { + "epoch": 0.87, + "learning_rate": 4.6569591208135366e-05, + "loss": 0.8828, + "step": 16107 + }, + { + "epoch": 0.87, + "learning_rate": 4.653291712922192e-05, + "loss": 0.7891, + "step": 16108 + }, + { + "epoch": 0.87, + "learning_rate": 4.649625679185071e-05, + "loss": 0.7617, + "step": 16109 + }, + { + "epoch": 0.87, + "learning_rate": 4.6459610197132464e-05, + "loss": 0.8086, + "step": 16110 + }, + { + "epoch": 0.87, + "learning_rate": 4.642297734617795e-05, + "loss": 0.7344, + "step": 16111 + }, + { + "epoch": 0.87, + "learning_rate": 4.638635824009707e-05, + "loss": 0.6875, + "step": 16112 + }, + { + "epoch": 0.87, + "learning_rate": 4.634975287999954e-05, + "loss": 0.8438, + "step": 16113 + }, + { + "epoch": 0.87, + "learning_rate": 4.6313161266994694e-05, + "loss": 0.7461, + "step": 16114 + }, + { + "epoch": 0.87, + "learning_rate": 4.627658340219132e-05, + "loss": 0.8711, + "step": 16115 + }, + { + "epoch": 0.87, + "learning_rate": 4.6240019286697854e-05, + "loss": 0.8047, + "step": 16116 + }, + { + "epoch": 0.87, + "learning_rate": 4.620346892162236e-05, + "loss": 0.7969, + "step": 16117 + }, + { + "epoch": 0.87, + "learning_rate": 4.616693230807245e-05, + "loss": 0.6953, + "step": 16118 + }, + { + "epoch": 0.87, + "learning_rate": 4.613040944715513e-05, + "loss": 0.7617, + "step": 16119 + }, + { + "epoch": 0.87, + "learning_rate": 4.609390033997729e-05, + "loss": 0.8203, + "step": 16120 + }, + { + "epoch": 0.87, + "learning_rate": 4.6057404987645324e-05, + "loss": 0.8672, + "step": 16121 + }, + { + "epoch": 0.87, + "learning_rate": 4.6020923391265126e-05, + "loss": 0.7773, + "step": 16122 + }, + { + "epoch": 0.87, + "learning_rate": 4.598445555194203e-05, + "loss": 0.75, + "step": 16123 + }, + { + "epoch": 0.87, + "learning_rate": 4.594800147078143e-05, + "loss": 0.8086, + "step": 16124 + }, + { + "epoch": 0.87, + "learning_rate": 4.5911561148887836e-05, + "loss": 0.8242, + "step": 16125 + }, + { + "epoch": 0.87, + "learning_rate": 4.5875134587365474e-05, + "loss": 0.8203, + "step": 16126 + }, + { + "epoch": 0.87, + "learning_rate": 4.583872178731829e-05, + "loss": 0.8125, + "step": 16127 + }, + { + "epoch": 0.87, + "learning_rate": 4.580232274984969e-05, + "loss": 0.8203, + "step": 16128 + }, + { + "epoch": 0.87, + "learning_rate": 4.576593747606261e-05, + "loss": 0.7344, + "step": 16129 + }, + { + "epoch": 0.87, + "learning_rate": 4.5729565967059794e-05, + "loss": 0.9258, + "step": 16130 + }, + { + "epoch": 0.87, + "learning_rate": 4.5693208223943185e-05, + "loss": 0.8398, + "step": 16131 + }, + { + "epoch": 0.87, + "learning_rate": 4.565686424781479e-05, + "loss": 0.8281, + "step": 16132 + }, + { + "epoch": 0.87, + "learning_rate": 4.562053403977579e-05, + "loss": 0.8516, + "step": 16133 + }, + { + "epoch": 0.87, + "learning_rate": 4.558421760092718e-05, + "loss": 0.8672, + "step": 16134 + }, + { + "epoch": 0.87, + "learning_rate": 4.5547914932369426e-05, + "loss": 0.8242, + "step": 16135 + }, + { + "epoch": 0.87, + "learning_rate": 4.551162603520259e-05, + "loss": 0.7852, + "step": 16136 + }, + { + "epoch": 0.87, + "learning_rate": 4.5475350910526515e-05, + "loss": 0.7383, + "step": 16137 + }, + { + "epoch": 0.87, + "learning_rate": 4.543908955944032e-05, + "loss": 0.8203, + "step": 16138 + }, + { + "epoch": 0.87, + "learning_rate": 4.540284198304273e-05, + "loss": 0.8477, + "step": 16139 + }, + { + "epoch": 0.87, + "learning_rate": 4.5366608182432325e-05, + "loss": 0.8125, + "step": 16140 + }, + { + "epoch": 0.87, + "learning_rate": 4.533038815870716e-05, + "loss": 0.8789, + "step": 16141 + }, + { + "epoch": 0.87, + "learning_rate": 4.5294181912964696e-05, + "loss": 0.7617, + "step": 16142 + }, + { + "epoch": 0.87, + "learning_rate": 4.52579894463021e-05, + "loss": 0.8359, + "step": 16143 + }, + { + "epoch": 0.87, + "learning_rate": 4.522181075981624e-05, + "loss": 0.7461, + "step": 16144 + }, + { + "epoch": 0.87, + "learning_rate": 4.518564585460339e-05, + "loss": 0.8203, + "step": 16145 + }, + { + "epoch": 0.87, + "learning_rate": 4.5149494731759334e-05, + "loss": 0.8359, + "step": 16146 + }, + { + "epoch": 0.87, + "learning_rate": 4.511335739237971e-05, + "loss": 0.7227, + "step": 16147 + }, + { + "epoch": 0.87, + "learning_rate": 4.507723383755968e-05, + "loss": 0.8359, + "step": 16148 + }, + { + "epoch": 0.87, + "learning_rate": 4.5041124068393727e-05, + "loss": 0.7773, + "step": 16149 + }, + { + "epoch": 0.87, + "learning_rate": 4.500502808597617e-05, + "loss": 0.793, + "step": 16150 + }, + { + "epoch": 0.87, + "learning_rate": 4.496894589140082e-05, + "loss": 0.7109, + "step": 16151 + }, + { + "epoch": 0.87, + "learning_rate": 4.493287748576119e-05, + "loss": 0.7734, + "step": 16152 + }, + { + "epoch": 0.87, + "learning_rate": 4.489682287015007e-05, + "loss": 0.7383, + "step": 16153 + }, + { + "epoch": 0.87, + "learning_rate": 4.486078204566013e-05, + "loss": 0.8086, + "step": 16154 + }, + { + "epoch": 0.87, + "learning_rate": 4.4824755013383624e-05, + "loss": 0.7695, + "step": 16155 + }, + { + "epoch": 0.87, + "learning_rate": 4.478874177441211e-05, + "loss": 0.793, + "step": 16156 + }, + { + "epoch": 0.87, + "learning_rate": 4.4752742329837114e-05, + "loss": 0.8281, + "step": 16157 + }, + { + "epoch": 0.87, + "learning_rate": 4.471675668074937e-05, + "loss": 0.7891, + "step": 16158 + }, + { + "epoch": 0.87, + "learning_rate": 4.46807848282394e-05, + "loss": 0.7969, + "step": 16159 + }, + { + "epoch": 0.87, + "learning_rate": 4.464482677339732e-05, + "loss": 0.7812, + "step": 16160 + }, + { + "epoch": 0.87, + "learning_rate": 4.460888251731282e-05, + "loss": 0.8203, + "step": 16161 + }, + { + "epoch": 0.87, + "learning_rate": 4.457295206107492e-05, + "loss": 0.875, + "step": 16162 + }, + { + "epoch": 0.87, + "learning_rate": 4.453703540577258e-05, + "loss": 0.7695, + "step": 16163 + }, + { + "epoch": 0.87, + "learning_rate": 4.4501132552494215e-05, + "loss": 0.8516, + "step": 16164 + }, + { + "epoch": 0.87, + "learning_rate": 4.4465243502327834e-05, + "loss": 0.7734, + "step": 16165 + }, + { + "epoch": 0.87, + "learning_rate": 4.442936825636068e-05, + "loss": 0.7148, + "step": 16166 + }, + { + "epoch": 0.87, + "learning_rate": 4.4393506815680386e-05, + "loss": 0.832, + "step": 16167 + }, + { + "epoch": 0.87, + "learning_rate": 4.4357659181373354e-05, + "loss": 0.8594, + "step": 16168 + }, + { + "epoch": 0.87, + "learning_rate": 4.432182535452589e-05, + "loss": 0.7266, + "step": 16169 + }, + { + "epoch": 0.87, + "learning_rate": 4.42860053362239e-05, + "loss": 0.7891, + "step": 16170 + }, + { + "epoch": 0.87, + "learning_rate": 4.425019912755296e-05, + "loss": 0.832, + "step": 16171 + }, + { + "epoch": 0.87, + "learning_rate": 4.421440672959798e-05, + "loss": 0.7852, + "step": 16172 + }, + { + "epoch": 0.87, + "learning_rate": 4.4178628143443756e-05, + "loss": 0.7617, + "step": 16173 + }, + { + "epoch": 0.87, + "learning_rate": 4.4142863370174255e-05, + "loss": 0.7109, + "step": 16174 + }, + { + "epoch": 0.87, + "learning_rate": 4.410711241087351e-05, + "loss": 0.7422, + "step": 16175 + }, + { + "epoch": 0.87, + "learning_rate": 4.407137526662469e-05, + "loss": 0.7188, + "step": 16176 + }, + { + "epoch": 0.87, + "learning_rate": 4.403565193851095e-05, + "loss": 0.7773, + "step": 16177 + }, + { + "epoch": 0.87, + "learning_rate": 4.399994242761457e-05, + "loss": 0.832, + "step": 16178 + }, + { + "epoch": 0.87, + "learning_rate": 4.396424673501781e-05, + "loss": 0.7734, + "step": 16179 + }, + { + "epoch": 0.87, + "learning_rate": 4.3928564861802525e-05, + "loss": 0.8281, + "step": 16180 + }, + { + "epoch": 0.87, + "learning_rate": 4.389289680904973e-05, + "loss": 0.8125, + "step": 16181 + }, + { + "epoch": 0.87, + "learning_rate": 4.385724257784041e-05, + "loss": 0.7852, + "step": 16182 + }, + { + "epoch": 0.87, + "learning_rate": 4.38216021692549e-05, + "loss": 0.8398, + "step": 16183 + }, + { + "epoch": 0.87, + "learning_rate": 4.378597558437342e-05, + "loss": 0.7383, + "step": 16184 + }, + { + "epoch": 0.87, + "learning_rate": 4.375036282427536e-05, + "loss": 0.7656, + "step": 16185 + }, + { + "epoch": 0.87, + "learning_rate": 4.371476389003998e-05, + "loss": 0.8398, + "step": 16186 + }, + { + "epoch": 0.87, + "learning_rate": 4.367917878274619e-05, + "loss": 0.7344, + "step": 16187 + }, + { + "epoch": 0.87, + "learning_rate": 4.3643607503472185e-05, + "loss": 0.8086, + "step": 16188 + }, + { + "epoch": 0.87, + "learning_rate": 4.3608050053295764e-05, + "loss": 0.8438, + "step": 16189 + }, + { + "epoch": 0.87, + "learning_rate": 4.3572506433294744e-05, + "loss": 0.7539, + "step": 16190 + }, + { + "epoch": 0.87, + "learning_rate": 4.353697664454609e-05, + "loss": 0.7539, + "step": 16191 + }, + { + "epoch": 0.87, + "learning_rate": 4.350146068812633e-05, + "loss": 0.7461, + "step": 16192 + }, + { + "epoch": 0.87, + "learning_rate": 4.3465958565111765e-05, + "loss": 0.8164, + "step": 16193 + }, + { + "epoch": 0.87, + "learning_rate": 4.3430470276578424e-05, + "loss": 0.7891, + "step": 16194 + }, + { + "epoch": 0.87, + "learning_rate": 4.3394995823601516e-05, + "loss": 0.7734, + "step": 16195 + }, + { + "epoch": 0.87, + "learning_rate": 4.335953520725616e-05, + "loss": 0.7891, + "step": 16196 + }, + { + "epoch": 0.87, + "learning_rate": 4.332408842861674e-05, + "loss": 0.7461, + "step": 16197 + }, + { + "epoch": 0.87, + "learning_rate": 4.3288655488757656e-05, + "loss": 0.8281, + "step": 16198 + }, + { + "epoch": 0.87, + "learning_rate": 4.3253236388752445e-05, + "loss": 0.7773, + "step": 16199 + }, + { + "epoch": 0.87, + "learning_rate": 4.321783112967453e-05, + "loss": 0.8398, + "step": 16200 + }, + { + "epoch": 0.87, + "learning_rate": 4.3182439712596725e-05, + "loss": 0.75, + "step": 16201 + }, + { + "epoch": 0.87, + "learning_rate": 4.31470621385916e-05, + "loss": 0.8516, + "step": 16202 + }, + { + "epoch": 0.87, + "learning_rate": 4.311169840873119e-05, + "loss": 0.8867, + "step": 16203 + }, + { + "epoch": 0.87, + "learning_rate": 4.307634852408715e-05, + "loss": 0.8633, + "step": 16204 + }, + { + "epoch": 0.87, + "learning_rate": 4.3041012485730504e-05, + "loss": 0.8008, + "step": 16205 + }, + { + "epoch": 0.87, + "learning_rate": 4.300569029473228e-05, + "loss": 0.7891, + "step": 16206 + }, + { + "epoch": 0.87, + "learning_rate": 4.2970381952162854e-05, + "loss": 0.7656, + "step": 16207 + }, + { + "epoch": 0.87, + "learning_rate": 4.293508745909197e-05, + "loss": 0.7734, + "step": 16208 + }, + { + "epoch": 0.87, + "learning_rate": 4.289980681658934e-05, + "loss": 0.7617, + "step": 16209 + }, + { + "epoch": 0.87, + "learning_rate": 4.286454002572415e-05, + "loss": 0.7461, + "step": 16210 + }, + { + "epoch": 0.87, + "learning_rate": 4.2829287087565003e-05, + "loss": 0.8047, + "step": 16211 + }, + { + "epoch": 0.87, + "learning_rate": 4.279404800318004e-05, + "loss": 0.7852, + "step": 16212 + }, + { + "epoch": 0.87, + "learning_rate": 4.275882277363729e-05, + "loss": 0.7305, + "step": 16213 + }, + { + "epoch": 0.87, + "learning_rate": 4.272361140000425e-05, + "loss": 0.832, + "step": 16214 + }, + { + "epoch": 0.87, + "learning_rate": 4.268841388334776e-05, + "loss": 0.8203, + "step": 16215 + }, + { + "epoch": 0.87, + "learning_rate": 4.265323022473461e-05, + "loss": 0.7617, + "step": 16216 + }, + { + "epoch": 0.87, + "learning_rate": 4.261806042523081e-05, + "loss": 0.8438, + "step": 16217 + }, + { + "epoch": 0.87, + "learning_rate": 4.258290448590224e-05, + "loss": 0.7539, + "step": 16218 + }, + { + "epoch": 0.87, + "learning_rate": 4.2547762407814105e-05, + "loss": 0.8945, + "step": 16219 + }, + { + "epoch": 0.87, + "learning_rate": 4.251263419203149e-05, + "loss": 0.8203, + "step": 16220 + }, + { + "epoch": 0.87, + "learning_rate": 4.2477519839618786e-05, + "loss": 0.7812, + "step": 16221 + }, + { + "epoch": 0.87, + "learning_rate": 4.244241935164006e-05, + "loss": 0.793, + "step": 16222 + }, + { + "epoch": 0.87, + "learning_rate": 4.240733272915914e-05, + "loss": 0.918, + "step": 16223 + }, + { + "epoch": 0.87, + "learning_rate": 4.2372259973239124e-05, + "loss": 0.7852, + "step": 16224 + }, + { + "epoch": 0.87, + "learning_rate": 4.2337201084942654e-05, + "loss": 0.8359, + "step": 16225 + }, + { + "epoch": 0.87, + "learning_rate": 4.23021560653325e-05, + "loss": 0.8086, + "step": 16226 + }, + { + "epoch": 0.87, + "learning_rate": 4.2267124915470526e-05, + "loss": 0.793, + "step": 16227 + }, + { + "epoch": 0.87, + "learning_rate": 4.2232107636418047e-05, + "loss": 0.7617, + "step": 16228 + }, + { + "epoch": 0.87, + "learning_rate": 4.219710422923645e-05, + "loss": 0.8047, + "step": 16229 + }, + { + "epoch": 0.87, + "learning_rate": 4.216211469498649e-05, + "loss": 0.7656, + "step": 16230 + }, + { + "epoch": 0.87, + "learning_rate": 4.212713903472837e-05, + "loss": 0.8281, + "step": 16231 + }, + { + "epoch": 0.87, + "learning_rate": 4.2092177249521744e-05, + "loss": 0.7344, + "step": 16232 + }, + { + "epoch": 0.87, + "learning_rate": 4.205722934042649e-05, + "loss": 0.8086, + "step": 16233 + }, + { + "epoch": 0.87, + "learning_rate": 4.202229530850143e-05, + "loss": 0.7969, + "step": 16234 + }, + { + "epoch": 0.87, + "learning_rate": 4.1987375154805106e-05, + "loss": 0.7617, + "step": 16235 + }, + { + "epoch": 0.87, + "learning_rate": 4.1952468880395835e-05, + "loss": 0.7656, + "step": 16236 + }, + { + "epoch": 0.87, + "learning_rate": 4.1917576486331374e-05, + "loss": 0.9023, + "step": 16237 + }, + { + "epoch": 0.87, + "learning_rate": 4.1882697973669004e-05, + "loss": 0.7852, + "step": 16238 + }, + { + "epoch": 0.87, + "learning_rate": 4.1847833343465815e-05, + "loss": 0.7422, + "step": 16239 + }, + { + "epoch": 0.87, + "learning_rate": 4.1812982596778125e-05, + "loss": 0.7344, + "step": 16240 + }, + { + "epoch": 0.87, + "learning_rate": 4.177814573466221e-05, + "loss": 0.832, + "step": 16241 + }, + { + "epoch": 0.87, + "learning_rate": 4.174332275817355e-05, + "loss": 0.7461, + "step": 16242 + }, + { + "epoch": 0.87, + "learning_rate": 4.170851366836759e-05, + "loss": 0.8555, + "step": 16243 + }, + { + "epoch": 0.87, + "learning_rate": 4.167371846629897e-05, + "loss": 0.8398, + "step": 16244 + }, + { + "epoch": 0.87, + "learning_rate": 4.1638937153022194e-05, + "loss": 0.7344, + "step": 16245 + }, + { + "epoch": 0.87, + "learning_rate": 4.1604169729591354e-05, + "loss": 0.8047, + "step": 16246 + }, + { + "epoch": 0.87, + "learning_rate": 4.156941619705995e-05, + "loss": 0.918, + "step": 16247 + }, + { + "epoch": 0.87, + "learning_rate": 4.153467655648096e-05, + "loss": 0.7891, + "step": 16248 + }, + { + "epoch": 0.87, + "learning_rate": 4.1499950808907224e-05, + "loss": 0.793, + "step": 16249 + }, + { + "epoch": 0.87, + "learning_rate": 4.146523895539117e-05, + "loss": 0.8477, + "step": 16250 + }, + { + "epoch": 0.87, + "learning_rate": 4.143054099698451e-05, + "loss": 0.8516, + "step": 16251 + }, + { + "epoch": 0.87, + "learning_rate": 4.139585693473874e-05, + "loss": 0.8633, + "step": 16252 + }, + { + "epoch": 0.87, + "learning_rate": 4.1361186769705014e-05, + "loss": 0.8086, + "step": 16253 + }, + { + "epoch": 0.87, + "learning_rate": 4.132653050293389e-05, + "loss": 0.8867, + "step": 16254 + }, + { + "epoch": 0.87, + "learning_rate": 4.129188813547546e-05, + "loss": 0.8867, + "step": 16255 + }, + { + "epoch": 0.87, + "learning_rate": 4.125725966837957e-05, + "loss": 0.8438, + "step": 16256 + }, + { + "epoch": 0.87, + "learning_rate": 4.122264510269574e-05, + "loss": 0.7852, + "step": 16257 + }, + { + "epoch": 0.87, + "learning_rate": 4.11880444394726e-05, + "loss": 0.7461, + "step": 16258 + }, + { + "epoch": 0.87, + "learning_rate": 4.115345767975898e-05, + "loss": 0.6758, + "step": 16259 + }, + { + "epoch": 0.87, + "learning_rate": 4.1118884824602634e-05, + "loss": 0.7617, + "step": 16260 + }, + { + "epoch": 0.87, + "learning_rate": 4.108432587505157e-05, + "loss": 0.8203, + "step": 16261 + }, + { + "epoch": 0.87, + "learning_rate": 4.1049780832152774e-05, + "loss": 0.7031, + "step": 16262 + }, + { + "epoch": 0.87, + "learning_rate": 4.10152496969532e-05, + "loss": 0.8008, + "step": 16263 + }, + { + "epoch": 0.87, + "learning_rate": 4.0980732470499324e-05, + "loss": 0.7695, + "step": 16264 + }, + { + "epoch": 0.87, + "learning_rate": 4.094622915383694e-05, + "loss": 0.8164, + "step": 16265 + }, + { + "epoch": 0.87, + "learning_rate": 4.0911739748011824e-05, + "loss": 0.8477, + "step": 16266 + }, + { + "epoch": 0.87, + "learning_rate": 4.0877264254068905e-05, + "loss": 0.7695, + "step": 16267 + }, + { + "epoch": 0.87, + "learning_rate": 4.084280267305302e-05, + "loss": 0.8281, + "step": 16268 + }, + { + "epoch": 0.87, + "learning_rate": 4.0808355006008544e-05, + "loss": 0.8125, + "step": 16269 + }, + { + "epoch": 0.87, + "learning_rate": 4.0773921253979275e-05, + "loss": 0.8203, + "step": 16270 + }, + { + "epoch": 0.87, + "learning_rate": 4.0739501418008586e-05, + "loss": 0.7891, + "step": 16271 + }, + { + "epoch": 0.87, + "learning_rate": 4.070509549913953e-05, + "loss": 0.8516, + "step": 16272 + }, + { + "epoch": 0.87, + "learning_rate": 4.0670703498414954e-05, + "loss": 0.7617, + "step": 16273 + }, + { + "epoch": 0.87, + "learning_rate": 4.063632541687673e-05, + "loss": 0.832, + "step": 16274 + }, + { + "epoch": 0.87, + "learning_rate": 4.0601961255566756e-05, + "loss": 0.8555, + "step": 16275 + }, + { + "epoch": 0.87, + "learning_rate": 4.056761101552647e-05, + "loss": 0.793, + "step": 16276 + }, + { + "epoch": 0.87, + "learning_rate": 4.0533274697796776e-05, + "loss": 0.7266, + "step": 16277 + }, + { + "epoch": 0.87, + "learning_rate": 4.049895230341799e-05, + "loss": 0.7031, + "step": 16278 + }, + { + "epoch": 0.87, + "learning_rate": 4.046464383343029e-05, + "loss": 0.8789, + "step": 16279 + }, + { + "epoch": 0.87, + "learning_rate": 4.043034928887351e-05, + "loss": 0.8359, + "step": 16280 + }, + { + "epoch": 0.88, + "learning_rate": 4.039606867078666e-05, + "loss": 0.793, + "step": 16281 + }, + { + "epoch": 0.88, + "learning_rate": 4.0361801980208725e-05, + "loss": 0.9297, + "step": 16282 + }, + { + "epoch": 0.88, + "learning_rate": 4.032754921817788e-05, + "loss": 0.7656, + "step": 16283 + }, + { + "epoch": 0.88, + "learning_rate": 4.029331038573236e-05, + "loss": 0.707, + "step": 16284 + }, + { + "epoch": 0.88, + "learning_rate": 4.02590854839095e-05, + "loss": 0.7227, + "step": 16285 + }, + { + "epoch": 0.88, + "learning_rate": 4.022487451374657e-05, + "loss": 0.8594, + "step": 16286 + }, + { + "epoch": 0.88, + "learning_rate": 4.019067747628013e-05, + "loss": 0.8516, + "step": 16287 + }, + { + "epoch": 0.88, + "learning_rate": 4.0156494372546596e-05, + "loss": 0.7734, + "step": 16288 + }, + { + "epoch": 0.88, + "learning_rate": 4.012232520358183e-05, + "loss": 0.8047, + "step": 16289 + }, + { + "epoch": 0.88, + "learning_rate": 4.008816997042125e-05, + "loss": 0.7812, + "step": 16290 + }, + { + "epoch": 0.88, + "learning_rate": 4.005402867409963e-05, + "loss": 0.8555, + "step": 16291 + }, + { + "epoch": 0.88, + "learning_rate": 4.001990131565197e-05, + "loss": 0.8164, + "step": 16292 + }, + { + "epoch": 0.88, + "learning_rate": 3.9985787896112235e-05, + "loss": 0.7109, + "step": 16293 + }, + { + "epoch": 0.88, + "learning_rate": 3.9951688416514086e-05, + "loss": 0.8398, + "step": 16294 + }, + { + "epoch": 0.88, + "learning_rate": 3.991760287789098e-05, + "loss": 0.7266, + "step": 16295 + }, + { + "epoch": 0.88, + "learning_rate": 3.988353128127587e-05, + "loss": 0.7969, + "step": 16296 + }, + { + "epoch": 0.88, + "learning_rate": 3.9849473627701136e-05, + "loss": 0.6953, + "step": 16297 + }, + { + "epoch": 0.88, + "learning_rate": 3.981542991819881e-05, + "loss": 0.707, + "step": 16298 + }, + { + "epoch": 0.88, + "learning_rate": 3.978140015380055e-05, + "loss": 0.7695, + "step": 16299 + }, + { + "epoch": 0.88, + "learning_rate": 3.9747384335537704e-05, + "loss": 0.8438, + "step": 16300 + }, + { + "epoch": 0.88, + "learning_rate": 3.971338246444084e-05, + "loss": 0.8672, + "step": 16301 + }, + { + "epoch": 0.88, + "learning_rate": 3.967939454154046e-05, + "loss": 0.8203, + "step": 16302 + }, + { + "epoch": 0.88, + "learning_rate": 3.9645420567866575e-05, + "loss": 0.8086, + "step": 16303 + }, + { + "epoch": 0.88, + "learning_rate": 3.961146054444853e-05, + "loss": 0.7969, + "step": 16304 + }, + { + "epoch": 0.88, + "learning_rate": 3.957751447231561e-05, + "loss": 0.8008, + "step": 16305 + }, + { + "epoch": 0.88, + "learning_rate": 3.9543582352496335e-05, + "loss": 0.8359, + "step": 16306 + }, + { + "epoch": 0.88, + "learning_rate": 3.950966418601909e-05, + "loss": 0.8594, + "step": 16307 + }, + { + "epoch": 0.88, + "learning_rate": 3.9475759973911564e-05, + "loss": 0.8828, + "step": 16308 + }, + { + "epoch": 0.88, + "learning_rate": 3.944186971720132e-05, + "loss": 0.7578, + "step": 16309 + }, + { + "epoch": 0.88, + "learning_rate": 3.9407993416915146e-05, + "loss": 0.75, + "step": 16310 + }, + { + "epoch": 0.88, + "learning_rate": 3.937413107407978e-05, + "loss": 0.7227, + "step": 16311 + }, + { + "epoch": 0.88, + "learning_rate": 3.9340282689721396e-05, + "loss": 0.7188, + "step": 16312 + }, + { + "epoch": 0.88, + "learning_rate": 3.930644826486562e-05, + "loss": 0.7539, + "step": 16313 + }, + { + "epoch": 0.88, + "learning_rate": 3.927262780053764e-05, + "loss": 0.875, + "step": 16314 + }, + { + "epoch": 0.88, + "learning_rate": 3.923882129776252e-05, + "loss": 0.8633, + "step": 16315 + }, + { + "epoch": 0.88, + "learning_rate": 3.920502875756466e-05, + "loss": 0.8203, + "step": 16316 + }, + { + "epoch": 0.88, + "learning_rate": 3.917125018096796e-05, + "loss": 0.8398, + "step": 16317 + }, + { + "epoch": 0.88, + "learning_rate": 3.91374855689961e-05, + "loss": 0.8164, + "step": 16318 + }, + { + "epoch": 0.88, + "learning_rate": 3.910373492267238e-05, + "loss": 0.7969, + "step": 16319 + }, + { + "epoch": 0.88, + "learning_rate": 3.9069998243019436e-05, + "loss": 0.6562, + "step": 16320 + }, + { + "epoch": 0.88, + "learning_rate": 3.903627553105954e-05, + "loss": 0.8555, + "step": 16321 + }, + { + "epoch": 0.88, + "learning_rate": 3.9002566787814663e-05, + "loss": 0.7656, + "step": 16322 + }, + { + "epoch": 0.88, + "learning_rate": 3.8968872014306375e-05, + "loss": 0.7852, + "step": 16323 + }, + { + "epoch": 0.88, + "learning_rate": 3.893519121155564e-05, + "loss": 0.793, + "step": 16324 + }, + { + "epoch": 0.88, + "learning_rate": 3.8901524380583144e-05, + "loss": 0.7109, + "step": 16325 + }, + { + "epoch": 0.88, + "learning_rate": 3.8867871522409e-05, + "loss": 0.8633, + "step": 16326 + }, + { + "epoch": 0.88, + "learning_rate": 3.883423263805319e-05, + "loss": 0.8242, + "step": 16327 + }, + { + "epoch": 0.88, + "learning_rate": 3.880060772853489e-05, + "loss": 0.75, + "step": 16328 + }, + { + "epoch": 0.88, + "learning_rate": 3.8766996794873166e-05, + "loss": 0.7812, + "step": 16329 + }, + { + "epoch": 0.88, + "learning_rate": 3.873339983808649e-05, + "loss": 0.7227, + "step": 16330 + }, + { + "epoch": 0.88, + "learning_rate": 3.869981685919294e-05, + "loss": 0.8164, + "step": 16331 + }, + { + "epoch": 0.88, + "learning_rate": 3.86662478592103e-05, + "loss": 0.8828, + "step": 16332 + }, + { + "epoch": 0.88, + "learning_rate": 3.8632692839155715e-05, + "loss": 0.7617, + "step": 16333 + }, + { + "epoch": 0.88, + "learning_rate": 3.8599151800045916e-05, + "loss": 0.8281, + "step": 16334 + }, + { + "epoch": 0.88, + "learning_rate": 3.85656247428976e-05, + "loss": 0.8008, + "step": 16335 + }, + { + "epoch": 0.88, + "learning_rate": 3.8532111668726556e-05, + "loss": 0.75, + "step": 16336 + }, + { + "epoch": 0.88, + "learning_rate": 3.8498612578548254e-05, + "loss": 0.7031, + "step": 16337 + }, + { + "epoch": 0.88, + "learning_rate": 3.846512747337799e-05, + "loss": 0.7148, + "step": 16338 + }, + { + "epoch": 0.88, + "learning_rate": 3.843165635423046e-05, + "loss": 0.9102, + "step": 16339 + }, + { + "epoch": 0.88, + "learning_rate": 3.8398199222119955e-05, + "loss": 0.7891, + "step": 16340 + }, + { + "epoch": 0.88, + "learning_rate": 3.836475607806006e-05, + "loss": 0.8203, + "step": 16341 + }, + { + "epoch": 0.88, + "learning_rate": 3.8331326923064625e-05, + "loss": 0.7852, + "step": 16342 + }, + { + "epoch": 0.88, + "learning_rate": 3.829791175814651e-05, + "loss": 0.8047, + "step": 16343 + }, + { + "epoch": 0.88, + "learning_rate": 3.826451058431818e-05, + "loss": 0.8516, + "step": 16344 + }, + { + "epoch": 0.88, + "learning_rate": 3.8231123402591883e-05, + "loss": 0.8281, + "step": 16345 + }, + { + "epoch": 0.88, + "learning_rate": 3.819775021397942e-05, + "loss": 0.875, + "step": 16346 + }, + { + "epoch": 0.88, + "learning_rate": 3.816439101949204e-05, + "loss": 0.8086, + "step": 16347 + }, + { + "epoch": 0.88, + "learning_rate": 3.813104582014071e-05, + "loss": 0.7773, + "step": 16348 + }, + { + "epoch": 0.88, + "learning_rate": 3.809771461693573e-05, + "loss": 0.8008, + "step": 16349 + }, + { + "epoch": 0.88, + "learning_rate": 3.8064397410887354e-05, + "loss": 0.7266, + "step": 16350 + }, + { + "epoch": 0.88, + "learning_rate": 3.8031094203005045e-05, + "loss": 0.707, + "step": 16351 + }, + { + "epoch": 0.88, + "learning_rate": 3.799780499429811e-05, + "loss": 0.7812, + "step": 16352 + }, + { + "epoch": 0.88, + "learning_rate": 3.7964529785775245e-05, + "loss": 0.7617, + "step": 16353 + }, + { + "epoch": 0.88, + "learning_rate": 3.793126857844481e-05, + "loss": 0.8281, + "step": 16354 + }, + { + "epoch": 0.88, + "learning_rate": 3.7898021373314775e-05, + "loss": 0.8359, + "step": 16355 + }, + { + "epoch": 0.88, + "learning_rate": 3.786478817139266e-05, + "loss": 0.7812, + "step": 16356 + }, + { + "epoch": 0.88, + "learning_rate": 3.7831568973685337e-05, + "loss": 0.7891, + "step": 16357 + }, + { + "epoch": 0.88, + "learning_rate": 3.7798363781199716e-05, + "loss": 0.7109, + "step": 16358 + }, + { + "epoch": 0.88, + "learning_rate": 3.776517259494194e-05, + "loss": 0.8008, + "step": 16359 + }, + { + "epoch": 0.88, + "learning_rate": 3.773199541591771e-05, + "loss": 0.8125, + "step": 16360 + }, + { + "epoch": 0.88, + "learning_rate": 3.769883224513249e-05, + "loss": 0.7773, + "step": 16361 + }, + { + "epoch": 0.88, + "learning_rate": 3.7665683083591304e-05, + "loss": 0.7344, + "step": 16362 + }, + { + "epoch": 0.88, + "learning_rate": 3.763254793229864e-05, + "loss": 0.8359, + "step": 16363 + }, + { + "epoch": 0.88, + "learning_rate": 3.759942679225842e-05, + "loss": 0.7227, + "step": 16364 + }, + { + "epoch": 0.88, + "learning_rate": 3.756631966447449e-05, + "loss": 0.793, + "step": 16365 + }, + { + "epoch": 0.88, + "learning_rate": 3.7533226549950174e-05, + "loss": 0.75, + "step": 16366 + }, + { + "epoch": 0.88, + "learning_rate": 3.75001474496881e-05, + "loss": 0.6992, + "step": 16367 + }, + { + "epoch": 0.88, + "learning_rate": 3.746708236469087e-05, + "loss": 0.7734, + "step": 16368 + }, + { + "epoch": 0.88, + "learning_rate": 3.743403129596029e-05, + "loss": 0.7773, + "step": 16369 + }, + { + "epoch": 0.88, + "learning_rate": 3.7400994244498e-05, + "loss": 0.8008, + "step": 16370 + }, + { + "epoch": 0.88, + "learning_rate": 3.736797121130525e-05, + "loss": 0.8047, + "step": 16371 + }, + { + "epoch": 0.88, + "learning_rate": 3.733496219738264e-05, + "loss": 0.8945, + "step": 16372 + }, + { + "epoch": 0.88, + "learning_rate": 3.73019672037303e-05, + "loss": 0.7969, + "step": 16373 + }, + { + "epoch": 0.88, + "learning_rate": 3.726898623134828e-05, + "loss": 0.7852, + "step": 16374 + }, + { + "epoch": 0.88, + "learning_rate": 3.723601928123599e-05, + "loss": 0.8945, + "step": 16375 + }, + { + "epoch": 0.88, + "learning_rate": 3.7203066354392354e-05, + "loss": 0.7539, + "step": 16376 + }, + { + "epoch": 0.88, + "learning_rate": 3.7170127451816026e-05, + "loss": 0.7656, + "step": 16377 + }, + { + "epoch": 0.88, + "learning_rate": 3.7137202574505205e-05, + "loss": 0.8398, + "step": 16378 + }, + { + "epoch": 0.88, + "learning_rate": 3.710429172345758e-05, + "loss": 0.8125, + "step": 16379 + }, + { + "epoch": 0.88, + "learning_rate": 3.707139489967032e-05, + "loss": 0.7734, + "step": 16380 + }, + { + "epoch": 0.88, + "learning_rate": 3.70385121041405e-05, + "loss": 0.8438, + "step": 16381 + }, + { + "epoch": 0.88, + "learning_rate": 3.700564333786449e-05, + "loss": 0.75, + "step": 16382 + }, + { + "epoch": 0.88, + "learning_rate": 3.6972788601838336e-05, + "loss": 0.6875, + "step": 16383 + }, + { + "epoch": 0.88, + "learning_rate": 3.693994789705757e-05, + "loss": 0.7422, + "step": 16384 + }, + { + "epoch": 0.88, + "learning_rate": 3.690712122451762e-05, + "loss": 0.7695, + "step": 16385 + }, + { + "epoch": 0.88, + "learning_rate": 3.687430858521301e-05, + "loss": 0.8477, + "step": 16386 + }, + { + "epoch": 0.88, + "learning_rate": 3.684150998013802e-05, + "loss": 0.75, + "step": 16387 + }, + { + "epoch": 0.88, + "learning_rate": 3.6808725410286734e-05, + "loss": 0.8203, + "step": 16388 + }, + { + "epoch": 0.88, + "learning_rate": 3.677595487665259e-05, + "loss": 0.8125, + "step": 16389 + }, + { + "epoch": 0.88, + "learning_rate": 3.674319838022855e-05, + "loss": 0.8047, + "step": 16390 + }, + { + "epoch": 0.88, + "learning_rate": 3.671045592200739e-05, + "loss": 0.8398, + "step": 16391 + }, + { + "epoch": 0.88, + "learning_rate": 3.667772750298115e-05, + "loss": 0.7539, + "step": 16392 + }, + { + "epoch": 0.88, + "learning_rate": 3.6645013124141814e-05, + "loss": 0.8242, + "step": 16393 + }, + { + "epoch": 0.88, + "learning_rate": 3.6612312786480474e-05, + "loss": 0.8477, + "step": 16394 + }, + { + "epoch": 0.88, + "learning_rate": 3.6579626490988336e-05, + "loss": 0.8242, + "step": 16395 + }, + { + "epoch": 0.88, + "learning_rate": 3.6546954238655614e-05, + "loss": 0.8555, + "step": 16396 + }, + { + "epoch": 0.88, + "learning_rate": 3.651429603047257e-05, + "loss": 0.7852, + "step": 16397 + }, + { + "epoch": 0.88, + "learning_rate": 3.648165186742891e-05, + "loss": 0.8672, + "step": 16398 + }, + { + "epoch": 0.88, + "learning_rate": 3.644902175051373e-05, + "loss": 0.7656, + "step": 16399 + }, + { + "epoch": 0.88, + "learning_rate": 3.6416405680715744e-05, + "loss": 0.8164, + "step": 16400 + }, + { + "epoch": 0.88, + "learning_rate": 3.638380365902361e-05, + "loss": 0.8516, + "step": 16401 + }, + { + "epoch": 0.88, + "learning_rate": 3.635121568642513e-05, + "loss": 0.8125, + "step": 16402 + }, + { + "epoch": 0.88, + "learning_rate": 3.63186417639077e-05, + "loss": 0.7656, + "step": 16403 + }, + { + "epoch": 0.88, + "learning_rate": 3.6286081892458524e-05, + "loss": 0.8008, + "step": 16404 + }, + { + "epoch": 0.88, + "learning_rate": 3.6253536073064364e-05, + "loss": 0.7773, + "step": 16405 + }, + { + "epoch": 0.88, + "learning_rate": 3.622100430671138e-05, + "loss": 0.8867, + "step": 16406 + }, + { + "epoch": 0.88, + "learning_rate": 3.6188486594385336e-05, + "loss": 0.8516, + "step": 16407 + }, + { + "epoch": 0.88, + "learning_rate": 3.615598293707168e-05, + "loss": 0.8672, + "step": 16408 + }, + { + "epoch": 0.88, + "learning_rate": 3.612349333575543e-05, + "loss": 0.8984, + "step": 16409 + }, + { + "epoch": 0.88, + "learning_rate": 3.609101779142099e-05, + "loss": 0.832, + "step": 16410 + }, + { + "epoch": 0.88, + "learning_rate": 3.6058556305052614e-05, + "loss": 0.8242, + "step": 16411 + }, + { + "epoch": 0.88, + "learning_rate": 3.602610887763397e-05, + "loss": 0.7969, + "step": 16412 + }, + { + "epoch": 0.88, + "learning_rate": 3.599367551014826e-05, + "loss": 0.7461, + "step": 16413 + }, + { + "epoch": 0.88, + "learning_rate": 3.5961256203578376e-05, + "loss": 0.8398, + "step": 16414 + }, + { + "epoch": 0.88, + "learning_rate": 3.592885095890669e-05, + "loss": 0.7891, + "step": 16415 + }, + { + "epoch": 0.88, + "learning_rate": 3.5896459777115255e-05, + "loss": 0.8398, + "step": 16416 + }, + { + "epoch": 0.88, + "learning_rate": 3.586408265918551e-05, + "loss": 0.832, + "step": 16417 + }, + { + "epoch": 0.88, + "learning_rate": 3.583171960609871e-05, + "loss": 0.8203, + "step": 16418 + }, + { + "epoch": 0.88, + "learning_rate": 3.579937061883548e-05, + "loss": 0.7578, + "step": 16419 + }, + { + "epoch": 0.88, + "learning_rate": 3.5767035698376084e-05, + "loss": 0.7969, + "step": 16420 + }, + { + "epoch": 0.88, + "learning_rate": 3.573471484570051e-05, + "loss": 0.7305, + "step": 16421 + }, + { + "epoch": 0.88, + "learning_rate": 3.570240806178815e-05, + "loss": 0.7734, + "step": 16422 + }, + { + "epoch": 0.88, + "learning_rate": 3.5670115347617826e-05, + "loss": 0.7891, + "step": 16423 + }, + { + "epoch": 0.88, + "learning_rate": 3.5637836704168255e-05, + "loss": 0.8281, + "step": 16424 + }, + { + "epoch": 0.88, + "learning_rate": 3.560557213241761e-05, + "loss": 0.8828, + "step": 16425 + }, + { + "epoch": 0.88, + "learning_rate": 3.5573321633343534e-05, + "loss": 0.8242, + "step": 16426 + }, + { + "epoch": 0.88, + "learning_rate": 3.5541085207923376e-05, + "loss": 0.7891, + "step": 16427 + }, + { + "epoch": 0.88, + "learning_rate": 3.550886285713406e-05, + "loss": 0.7773, + "step": 16428 + }, + { + "epoch": 0.88, + "learning_rate": 3.547665458195199e-05, + "loss": 0.7617, + "step": 16429 + }, + { + "epoch": 0.88, + "learning_rate": 3.5444460383353085e-05, + "loss": 0.8242, + "step": 16430 + }, + { + "epoch": 0.88, + "learning_rate": 3.541228026231297e-05, + "loss": 0.7578, + "step": 16431 + }, + { + "epoch": 0.88, + "learning_rate": 3.5380114219806915e-05, + "loss": 0.8438, + "step": 16432 + }, + { + "epoch": 0.88, + "learning_rate": 3.534796225680953e-05, + "loss": 0.8242, + "step": 16433 + }, + { + "epoch": 0.88, + "learning_rate": 3.531582437429526e-05, + "loss": 0.8398, + "step": 16434 + }, + { + "epoch": 0.88, + "learning_rate": 3.528370057323782e-05, + "loss": 0.8359, + "step": 16435 + }, + { + "epoch": 0.88, + "learning_rate": 3.5251590854610774e-05, + "loss": 0.8359, + "step": 16436 + }, + { + "epoch": 0.88, + "learning_rate": 3.521949521938722e-05, + "loss": 0.8086, + "step": 16437 + }, + { + "epoch": 0.88, + "learning_rate": 3.518741366853967e-05, + "loss": 0.7109, + "step": 16438 + }, + { + "epoch": 0.88, + "learning_rate": 3.5155346203040226e-05, + "loss": 0.7617, + "step": 16439 + }, + { + "epoch": 0.88, + "learning_rate": 3.512329282386073e-05, + "loss": 0.7461, + "step": 16440 + }, + { + "epoch": 0.88, + "learning_rate": 3.509125353197257e-05, + "loss": 0.7773, + "step": 16441 + }, + { + "epoch": 0.88, + "learning_rate": 3.5059228328346516e-05, + "loss": 0.8125, + "step": 16442 + }, + { + "epoch": 0.88, + "learning_rate": 3.502721721395297e-05, + "loss": 0.6992, + "step": 16443 + }, + { + "epoch": 0.88, + "learning_rate": 3.4995220189762256e-05, + "loss": 0.8008, + "step": 16444 + }, + { + "epoch": 0.88, + "learning_rate": 3.4963237256743776e-05, + "loss": 0.7539, + "step": 16445 + }, + { + "epoch": 0.88, + "learning_rate": 3.4931268415866744e-05, + "loss": 0.6797, + "step": 16446 + }, + { + "epoch": 0.88, + "learning_rate": 3.4899313668099895e-05, + "loss": 0.8242, + "step": 16447 + }, + { + "epoch": 0.88, + "learning_rate": 3.486737301441173e-05, + "loss": 0.7305, + "step": 16448 + }, + { + "epoch": 0.88, + "learning_rate": 3.483544645576992e-05, + "loss": 0.7695, + "step": 16449 + }, + { + "epoch": 0.88, + "learning_rate": 3.480353399314207e-05, + "loss": 0.7891, + "step": 16450 + }, + { + "epoch": 0.88, + "learning_rate": 3.47716356274953e-05, + "loss": 0.8008, + "step": 16451 + }, + { + "epoch": 0.88, + "learning_rate": 3.473975135979618e-05, + "loss": 0.7734, + "step": 16452 + }, + { + "epoch": 0.88, + "learning_rate": 3.470788119101076e-05, + "loss": 0.7695, + "step": 16453 + }, + { + "epoch": 0.88, + "learning_rate": 3.467602512210499e-05, + "loss": 0.7656, + "step": 16454 + }, + { + "epoch": 0.88, + "learning_rate": 3.464418315404422e-05, + "loss": 0.8203, + "step": 16455 + }, + { + "epoch": 0.88, + "learning_rate": 3.461235528779322e-05, + "loss": 0.8164, + "step": 16456 + }, + { + "epoch": 0.88, + "learning_rate": 3.458054152431661e-05, + "loss": 0.8906, + "step": 16457 + }, + { + "epoch": 0.88, + "learning_rate": 3.454874186457835e-05, + "loss": 0.75, + "step": 16458 + }, + { + "epoch": 0.88, + "learning_rate": 3.4516956309542215e-05, + "loss": 0.8125, + "step": 16459 + }, + { + "epoch": 0.88, + "learning_rate": 3.448518486017127e-05, + "loss": 0.8516, + "step": 16460 + }, + { + "epoch": 0.88, + "learning_rate": 3.445342751742842e-05, + "loss": 0.8164, + "step": 16461 + }, + { + "epoch": 0.88, + "learning_rate": 3.442168428227582e-05, + "loss": 0.668, + "step": 16462 + }, + { + "epoch": 0.88, + "learning_rate": 3.4389955155675614e-05, + "loss": 0.8047, + "step": 16463 + }, + { + "epoch": 0.88, + "learning_rate": 3.435824013858918e-05, + "loss": 0.8945, + "step": 16464 + }, + { + "epoch": 0.88, + "learning_rate": 3.4326539231977703e-05, + "loss": 0.8242, + "step": 16465 + }, + { + "epoch": 0.88, + "learning_rate": 3.4294852436801526e-05, + "loss": 0.8398, + "step": 16466 + }, + { + "epoch": 0.89, + "learning_rate": 3.4263179754021266e-05, + "loss": 0.8984, + "step": 16467 + }, + { + "epoch": 0.89, + "learning_rate": 3.4231521184596496e-05, + "loss": 0.8438, + "step": 16468 + }, + { + "epoch": 0.89, + "learning_rate": 3.419987672948649e-05, + "loss": 0.7344, + "step": 16469 + }, + { + "epoch": 0.89, + "learning_rate": 3.416824638965032e-05, + "loss": 0.7656, + "step": 16470 + }, + { + "epoch": 0.89, + "learning_rate": 3.4136630166046515e-05, + "loss": 0.8398, + "step": 16471 + }, + { + "epoch": 0.89, + "learning_rate": 3.4105028059633115e-05, + "loss": 0.6367, + "step": 16472 + }, + { + "epoch": 0.89, + "learning_rate": 3.407344007136764e-05, + "loss": 0.7852, + "step": 16473 + }, + { + "epoch": 0.89, + "learning_rate": 3.404186620220745e-05, + "loss": 0.8008, + "step": 16474 + }, + { + "epoch": 0.89, + "learning_rate": 3.4010306453109316e-05, + "loss": 0.8281, + "step": 16475 + }, + { + "epoch": 0.89, + "learning_rate": 3.397876082502954e-05, + "loss": 0.8164, + "step": 16476 + }, + { + "epoch": 0.89, + "learning_rate": 3.3947229318924186e-05, + "loss": 0.7266, + "step": 16477 + }, + { + "epoch": 0.89, + "learning_rate": 3.391571193574855e-05, + "loss": 0.8203, + "step": 16478 + }, + { + "epoch": 0.89, + "learning_rate": 3.388420867645792e-05, + "loss": 0.8672, + "step": 16479 + }, + { + "epoch": 0.89, + "learning_rate": 3.385271954200686e-05, + "loss": 0.8438, + "step": 16480 + }, + { + "epoch": 0.89, + "learning_rate": 3.382124453334967e-05, + "loss": 0.8555, + "step": 16481 + }, + { + "epoch": 0.89, + "learning_rate": 3.3789783651439975e-05, + "loss": 0.793, + "step": 16482 + }, + { + "epoch": 0.89, + "learning_rate": 3.3758336897231234e-05, + "loss": 0.8281, + "step": 16483 + }, + { + "epoch": 0.89, + "learning_rate": 3.3726904271676516e-05, + "loss": 0.7773, + "step": 16484 + }, + { + "epoch": 0.89, + "learning_rate": 3.369548577572818e-05, + "loss": 0.7539, + "step": 16485 + }, + { + "epoch": 0.89, + "learning_rate": 3.366408141033822e-05, + "loss": 0.7031, + "step": 16486 + }, + { + "epoch": 0.89, + "learning_rate": 3.363269117645856e-05, + "loss": 0.7812, + "step": 16487 + }, + { + "epoch": 0.89, + "learning_rate": 3.3601315075040276e-05, + "loss": 0.7812, + "step": 16488 + }, + { + "epoch": 0.89, + "learning_rate": 3.356995310703409e-05, + "loss": 0.7734, + "step": 16489 + }, + { + "epoch": 0.89, + "learning_rate": 3.353860527339048e-05, + "loss": 0.6992, + "step": 16490 + }, + { + "epoch": 0.89, + "learning_rate": 3.350727157505945e-05, + "loss": 0.8438, + "step": 16491 + }, + { + "epoch": 0.89, + "learning_rate": 3.347595201299036e-05, + "loss": 0.793, + "step": 16492 + }, + { + "epoch": 0.89, + "learning_rate": 3.344464658813234e-05, + "loss": 0.7969, + "step": 16493 + }, + { + "epoch": 0.89, + "learning_rate": 3.3413355301434135e-05, + "loss": 0.8047, + "step": 16494 + }, + { + "epoch": 0.89, + "learning_rate": 3.338207815384398e-05, + "loss": 0.7305, + "step": 16495 + }, + { + "epoch": 0.89, + "learning_rate": 3.3350815146309445e-05, + "loss": 0.8516, + "step": 16496 + }, + { + "epoch": 0.89, + "learning_rate": 3.331956627977811e-05, + "loss": 0.75, + "step": 16497 + }, + { + "epoch": 0.89, + "learning_rate": 3.328833155519695e-05, + "loss": 0.9844, + "step": 16498 + }, + { + "epoch": 0.89, + "learning_rate": 3.325711097351236e-05, + "loss": 0.7383, + "step": 16499 + }, + { + "epoch": 0.89, + "learning_rate": 3.322590453567048e-05, + "loss": 0.7617, + "step": 16500 + }, + { + "epoch": 0.89, + "learning_rate": 3.319471224261694e-05, + "loss": 0.8516, + "step": 16501 + }, + { + "epoch": 0.89, + "learning_rate": 3.316353409529704e-05, + "loss": 0.8281, + "step": 16502 + }, + { + "epoch": 0.89, + "learning_rate": 3.3132370094655416e-05, + "loss": 0.8516, + "step": 16503 + }, + { + "epoch": 0.89, + "learning_rate": 3.310122024163664e-05, + "loss": 0.8008, + "step": 16504 + }, + { + "epoch": 0.89, + "learning_rate": 3.307008453718452e-05, + "loss": 0.7109, + "step": 16505 + }, + { + "epoch": 0.89, + "learning_rate": 3.303896298224257e-05, + "loss": 0.7734, + "step": 16506 + }, + { + "epoch": 0.89, + "learning_rate": 3.300785557775404e-05, + "loss": 0.8633, + "step": 16507 + }, + { + "epoch": 0.89, + "learning_rate": 3.2976762324661456e-05, + "loss": 0.9336, + "step": 16508 + }, + { + "epoch": 0.89, + "learning_rate": 3.29456832239069e-05, + "loss": 0.7812, + "step": 16509 + }, + { + "epoch": 0.89, + "learning_rate": 3.291461827643244e-05, + "loss": 0.6602, + "step": 16510 + }, + { + "epoch": 0.89, + "learning_rate": 3.288356748317939e-05, + "loss": 0.6367, + "step": 16511 + }, + { + "epoch": 0.89, + "learning_rate": 3.2852530845088546e-05, + "loss": 0.7461, + "step": 16512 + }, + { + "epoch": 0.89, + "learning_rate": 3.282150836310049e-05, + "loss": 0.8477, + "step": 16513 + }, + { + "epoch": 0.89, + "learning_rate": 3.279050003815542e-05, + "loss": 0.7461, + "step": 16514 + }, + { + "epoch": 0.89, + "learning_rate": 3.2759505871192865e-05, + "loss": 0.8555, + "step": 16515 + }, + { + "epoch": 0.89, + "learning_rate": 3.2728525863152e-05, + "loss": 0.9141, + "step": 16516 + }, + { + "epoch": 0.89, + "learning_rate": 3.269756001497171e-05, + "loss": 0.7266, + "step": 16517 + }, + { + "epoch": 0.89, + "learning_rate": 3.266660832759044e-05, + "loss": 0.8008, + "step": 16518 + }, + { + "epoch": 0.89, + "learning_rate": 3.263567080194596e-05, + "loss": 0.7422, + "step": 16519 + }, + { + "epoch": 0.89, + "learning_rate": 3.26047474389759e-05, + "loss": 0.8477, + "step": 16520 + }, + { + "epoch": 0.89, + "learning_rate": 3.257383823961729e-05, + "loss": 0.8711, + "step": 16521 + }, + { + "epoch": 0.89, + "learning_rate": 3.254294320480672e-05, + "loss": 0.8281, + "step": 16522 + }, + { + "epoch": 0.89, + "learning_rate": 3.25120623354806e-05, + "loss": 0.7852, + "step": 16523 + }, + { + "epoch": 0.89, + "learning_rate": 3.2481195632574515e-05, + "loss": 0.7773, + "step": 16524 + }, + { + "epoch": 0.89, + "learning_rate": 3.245034309702394e-05, + "loss": 0.8633, + "step": 16525 + }, + { + "epoch": 0.89, + "learning_rate": 3.241950472976374e-05, + "loss": 0.7852, + "step": 16526 + }, + { + "epoch": 0.89, + "learning_rate": 3.2388680531728554e-05, + "loss": 0.8398, + "step": 16527 + }, + { + "epoch": 0.89, + "learning_rate": 3.2357870503852306e-05, + "loss": 0.7109, + "step": 16528 + }, + { + "epoch": 0.89, + "learning_rate": 3.2327074647068634e-05, + "loss": 0.8516, + "step": 16529 + }, + { + "epoch": 0.89, + "learning_rate": 3.22962929623109e-05, + "loss": 0.8281, + "step": 16530 + }, + { + "epoch": 0.89, + "learning_rate": 3.226552545051187e-05, + "loss": 0.875, + "step": 16531 + }, + { + "epoch": 0.89, + "learning_rate": 3.223477211260361e-05, + "loss": 0.8555, + "step": 16532 + }, + { + "epoch": 0.89, + "learning_rate": 3.220403294951846e-05, + "loss": 0.8438, + "step": 16533 + }, + { + "epoch": 0.89, + "learning_rate": 3.217330796218776e-05, + "loss": 0.8789, + "step": 16534 + }, + { + "epoch": 0.89, + "learning_rate": 3.214259715154244e-05, + "loss": 0.8789, + "step": 16535 + }, + { + "epoch": 0.89, + "learning_rate": 3.211190051851326e-05, + "loss": 0.8867, + "step": 16536 + }, + { + "epoch": 0.89, + "learning_rate": 3.208121806403047e-05, + "loss": 0.8516, + "step": 16537 + }, + { + "epoch": 0.89, + "learning_rate": 3.205054978902383e-05, + "loss": 0.7773, + "step": 16538 + }, + { + "epoch": 0.89, + "learning_rate": 3.201989569442254e-05, + "loss": 0.8633, + "step": 16539 + }, + { + "epoch": 0.89, + "learning_rate": 3.198925578115569e-05, + "loss": 0.7422, + "step": 16540 + }, + { + "epoch": 0.89, + "learning_rate": 3.195863005015176e-05, + "loss": 0.8867, + "step": 16541 + }, + { + "epoch": 0.89, + "learning_rate": 3.1928018502338675e-05, + "loss": 0.7969, + "step": 16542 + }, + { + "epoch": 0.89, + "learning_rate": 3.1897421138644245e-05, + "loss": 0.8008, + "step": 16543 + }, + { + "epoch": 0.89, + "learning_rate": 3.186683795999551e-05, + "loss": 0.8047, + "step": 16544 + }, + { + "epoch": 0.89, + "learning_rate": 3.1836268967319335e-05, + "loss": 0.832, + "step": 16545 + }, + { + "epoch": 0.89, + "learning_rate": 3.1805714161542096e-05, + "loss": 0.7969, + "step": 16546 + }, + { + "epoch": 0.89, + "learning_rate": 3.177517354358966e-05, + "loss": 0.7969, + "step": 16547 + }, + { + "epoch": 0.89, + "learning_rate": 3.1744647114387405e-05, + "loss": 0.8008, + "step": 16548 + }, + { + "epoch": 0.89, + "learning_rate": 3.171413487486047e-05, + "loss": 0.8164, + "step": 16549 + }, + { + "epoch": 0.89, + "learning_rate": 3.168363682593356e-05, + "loss": 0.8242, + "step": 16550 + }, + { + "epoch": 0.89, + "learning_rate": 3.1653152968530776e-05, + "loss": 0.7812, + "step": 16551 + }, + { + "epoch": 0.89, + "learning_rate": 3.162268330357576e-05, + "loss": 0.7617, + "step": 16552 + }, + { + "epoch": 0.89, + "learning_rate": 3.1592227831992115e-05, + "loss": 0.793, + "step": 16553 + }, + { + "epoch": 0.89, + "learning_rate": 3.1561786554702546e-05, + "loss": 0.7461, + "step": 16554 + }, + { + "epoch": 0.89, + "learning_rate": 3.1531359472629526e-05, + "loss": 0.8086, + "step": 16555 + }, + { + "epoch": 0.89, + "learning_rate": 3.150094658669517e-05, + "loss": 0.8281, + "step": 16556 + }, + { + "epoch": 0.89, + "learning_rate": 3.147054789782111e-05, + "loss": 0.8945, + "step": 16557 + }, + { + "epoch": 0.89, + "learning_rate": 3.1440163406928455e-05, + "loss": 0.7969, + "step": 16558 + }, + { + "epoch": 0.89, + "learning_rate": 3.140979311493802e-05, + "loss": 0.8633, + "step": 16559 + }, + { + "epoch": 0.89, + "learning_rate": 3.137943702277002e-05, + "loss": 0.8164, + "step": 16560 + }, + { + "epoch": 0.89, + "learning_rate": 3.1349095131344426e-05, + "loss": 0.9219, + "step": 16561 + }, + { + "epoch": 0.89, + "learning_rate": 3.131876744158063e-05, + "loss": 0.7266, + "step": 16562 + }, + { + "epoch": 0.89, + "learning_rate": 3.128845395439778e-05, + "loss": 0.8047, + "step": 16563 + }, + { + "epoch": 0.89, + "learning_rate": 3.125815467071441e-05, + "loss": 0.7461, + "step": 16564 + }, + { + "epoch": 0.89, + "learning_rate": 3.122786959144858e-05, + "loss": 0.7383, + "step": 16565 + }, + { + "epoch": 0.89, + "learning_rate": 3.119759871751826e-05, + "loss": 0.9141, + "step": 16566 + }, + { + "epoch": 0.89, + "learning_rate": 3.116734204984056e-05, + "loss": 0.8281, + "step": 16567 + }, + { + "epoch": 0.89, + "learning_rate": 3.113709958933247e-05, + "loss": 0.8398, + "step": 16568 + }, + { + "epoch": 0.89, + "learning_rate": 3.1106871336910315e-05, + "loss": 0.7969, + "step": 16569 + }, + { + "epoch": 0.89, + "learning_rate": 3.107665729349024e-05, + "loss": 0.7578, + "step": 16570 + }, + { + "epoch": 0.89, + "learning_rate": 3.104645745998769e-05, + "loss": 0.8203, + "step": 16571 + }, + { + "epoch": 0.89, + "learning_rate": 3.1016271837317924e-05, + "loss": 0.7617, + "step": 16572 + }, + { + "epoch": 0.89, + "learning_rate": 3.098610042639571e-05, + "loss": 0.8359, + "step": 16573 + }, + { + "epoch": 0.89, + "learning_rate": 3.095594322813533e-05, + "loss": 0.8594, + "step": 16574 + }, + { + "epoch": 0.89, + "learning_rate": 3.0925800243450376e-05, + "loss": 0.8008, + "step": 16575 + }, + { + "epoch": 0.89, + "learning_rate": 3.089567147325467e-05, + "loss": 0.8711, + "step": 16576 + }, + { + "epoch": 0.89, + "learning_rate": 3.0865556918461045e-05, + "loss": 0.7969, + "step": 16577 + }, + { + "epoch": 0.89, + "learning_rate": 3.083545657998194e-05, + "loss": 0.8203, + "step": 16578 + }, + { + "epoch": 0.89, + "learning_rate": 3.0805370458729666e-05, + "loss": 0.7812, + "step": 16579 + }, + { + "epoch": 0.89, + "learning_rate": 3.077529855561595e-05, + "loss": 0.7383, + "step": 16580 + }, + { + "epoch": 0.89, + "learning_rate": 3.0745240871552004e-05, + "loss": 0.8164, + "step": 16581 + }, + { + "epoch": 0.89, + "learning_rate": 3.071519740744855e-05, + "loss": 0.8047, + "step": 16582 + }, + { + "epoch": 0.89, + "learning_rate": 3.068516816421618e-05, + "loss": 0.8203, + "step": 16583 + }, + { + "epoch": 0.89, + "learning_rate": 3.0655153142764905e-05, + "loss": 0.8555, + "step": 16584 + }, + { + "epoch": 0.89, + "learning_rate": 3.062515234400409e-05, + "loss": 0.8281, + "step": 16585 + }, + { + "epoch": 0.89, + "learning_rate": 3.059516576884302e-05, + "loss": 0.7344, + "step": 16586 + }, + { + "epoch": 0.89, + "learning_rate": 3.056519341819031e-05, + "loss": 0.8047, + "step": 16587 + }, + { + "epoch": 0.89, + "learning_rate": 3.053523529295421e-05, + "loss": 0.8047, + "step": 16588 + }, + { + "epoch": 0.89, + "learning_rate": 3.0505291394042678e-05, + "loss": 0.7344, + "step": 16589 + }, + { + "epoch": 0.89, + "learning_rate": 3.0475361722362982e-05, + "loss": 0.7539, + "step": 16590 + }, + { + "epoch": 0.89, + "learning_rate": 3.044544627882212e-05, + "loss": 0.793, + "step": 16591 + }, + { + "epoch": 0.89, + "learning_rate": 3.041554506432659e-05, + "loss": 0.7656, + "step": 16592 + }, + { + "epoch": 0.89, + "learning_rate": 3.0385658079782607e-05, + "loss": 0.7891, + "step": 16593 + }, + { + "epoch": 0.89, + "learning_rate": 3.0355785326095786e-05, + "loss": 0.6992, + "step": 16594 + }, + { + "epoch": 0.89, + "learning_rate": 3.0325926804171178e-05, + "loss": 0.832, + "step": 16595 + }, + { + "epoch": 0.89, + "learning_rate": 3.0296082514914002e-05, + "loss": 0.7852, + "step": 16596 + }, + { + "epoch": 0.89, + "learning_rate": 3.0266252459228373e-05, + "loss": 0.7539, + "step": 16597 + }, + { + "epoch": 0.89, + "learning_rate": 3.023643663801817e-05, + "loss": 0.7656, + "step": 16598 + }, + { + "epoch": 0.89, + "learning_rate": 3.020663505218707e-05, + "loss": 0.7852, + "step": 16599 + }, + { + "epoch": 0.89, + "learning_rate": 3.0176847702638178e-05, + "loss": 0.7461, + "step": 16600 + }, + { + "epoch": 0.89, + "learning_rate": 3.014707459027399e-05, + "loss": 0.8633, + "step": 16601 + }, + { + "epoch": 0.89, + "learning_rate": 3.0117315715996796e-05, + "loss": 0.7773, + "step": 16602 + }, + { + "epoch": 0.89, + "learning_rate": 3.0087571080708532e-05, + "loss": 0.7969, + "step": 16603 + }, + { + "epoch": 0.89, + "learning_rate": 3.0057840685310422e-05, + "loss": 0.8281, + "step": 16604 + }, + { + "epoch": 0.89, + "learning_rate": 3.002812453070336e-05, + "loss": 0.8164, + "step": 16605 + }, + { + "epoch": 0.89, + "learning_rate": 2.9998422617787845e-05, + "loss": 0.832, + "step": 16606 + }, + { + "epoch": 0.89, + "learning_rate": 2.9968734947464105e-05, + "loss": 0.7266, + "step": 16607 + }, + { + "epoch": 0.89, + "learning_rate": 2.9939061520631582e-05, + "loss": 0.8047, + "step": 16608 + }, + { + "epoch": 0.89, + "learning_rate": 2.9909402338189618e-05, + "loss": 0.7344, + "step": 16609 + }, + { + "epoch": 0.89, + "learning_rate": 2.9879757401036823e-05, + "loss": 0.8086, + "step": 16610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9850126710071645e-05, + "loss": 0.7773, + "step": 16611 + }, + { + "epoch": 0.89, + "learning_rate": 2.9820510266192092e-05, + "loss": 0.7305, + "step": 16612 + }, + { + "epoch": 0.89, + "learning_rate": 2.9790908070295496e-05, + "loss": 0.8867, + "step": 16613 + }, + { + "epoch": 0.89, + "learning_rate": 2.9761320123278866e-05, + "loss": 0.7773, + "step": 16614 + }, + { + "epoch": 0.89, + "learning_rate": 2.9731746426038875e-05, + "loss": 0.7617, + "step": 16615 + }, + { + "epoch": 0.89, + "learning_rate": 2.9702186979471746e-05, + "loss": 0.7734, + "step": 16616 + }, + { + "epoch": 0.89, + "learning_rate": 2.9672641784473266e-05, + "loss": 0.7422, + "step": 16617 + }, + { + "epoch": 0.89, + "learning_rate": 2.9643110841938493e-05, + "loss": 0.8281, + "step": 16618 + }, + { + "epoch": 0.89, + "learning_rate": 2.9613594152762657e-05, + "loss": 0.8086, + "step": 16619 + }, + { + "epoch": 0.89, + "learning_rate": 2.9584091717839988e-05, + "loss": 0.8945, + "step": 16620 + }, + { + "epoch": 0.89, + "learning_rate": 2.9554603538064552e-05, + "loss": 0.8164, + "step": 16621 + }, + { + "epoch": 0.89, + "learning_rate": 2.952512961432996e-05, + "loss": 0.7383, + "step": 16622 + }, + { + "epoch": 0.89, + "learning_rate": 2.949566994752939e-05, + "loss": 0.7344, + "step": 16623 + }, + { + "epoch": 0.89, + "learning_rate": 2.946622453855552e-05, + "loss": 0.7656, + "step": 16624 + }, + { + "epoch": 0.89, + "learning_rate": 2.9436793388300686e-05, + "loss": 0.8711, + "step": 16625 + }, + { + "epoch": 0.89, + "learning_rate": 2.9407376497656678e-05, + "loss": 0.9141, + "step": 16626 + }, + { + "epoch": 0.89, + "learning_rate": 2.9377973867515005e-05, + "loss": 0.8008, + "step": 16627 + }, + { + "epoch": 0.89, + "learning_rate": 2.9348585498766567e-05, + "loss": 0.8125, + "step": 16628 + }, + { + "epoch": 0.89, + "learning_rate": 2.9319211392302092e-05, + "loss": 0.8164, + "step": 16629 + }, + { + "epoch": 0.89, + "learning_rate": 2.928985154901154e-05, + "loss": 0.8633, + "step": 16630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9260505969784636e-05, + "loss": 0.7578, + "step": 16631 + }, + { + "epoch": 0.89, + "learning_rate": 2.9231174655510783e-05, + "loss": 0.8125, + "step": 16632 + }, + { + "epoch": 0.89, + "learning_rate": 2.920185760707872e-05, + "loss": 0.8359, + "step": 16633 + }, + { + "epoch": 0.89, + "learning_rate": 2.9172554825376785e-05, + "loss": 0.75, + "step": 16634 + }, + { + "epoch": 0.89, + "learning_rate": 2.9143266311292993e-05, + "loss": 0.9375, + "step": 16635 + }, + { + "epoch": 0.89, + "learning_rate": 2.9113992065715023e-05, + "loss": 0.8828, + "step": 16636 + }, + { + "epoch": 0.89, + "learning_rate": 2.9084732089529774e-05, + "loss": 0.8047, + "step": 16637 + }, + { + "epoch": 0.89, + "learning_rate": 2.9055486383623985e-05, + "loss": 0.7305, + "step": 16638 + }, + { + "epoch": 0.89, + "learning_rate": 2.9026254948884056e-05, + "loss": 0.7031, + "step": 16639 + }, + { + "epoch": 0.89, + "learning_rate": 2.8997037786195614e-05, + "loss": 0.8086, + "step": 16640 + }, + { + "epoch": 0.89, + "learning_rate": 2.8967834896443944e-05, + "loss": 0.75, + "step": 16641 + }, + { + "epoch": 0.89, + "learning_rate": 2.893864628051429e-05, + "loss": 0.8359, + "step": 16642 + }, + { + "epoch": 0.89, + "learning_rate": 2.8909471939290945e-05, + "loss": 0.8086, + "step": 16643 + }, + { + "epoch": 0.89, + "learning_rate": 2.8880311873658027e-05, + "loss": 0.8164, + "step": 16644 + }, + { + "epoch": 0.89, + "learning_rate": 2.885116608449917e-05, + "loss": 0.7539, + "step": 16645 + }, + { + "epoch": 0.89, + "learning_rate": 2.8822034572697718e-05, + "loss": 0.7695, + "step": 16646 + }, + { + "epoch": 0.89, + "learning_rate": 2.8792917339136305e-05, + "loss": 0.7461, + "step": 16647 + }, + { + "epoch": 0.89, + "learning_rate": 2.8763814384697274e-05, + "loss": 0.8438, + "step": 16648 + }, + { + "epoch": 0.89, + "learning_rate": 2.8734725710262588e-05, + "loss": 0.7578, + "step": 16649 + }, + { + "epoch": 0.89, + "learning_rate": 2.870565131671382e-05, + "loss": 0.8203, + "step": 16650 + }, + { + "epoch": 0.89, + "learning_rate": 2.8676591204931823e-05, + "loss": 0.7344, + "step": 16651 + }, + { + "epoch": 0.89, + "learning_rate": 2.8647545375797446e-05, + "loss": 0.8125, + "step": 16652 + }, + { + "epoch": 0.9, + "learning_rate": 2.8618513830190594e-05, + "loss": 0.7891, + "step": 16653 + }, + { + "epoch": 0.9, + "learning_rate": 2.8589496568991235e-05, + "loss": 0.6953, + "step": 16654 + }, + { + "epoch": 0.9, + "learning_rate": 2.8560493593078663e-05, + "loss": 0.793, + "step": 16655 + }, + { + "epoch": 0.9, + "learning_rate": 2.8531504903331728e-05, + "loss": 0.7773, + "step": 16656 + }, + { + "epoch": 0.9, + "learning_rate": 2.850253050062884e-05, + "loss": 0.7148, + "step": 16657 + }, + { + "epoch": 0.9, + "learning_rate": 2.8473570385848023e-05, + "loss": 0.7852, + "step": 16658 + }, + { + "epoch": 0.9, + "learning_rate": 2.8444624559867017e-05, + "loss": 0.7461, + "step": 16659 + }, + { + "epoch": 0.9, + "learning_rate": 2.8415693023562783e-05, + "loss": 0.918, + "step": 16660 + }, + { + "epoch": 0.9, + "learning_rate": 2.8386775777812014e-05, + "loss": 0.8477, + "step": 16661 + }, + { + "epoch": 0.9, + "learning_rate": 2.8357872823491227e-05, + "loss": 0.7227, + "step": 16662 + }, + { + "epoch": 0.9, + "learning_rate": 2.8328984161476168e-05, + "loss": 0.7891, + "step": 16663 + }, + { + "epoch": 0.9, + "learning_rate": 2.8300109792642138e-05, + "loss": 0.8438, + "step": 16664 + }, + { + "epoch": 0.9, + "learning_rate": 2.8271249717864267e-05, + "loss": 0.832, + "step": 16665 + }, + { + "epoch": 0.9, + "learning_rate": 2.824240393801708e-05, + "loss": 0.8047, + "step": 16666 + }, + { + "epoch": 0.9, + "learning_rate": 2.8213572453974657e-05, + "loss": 0.9102, + "step": 16667 + }, + { + "epoch": 0.9, + "learning_rate": 2.8184755266610795e-05, + "loss": 0.8281, + "step": 16668 + }, + { + "epoch": 0.9, + "learning_rate": 2.8155952376798576e-05, + "loss": 0.8047, + "step": 16669 + }, + { + "epoch": 0.9, + "learning_rate": 2.8127163785411025e-05, + "loss": 0.8516, + "step": 16670 + }, + { + "epoch": 0.9, + "learning_rate": 2.8098389493320332e-05, + "loss": 0.8398, + "step": 16671 + }, + { + "epoch": 0.9, + "learning_rate": 2.806962950139852e-05, + "loss": 0.832, + "step": 16672 + }, + { + "epoch": 0.9, + "learning_rate": 2.8040883810517226e-05, + "loss": 0.7578, + "step": 16673 + }, + { + "epoch": 0.9, + "learning_rate": 2.801215242154742e-05, + "loss": 0.7422, + "step": 16674 + }, + { + "epoch": 0.9, + "learning_rate": 2.7983435335359798e-05, + "loss": 0.7891, + "step": 16675 + }, + { + "epoch": 0.9, + "learning_rate": 2.7954732552824546e-05, + "loss": 0.793, + "step": 16676 + }, + { + "epoch": 0.9, + "learning_rate": 2.7926044074811475e-05, + "loss": 0.7969, + "step": 16677 + }, + { + "epoch": 0.9, + "learning_rate": 2.789736990218994e-05, + "loss": 0.7891, + "step": 16678 + }, + { + "epoch": 0.9, + "learning_rate": 2.7868710035828915e-05, + "loss": 0.8984, + "step": 16679 + }, + { + "epoch": 0.9, + "learning_rate": 2.7840064476596762e-05, + "loss": 0.7656, + "step": 16680 + }, + { + "epoch": 0.9, + "learning_rate": 2.7811433225361616e-05, + "loss": 0.7109, + "step": 16681 + }, + { + "epoch": 0.9, + "learning_rate": 2.778281628299112e-05, + "loss": 0.7344, + "step": 16682 + }, + { + "epoch": 0.9, + "learning_rate": 2.7754213650352468e-05, + "loss": 0.75, + "step": 16683 + }, + { + "epoch": 0.9, + "learning_rate": 2.7725625328312243e-05, + "loss": 0.7578, + "step": 16684 + }, + { + "epoch": 0.9, + "learning_rate": 2.7697051317737087e-05, + "loss": 0.9062, + "step": 16685 + }, + { + "epoch": 0.9, + "learning_rate": 2.7668491619492643e-05, + "loss": 0.7539, + "step": 16686 + }, + { + "epoch": 0.9, + "learning_rate": 2.763994623444438e-05, + "loss": 0.8164, + "step": 16687 + }, + { + "epoch": 0.9, + "learning_rate": 2.761141516345733e-05, + "loss": 0.8086, + "step": 16688 + }, + { + "epoch": 0.9, + "learning_rate": 2.7582898407396194e-05, + "loss": 0.8359, + "step": 16689 + }, + { + "epoch": 0.9, + "learning_rate": 2.7554395967124944e-05, + "loss": 0.7227, + "step": 16690 + }, + { + "epoch": 0.9, + "learning_rate": 2.7525907843507504e-05, + "loss": 0.7578, + "step": 16691 + }, + { + "epoch": 0.9, + "learning_rate": 2.7497434037406955e-05, + "loss": 0.8672, + "step": 16692 + }, + { + "epoch": 0.9, + "learning_rate": 2.7468974549686276e-05, + "loss": 0.8438, + "step": 16693 + }, + { + "epoch": 0.9, + "learning_rate": 2.744052938120778e-05, + "loss": 0.8086, + "step": 16694 + }, + { + "epoch": 0.9, + "learning_rate": 2.7412098532833606e-05, + "loss": 0.8203, + "step": 16695 + }, + { + "epoch": 0.9, + "learning_rate": 2.7383682005425125e-05, + "loss": 0.8594, + "step": 16696 + }, + { + "epoch": 0.9, + "learning_rate": 2.7355279799843537e-05, + "loss": 0.75, + "step": 16697 + }, + { + "epoch": 0.9, + "learning_rate": 2.732689191694959e-05, + "loss": 0.6523, + "step": 16698 + }, + { + "epoch": 0.9, + "learning_rate": 2.7298518357603496e-05, + "loss": 0.7266, + "step": 16699 + }, + { + "epoch": 0.9, + "learning_rate": 2.7270159122664895e-05, + "loss": 0.7969, + "step": 16700 + }, + { + "epoch": 0.9, + "learning_rate": 2.724181421299332e-05, + "loss": 0.8203, + "step": 16701 + }, + { + "epoch": 0.9, + "learning_rate": 2.721348362944781e-05, + "loss": 0.8203, + "step": 16702 + }, + { + "epoch": 0.9, + "learning_rate": 2.7185167372886622e-05, + "loss": 0.8008, + "step": 16703 + }, + { + "epoch": 0.9, + "learning_rate": 2.715686544416801e-05, + "loss": 0.7344, + "step": 16704 + }, + { + "epoch": 0.9, + "learning_rate": 2.7128577844149627e-05, + "loss": 0.7109, + "step": 16705 + }, + { + "epoch": 0.9, + "learning_rate": 2.7100304573688672e-05, + "loss": 0.7422, + "step": 16706 + }, + { + "epoch": 0.9, + "learning_rate": 2.707204563364174e-05, + "loss": 0.9062, + "step": 16707 + }, + { + "epoch": 0.9, + "learning_rate": 2.7043801024865367e-05, + "loss": 0.7695, + "step": 16708 + }, + { + "epoch": 0.9, + "learning_rate": 2.7015570748215425e-05, + "loss": 0.75, + "step": 16709 + }, + { + "epoch": 0.9, + "learning_rate": 2.698735480454728e-05, + "loss": 0.8594, + "step": 16710 + }, + { + "epoch": 0.9, + "learning_rate": 2.6959153194716035e-05, + "loss": 0.8398, + "step": 16711 + }, + { + "epoch": 0.9, + "learning_rate": 2.693096591957639e-05, + "loss": 0.7812, + "step": 16712 + }, + { + "epoch": 0.9, + "learning_rate": 2.6902792979982436e-05, + "loss": 0.75, + "step": 16713 + }, + { + "epoch": 0.9, + "learning_rate": 2.6874634376787778e-05, + "loss": 0.7695, + "step": 16714 + }, + { + "epoch": 0.9, + "learning_rate": 2.6846490110845832e-05, + "loss": 0.8398, + "step": 16715 + }, + { + "epoch": 0.9, + "learning_rate": 2.6818360183009537e-05, + "loss": 0.8242, + "step": 16716 + }, + { + "epoch": 0.9, + "learning_rate": 2.6790244594131152e-05, + "loss": 0.6953, + "step": 16717 + }, + { + "epoch": 0.9, + "learning_rate": 2.6762143345062883e-05, + "loss": 0.7305, + "step": 16718 + }, + { + "epoch": 0.9, + "learning_rate": 2.673405643665605e-05, + "loss": 0.8203, + "step": 16719 + }, + { + "epoch": 0.9, + "learning_rate": 2.670598386976186e-05, + "loss": 0.8906, + "step": 16720 + }, + { + "epoch": 0.9, + "learning_rate": 2.6677925645231192e-05, + "loss": 0.7539, + "step": 16721 + }, + { + "epoch": 0.9, + "learning_rate": 2.6649881763914087e-05, + "loss": 0.8477, + "step": 16722 + }, + { + "epoch": 0.9, + "learning_rate": 2.6621852226660415e-05, + "loss": 0.8047, + "step": 16723 + }, + { + "epoch": 0.9, + "learning_rate": 2.6593837034319502e-05, + "loss": 0.8359, + "step": 16724 + }, + { + "epoch": 0.9, + "learning_rate": 2.65658361877405e-05, + "loss": 0.8125, + "step": 16725 + }, + { + "epoch": 0.9, + "learning_rate": 2.6537849687771786e-05, + "loss": 0.75, + "step": 16726 + }, + { + "epoch": 0.9, + "learning_rate": 2.650987753526135e-05, + "loss": 0.8086, + "step": 16727 + }, + { + "epoch": 0.9, + "learning_rate": 2.648191973105707e-05, + "loss": 0.8555, + "step": 16728 + }, + { + "epoch": 0.9, + "learning_rate": 2.645397627600604e-05, + "loss": 0.8438, + "step": 16729 + }, + { + "epoch": 0.9, + "learning_rate": 2.6426047170954924e-05, + "loss": 0.7422, + "step": 16730 + }, + { + "epoch": 0.9, + "learning_rate": 2.6398132416750264e-05, + "loss": 0.6445, + "step": 16731 + }, + { + "epoch": 0.9, + "learning_rate": 2.6370232014237883e-05, + "loss": 0.7305, + "step": 16732 + }, + { + "epoch": 0.9, + "learning_rate": 2.6342345964263215e-05, + "loss": 0.8281, + "step": 16733 + }, + { + "epoch": 0.9, + "learning_rate": 2.6314474267671416e-05, + "loss": 0.7656, + "step": 16734 + }, + { + "epoch": 0.9, + "learning_rate": 2.6286616925306927e-05, + "loss": 0.6641, + "step": 16735 + }, + { + "epoch": 0.9, + "learning_rate": 2.625877393801407e-05, + "loss": 0.7773, + "step": 16736 + }, + { + "epoch": 0.9, + "learning_rate": 2.6230945306636443e-05, + "loss": 0.7852, + "step": 16737 + }, + { + "epoch": 0.9, + "learning_rate": 2.620313103201749e-05, + "loss": 0.9258, + "step": 16738 + }, + { + "epoch": 0.9, + "learning_rate": 2.6175331114999923e-05, + "loss": 0.8555, + "step": 16739 + }, + { + "epoch": 0.9, + "learning_rate": 2.614754555642629e-05, + "loss": 0.7891, + "step": 16740 + }, + { + "epoch": 0.9, + "learning_rate": 2.611977435713858e-05, + "loss": 0.8477, + "step": 16741 + }, + { + "epoch": 0.9, + "learning_rate": 2.6092017517978296e-05, + "loss": 0.7734, + "step": 16742 + }, + { + "epoch": 0.9, + "learning_rate": 2.6064275039786532e-05, + "loss": 0.875, + "step": 16743 + }, + { + "epoch": 0.9, + "learning_rate": 2.6036546923404016e-05, + "loss": 0.8203, + "step": 16744 + }, + { + "epoch": 0.9, + "learning_rate": 2.6008833169671065e-05, + "loss": 0.8281, + "step": 16745 + }, + { + "epoch": 0.9, + "learning_rate": 2.5981133779427346e-05, + "loss": 0.7188, + "step": 16746 + }, + { + "epoch": 0.9, + "learning_rate": 2.595344875351241e-05, + "loss": 0.6758, + "step": 16747 + }, + { + "epoch": 0.9, + "learning_rate": 2.592577809276514e-05, + "loss": 0.7656, + "step": 16748 + }, + { + "epoch": 0.9, + "learning_rate": 2.5898121798024034e-05, + "loss": 0.8438, + "step": 16749 + }, + { + "epoch": 0.9, + "learning_rate": 2.5870479870127085e-05, + "loss": 0.8711, + "step": 16750 + }, + { + "epoch": 0.9, + "learning_rate": 2.5842852309912125e-05, + "loss": 0.7461, + "step": 16751 + }, + { + "epoch": 0.9, + "learning_rate": 2.581523911821626e-05, + "loss": 0.7891, + "step": 16752 + }, + { + "epoch": 0.9, + "learning_rate": 2.5787640295876157e-05, + "loss": 0.7656, + "step": 16753 + }, + { + "epoch": 0.9, + "learning_rate": 2.5760055843728314e-05, + "loss": 0.8398, + "step": 16754 + }, + { + "epoch": 0.9, + "learning_rate": 2.5732485762608615e-05, + "loss": 0.75, + "step": 16755 + }, + { + "epoch": 0.9, + "learning_rate": 2.5704930053352448e-05, + "loss": 0.8281, + "step": 16756 + }, + { + "epoch": 0.9, + "learning_rate": 2.5677388716794814e-05, + "loss": 0.8242, + "step": 16757 + }, + { + "epoch": 0.9, + "learning_rate": 2.5649861753770378e-05, + "loss": 0.8359, + "step": 16758 + }, + { + "epoch": 0.9, + "learning_rate": 2.5622349165113302e-05, + "loss": 0.9102, + "step": 16759 + }, + { + "epoch": 0.9, + "learning_rate": 2.5594850951657255e-05, + "loss": 0.6914, + "step": 16760 + }, + { + "epoch": 0.9, + "learning_rate": 2.556736711423563e-05, + "loss": 0.7578, + "step": 16761 + }, + { + "epoch": 0.9, + "learning_rate": 2.5539897653681144e-05, + "loss": 0.8242, + "step": 16762 + }, + { + "epoch": 0.9, + "learning_rate": 2.5512442570826245e-05, + "loss": 0.7773, + "step": 16763 + }, + { + "epoch": 0.9, + "learning_rate": 2.548500186650299e-05, + "loss": 0.8906, + "step": 16764 + }, + { + "epoch": 0.9, + "learning_rate": 2.5457575541542878e-05, + "loss": 0.875, + "step": 16765 + }, + { + "epoch": 0.9, + "learning_rate": 2.5430163596776966e-05, + "loss": 0.7422, + "step": 16766 + }, + { + "epoch": 0.9, + "learning_rate": 2.5402766033035983e-05, + "loss": 0.7773, + "step": 16767 + }, + { + "epoch": 0.9, + "learning_rate": 2.5375382851150153e-05, + "loss": 0.8711, + "step": 16768 + }, + { + "epoch": 0.9, + "learning_rate": 2.5348014051949253e-05, + "loss": 0.8789, + "step": 16769 + }, + { + "epoch": 0.9, + "learning_rate": 2.5320659636262677e-05, + "loss": 0.8359, + "step": 16770 + }, + { + "epoch": 0.9, + "learning_rate": 2.5293319604919373e-05, + "loss": 0.8203, + "step": 16771 + }, + { + "epoch": 0.9, + "learning_rate": 2.5265993958747847e-05, + "loss": 0.8242, + "step": 16772 + }, + { + "epoch": 0.9, + "learning_rate": 2.5238682698576044e-05, + "loss": 0.8594, + "step": 16773 + }, + { + "epoch": 0.9, + "learning_rate": 2.5211385825231637e-05, + "loss": 0.9375, + "step": 16774 + }, + { + "epoch": 0.9, + "learning_rate": 2.518410333954191e-05, + "loss": 0.8086, + "step": 16775 + }, + { + "epoch": 0.9, + "learning_rate": 2.515683524233342e-05, + "loss": 0.7305, + "step": 16776 + }, + { + "epoch": 0.9, + "learning_rate": 2.5129581534432733e-05, + "loss": 0.8359, + "step": 16777 + }, + { + "epoch": 0.9, + "learning_rate": 2.5102342216665465e-05, + "loss": 0.8047, + "step": 16778 + }, + { + "epoch": 0.9, + "learning_rate": 2.507511728985723e-05, + "loss": 0.8281, + "step": 16779 + }, + { + "epoch": 0.9, + "learning_rate": 2.5047906754832927e-05, + "loss": 0.7422, + "step": 16780 + }, + { + "epoch": 0.9, + "learning_rate": 2.5020710612417286e-05, + "loss": 0.8242, + "step": 16781 + }, + { + "epoch": 0.9, + "learning_rate": 2.49935288634342e-05, + "loss": 0.7539, + "step": 16782 + }, + { + "epoch": 0.9, + "learning_rate": 2.496636150870757e-05, + "loss": 0.832, + "step": 16783 + }, + { + "epoch": 0.9, + "learning_rate": 2.4939208549060565e-05, + "loss": 0.8047, + "step": 16784 + }, + { + "epoch": 0.9, + "learning_rate": 2.4912069985316033e-05, + "loss": 0.8086, + "step": 16785 + }, + { + "epoch": 0.9, + "learning_rate": 2.488494581829631e-05, + "loss": 0.7773, + "step": 16786 + }, + { + "epoch": 0.9, + "learning_rate": 2.4857836048823467e-05, + "loss": 0.8906, + "step": 16787 + }, + { + "epoch": 0.9, + "learning_rate": 2.4830740677718954e-05, + "loss": 0.8281, + "step": 16788 + }, + { + "epoch": 0.9, + "learning_rate": 2.4803659705803728e-05, + "loss": 0.832, + "step": 16789 + }, + { + "epoch": 0.9, + "learning_rate": 2.4776593133898572e-05, + "loss": 0.8828, + "step": 16790 + }, + { + "epoch": 0.9, + "learning_rate": 2.474954096282378e-05, + "loss": 0.6797, + "step": 16791 + }, + { + "epoch": 0.9, + "learning_rate": 2.4722503193398914e-05, + "loss": 0.8477, + "step": 16792 + }, + { + "epoch": 0.9, + "learning_rate": 2.4695479826443325e-05, + "loss": 0.8281, + "step": 16793 + }, + { + "epoch": 0.9, + "learning_rate": 2.4668470862776126e-05, + "loss": 0.8555, + "step": 16794 + }, + { + "epoch": 0.9, + "learning_rate": 2.464147630321556e-05, + "loss": 0.8242, + "step": 16795 + }, + { + "epoch": 0.9, + "learning_rate": 2.4614496148579745e-05, + "loss": 0.8867, + "step": 16796 + }, + { + "epoch": 0.9, + "learning_rate": 2.4587530399686198e-05, + "loss": 0.75, + "step": 16797 + }, + { + "epoch": 0.9, + "learning_rate": 2.4560579057352207e-05, + "loss": 0.7773, + "step": 16798 + }, + { + "epoch": 0.9, + "learning_rate": 2.4533642122394286e-05, + "loss": 0.9062, + "step": 16799 + }, + { + "epoch": 0.9, + "learning_rate": 2.4506719595628946e-05, + "loss": 0.8125, + "step": 16800 + }, + { + "epoch": 0.9, + "learning_rate": 2.4479811477871817e-05, + "loss": 0.8125, + "step": 16801 + }, + { + "epoch": 0.9, + "learning_rate": 2.4452917769938466e-05, + "loss": 0.8828, + "step": 16802 + }, + { + "epoch": 0.9, + "learning_rate": 2.4426038472643687e-05, + "loss": 0.7422, + "step": 16803 + }, + { + "epoch": 0.9, + "learning_rate": 2.4399173586802215e-05, + "loss": 0.8711, + "step": 16804 + }, + { + "epoch": 0.9, + "learning_rate": 2.4372323113227956e-05, + "loss": 0.8867, + "step": 16805 + }, + { + "epoch": 0.9, + "learning_rate": 2.4345487052734706e-05, + "loss": 0.8242, + "step": 16806 + }, + { + "epoch": 0.9, + "learning_rate": 2.431866540613564e-05, + "loss": 0.8984, + "step": 16807 + }, + { + "epoch": 0.9, + "learning_rate": 2.4291858174243563e-05, + "loss": 0.8047, + "step": 16808 + }, + { + "epoch": 0.9, + "learning_rate": 2.426506535787071e-05, + "loss": 0.7969, + "step": 16809 + }, + { + "epoch": 0.9, + "learning_rate": 2.423828695782909e-05, + "loss": 0.8633, + "step": 16810 + }, + { + "epoch": 0.9, + "learning_rate": 2.4211522974930233e-05, + "loss": 0.7617, + "step": 16811 + }, + { + "epoch": 0.9, + "learning_rate": 2.418477340998504e-05, + "loss": 0.8359, + "step": 16812 + }, + { + "epoch": 0.9, + "learning_rate": 2.4158038263804138e-05, + "loss": 0.8164, + "step": 16813 + }, + { + "epoch": 0.9, + "learning_rate": 2.4131317537197826e-05, + "loss": 0.8594, + "step": 16814 + }, + { + "epoch": 0.9, + "learning_rate": 2.4104611230975737e-05, + "loss": 0.8281, + "step": 16815 + }, + { + "epoch": 0.9, + "learning_rate": 2.4077919345947054e-05, + "loss": 0.6797, + "step": 16816 + }, + { + "epoch": 0.9, + "learning_rate": 2.4051241882920737e-05, + "loss": 0.7891, + "step": 16817 + }, + { + "epoch": 0.9, + "learning_rate": 2.402457884270526e-05, + "loss": 0.9453, + "step": 16818 + }, + { + "epoch": 0.9, + "learning_rate": 2.399793022610852e-05, + "loss": 0.7891, + "step": 16819 + }, + { + "epoch": 0.9, + "learning_rate": 2.3971296033937996e-05, + "loss": 0.8789, + "step": 16820 + }, + { + "epoch": 0.9, + "learning_rate": 2.3944676267000976e-05, + "loss": 0.8125, + "step": 16821 + }, + { + "epoch": 0.9, + "learning_rate": 2.3918070926103986e-05, + "loss": 0.7734, + "step": 16822 + }, + { + "epoch": 0.9, + "learning_rate": 2.3891480012053267e-05, + "loss": 0.7656, + "step": 16823 + }, + { + "epoch": 0.9, + "learning_rate": 2.386490352565457e-05, + "loss": 0.8359, + "step": 16824 + }, + { + "epoch": 0.9, + "learning_rate": 2.383834146771341e-05, + "loss": 0.8242, + "step": 16825 + }, + { + "epoch": 0.9, + "learning_rate": 2.3811793839034536e-05, + "loss": 0.7773, + "step": 16826 + }, + { + "epoch": 0.9, + "learning_rate": 2.3785260640422524e-05, + "loss": 0.8164, + "step": 16827 + }, + { + "epoch": 0.9, + "learning_rate": 2.37587418726814e-05, + "loss": 0.7461, + "step": 16828 + }, + { + "epoch": 0.9, + "learning_rate": 2.373223753661469e-05, + "loss": 0.8438, + "step": 16829 + }, + { + "epoch": 0.9, + "learning_rate": 2.3705747633025744e-05, + "loss": 0.7188, + "step": 16830 + }, + { + "epoch": 0.9, + "learning_rate": 2.3679272162717146e-05, + "loss": 0.8008, + "step": 16831 + }, + { + "epoch": 0.9, + "learning_rate": 2.365281112649115e-05, + "loss": 0.8359, + "step": 16832 + }, + { + "epoch": 0.9, + "learning_rate": 2.3626364525149714e-05, + "loss": 0.6992, + "step": 16833 + }, + { + "epoch": 0.9, + "learning_rate": 2.3599932359494313e-05, + "loss": 0.8086, + "step": 16834 + }, + { + "epoch": 0.9, + "learning_rate": 2.357351463032581e-05, + "loss": 0.793, + "step": 16835 + }, + { + "epoch": 0.9, + "learning_rate": 2.3547111338444672e-05, + "loss": 0.7617, + "step": 16836 + }, + { + "epoch": 0.9, + "learning_rate": 2.3520722484651256e-05, + "loss": 0.8086, + "step": 16837 + }, + { + "epoch": 0.9, + "learning_rate": 2.3494348069745043e-05, + "loss": 0.8086, + "step": 16838 + }, + { + "epoch": 0.91, + "learning_rate": 2.3467988094525272e-05, + "loss": 0.7617, + "step": 16839 + }, + { + "epoch": 0.91, + "learning_rate": 2.344164255979081e-05, + "loss": 0.8047, + "step": 16840 + }, + { + "epoch": 0.91, + "learning_rate": 2.3415311466340016e-05, + "loss": 0.7344, + "step": 16841 + }, + { + "epoch": 0.91, + "learning_rate": 2.3388994814970754e-05, + "loss": 0.8008, + "step": 16842 + }, + { + "epoch": 0.91, + "learning_rate": 2.336269260648055e-05, + "loss": 0.793, + "step": 16843 + }, + { + "epoch": 0.91, + "learning_rate": 2.3336404841666326e-05, + "loss": 0.6992, + "step": 16844 + }, + { + "epoch": 0.91, + "learning_rate": 2.331013152132494e-05, + "loss": 0.7891, + "step": 16845 + }, + { + "epoch": 0.91, + "learning_rate": 2.3283872646252257e-05, + "loss": 0.8359, + "step": 16846 + }, + { + "epoch": 0.91, + "learning_rate": 2.3257628217244255e-05, + "loss": 0.8359, + "step": 16847 + }, + { + "epoch": 0.91, + "learning_rate": 2.323139823509607e-05, + "loss": 0.7422, + "step": 16848 + }, + { + "epoch": 0.91, + "learning_rate": 2.3205182700602567e-05, + "loss": 0.8008, + "step": 16849 + }, + { + "epoch": 0.91, + "learning_rate": 2.3178981614558335e-05, + "loss": 0.8047, + "step": 16850 + }, + { + "epoch": 0.91, + "learning_rate": 2.3152794977757185e-05, + "loss": 0.7344, + "step": 16851 + }, + { + "epoch": 0.91, + "learning_rate": 2.312662279099259e-05, + "loss": 0.7617, + "step": 16852 + }, + { + "epoch": 0.91, + "learning_rate": 2.310046505505786e-05, + "loss": 0.8086, + "step": 16853 + }, + { + "epoch": 0.91, + "learning_rate": 2.3074321770745587e-05, + "loss": 0.7891, + "step": 16854 + }, + { + "epoch": 0.91, + "learning_rate": 2.3048192938847912e-05, + "loss": 0.7344, + "step": 16855 + }, + { + "epoch": 0.91, + "learning_rate": 2.3022078560156702e-05, + "loss": 0.7461, + "step": 16856 + }, + { + "epoch": 0.91, + "learning_rate": 2.2995978635463322e-05, + "loss": 0.8867, + "step": 16857 + }, + { + "epoch": 0.91, + "learning_rate": 2.2969893165558698e-05, + "loss": 0.8047, + "step": 16858 + }, + { + "epoch": 0.91, + "learning_rate": 2.2943822151233083e-05, + "loss": 0.8789, + "step": 16859 + }, + { + "epoch": 0.91, + "learning_rate": 2.2917765593276852e-05, + "loss": 0.7852, + "step": 16860 + }, + { + "epoch": 0.91, + "learning_rate": 2.2891723492479476e-05, + "loss": 0.7812, + "step": 16861 + }, + { + "epoch": 0.91, + "learning_rate": 2.2865695849629998e-05, + "loss": 0.7969, + "step": 16862 + }, + { + "epoch": 0.91, + "learning_rate": 2.2839682665517224e-05, + "loss": 0.8281, + "step": 16863 + }, + { + "epoch": 0.91, + "learning_rate": 2.281368394092953e-05, + "loss": 0.7188, + "step": 16864 + }, + { + "epoch": 0.91, + "learning_rate": 2.278769967665456e-05, + "loss": 0.7656, + "step": 16865 + }, + { + "epoch": 0.91, + "learning_rate": 2.2761729873479964e-05, + "loss": 0.9219, + "step": 16866 + }, + { + "epoch": 0.91, + "learning_rate": 2.27357745321925e-05, + "loss": 0.8984, + "step": 16867 + }, + { + "epoch": 0.91, + "learning_rate": 2.270983365357887e-05, + "loss": 0.8516, + "step": 16868 + }, + { + "epoch": 0.91, + "learning_rate": 2.2683907238425006e-05, + "loss": 0.7305, + "step": 16869 + }, + { + "epoch": 0.91, + "learning_rate": 2.265799528751672e-05, + "loss": 0.8164, + "step": 16870 + }, + { + "epoch": 0.91, + "learning_rate": 2.2632097801639108e-05, + "loss": 0.8008, + "step": 16871 + }, + { + "epoch": 0.91, + "learning_rate": 2.2606214781576984e-05, + "loss": 0.9062, + "step": 16872 + }, + { + "epoch": 0.91, + "learning_rate": 2.2580346228114725e-05, + "loss": 0.7891, + "step": 16873 + }, + { + "epoch": 0.91, + "learning_rate": 2.2554492142036255e-05, + "loss": 0.8477, + "step": 16874 + }, + { + "epoch": 0.91, + "learning_rate": 2.2528652524124894e-05, + "loss": 0.7812, + "step": 16875 + }, + { + "epoch": 0.91, + "learning_rate": 2.2502827375163792e-05, + "loss": 0.7656, + "step": 16876 + }, + { + "epoch": 0.91, + "learning_rate": 2.247701669593555e-05, + "loss": 0.75, + "step": 16877 + }, + { + "epoch": 0.91, + "learning_rate": 2.2451220487222202e-05, + "loss": 0.8398, + "step": 16878 + }, + { + "epoch": 0.91, + "learning_rate": 2.242543874980557e-05, + "loss": 0.9258, + "step": 16879 + }, + { + "epoch": 0.91, + "learning_rate": 2.239967148446692e-05, + "loss": 0.8086, + "step": 16880 + }, + { + "epoch": 0.91, + "learning_rate": 2.237391869198707e-05, + "loss": 0.7695, + "step": 16881 + }, + { + "epoch": 0.91, + "learning_rate": 2.2348180373146286e-05, + "loss": 0.8906, + "step": 16882 + }, + { + "epoch": 0.91, + "learning_rate": 2.2322456528724665e-05, + "loss": 0.7734, + "step": 16883 + }, + { + "epoch": 0.91, + "learning_rate": 2.2296747159501808e-05, + "loss": 0.7773, + "step": 16884 + }, + { + "epoch": 0.91, + "learning_rate": 2.227105226625653e-05, + "loss": 0.8477, + "step": 16885 + }, + { + "epoch": 0.91, + "learning_rate": 2.2245371849767715e-05, + "loss": 0.8555, + "step": 16886 + }, + { + "epoch": 0.91, + "learning_rate": 2.2219705910813404e-05, + "loss": 0.8164, + "step": 16887 + }, + { + "epoch": 0.91, + "learning_rate": 2.2194054450171475e-05, + "loss": 0.8008, + "step": 16888 + }, + { + "epoch": 0.91, + "learning_rate": 2.216841746861914e-05, + "loss": 0.7891, + "step": 16889 + }, + { + "epoch": 0.91, + "learning_rate": 2.214279496693339e-05, + "loss": 0.9336, + "step": 16890 + }, + { + "epoch": 0.91, + "learning_rate": 2.2117186945890543e-05, + "loss": 0.8125, + "step": 16891 + }, + { + "epoch": 0.91, + "learning_rate": 2.2091593406266764e-05, + "loss": 0.6914, + "step": 16892 + }, + { + "epoch": 0.91, + "learning_rate": 2.2066014348837536e-05, + "loss": 0.7617, + "step": 16893 + }, + { + "epoch": 0.91, + "learning_rate": 2.2040449774378023e-05, + "loss": 0.7969, + "step": 16894 + }, + { + "epoch": 0.91, + "learning_rate": 2.2014899683662713e-05, + "loss": 0.8828, + "step": 16895 + }, + { + "epoch": 0.91, + "learning_rate": 2.1989364077466213e-05, + "loss": 0.7656, + "step": 16896 + }, + { + "epoch": 0.91, + "learning_rate": 2.1963842956562174e-05, + "loss": 0.8359, + "step": 16897 + }, + { + "epoch": 0.91, + "learning_rate": 2.1938336321723816e-05, + "loss": 0.6953, + "step": 16898 + }, + { + "epoch": 0.91, + "learning_rate": 2.1912844173724243e-05, + "loss": 0.8281, + "step": 16899 + }, + { + "epoch": 0.91, + "learning_rate": 2.1887366513336003e-05, + "loss": 0.7539, + "step": 16900 + }, + { + "epoch": 0.91, + "learning_rate": 2.1861903341331035e-05, + "loss": 0.7891, + "step": 16901 + }, + { + "epoch": 0.91, + "learning_rate": 2.1836454658480887e-05, + "loss": 0.7344, + "step": 16902 + }, + { + "epoch": 0.91, + "learning_rate": 2.181102046555694e-05, + "loss": 0.8477, + "step": 16903 + }, + { + "epoch": 0.91, + "learning_rate": 2.178560076332986e-05, + "loss": 0.7539, + "step": 16904 + }, + { + "epoch": 0.91, + "learning_rate": 2.1760195552569863e-05, + "loss": 0.8281, + "step": 16905 + }, + { + "epoch": 0.91, + "learning_rate": 2.173480483404683e-05, + "loss": 0.793, + "step": 16906 + }, + { + "epoch": 0.91, + "learning_rate": 2.1709428608530312e-05, + "loss": 0.9297, + "step": 16907 + }, + { + "epoch": 0.91, + "learning_rate": 2.1684066876789145e-05, + "loss": 0.7344, + "step": 16908 + }, + { + "epoch": 0.91, + "learning_rate": 2.1658719639591985e-05, + "loss": 0.7305, + "step": 16909 + }, + { + "epoch": 0.91, + "learning_rate": 2.1633386897706775e-05, + "loss": 0.8242, + "step": 16910 + }, + { + "epoch": 0.91, + "learning_rate": 2.16080686519014e-05, + "loss": 0.75, + "step": 16911 + }, + { + "epoch": 0.91, + "learning_rate": 2.1582764902942866e-05, + "loss": 0.7656, + "step": 16912 + }, + { + "epoch": 0.91, + "learning_rate": 2.1557475651598157e-05, + "loss": 0.9844, + "step": 16913 + }, + { + "epoch": 0.91, + "learning_rate": 2.1532200898633446e-05, + "loss": 0.8008, + "step": 16914 + }, + { + "epoch": 0.91, + "learning_rate": 2.1506940644814733e-05, + "loss": 0.8086, + "step": 16915 + }, + { + "epoch": 0.91, + "learning_rate": 2.1481694890907565e-05, + "loss": 0.8008, + "step": 16916 + }, + { + "epoch": 0.91, + "learning_rate": 2.1456463637676837e-05, + "loss": 0.8086, + "step": 16917 + }, + { + "epoch": 0.91, + "learning_rate": 2.1431246885887157e-05, + "loss": 0.8477, + "step": 16918 + }, + { + "epoch": 0.91, + "learning_rate": 2.1406044636302692e-05, + "loss": 0.8594, + "step": 16919 + }, + { + "epoch": 0.91, + "learning_rate": 2.1380856889687216e-05, + "loss": 0.8438, + "step": 16920 + }, + { + "epoch": 0.91, + "learning_rate": 2.1355683646803903e-05, + "loss": 0.7344, + "step": 16921 + }, + { + "epoch": 0.91, + "learning_rate": 2.1330524908415637e-05, + "loss": 0.8516, + "step": 16922 + }, + { + "epoch": 0.91, + "learning_rate": 2.130538067528487e-05, + "loss": 0.7539, + "step": 16923 + }, + { + "epoch": 0.91, + "learning_rate": 2.1280250948173486e-05, + "loss": 0.7969, + "step": 16924 + }, + { + "epoch": 0.91, + "learning_rate": 2.1255135727842935e-05, + "loss": 0.7578, + "step": 16925 + }, + { + "epoch": 0.91, + "learning_rate": 2.1230035015054328e-05, + "loss": 0.8008, + "step": 16926 + }, + { + "epoch": 0.91, + "learning_rate": 2.120494881056839e-05, + "loss": 0.7227, + "step": 16927 + }, + { + "epoch": 0.91, + "learning_rate": 2.117987711514524e-05, + "loss": 0.875, + "step": 16928 + }, + { + "epoch": 0.91, + "learning_rate": 2.1154819929544654e-05, + "loss": 0.7695, + "step": 16929 + }, + { + "epoch": 0.91, + "learning_rate": 2.1129777254525916e-05, + "loss": 0.7891, + "step": 16930 + }, + { + "epoch": 0.91, + "learning_rate": 2.1104749090847975e-05, + "loss": 0.7578, + "step": 16931 + }, + { + "epoch": 0.91, + "learning_rate": 2.1079735439269165e-05, + "loss": 0.7812, + "step": 16932 + }, + { + "epoch": 0.91, + "learning_rate": 2.105473630054755e-05, + "loss": 0.7344, + "step": 16933 + }, + { + "epoch": 0.91, + "learning_rate": 2.102975167544069e-05, + "loss": 0.7695, + "step": 16934 + }, + { + "epoch": 0.91, + "learning_rate": 2.1004781564705645e-05, + "loss": 0.7383, + "step": 16935 + }, + { + "epoch": 0.91, + "learning_rate": 2.0979825969099143e-05, + "loss": 0.7305, + "step": 16936 + }, + { + "epoch": 0.91, + "learning_rate": 2.0954884889377413e-05, + "loss": 0.9023, + "step": 16937 + }, + { + "epoch": 0.91, + "learning_rate": 2.0929958326296183e-05, + "loss": 0.7734, + "step": 16938 + }, + { + "epoch": 0.91, + "learning_rate": 2.0905046280610963e-05, + "loss": 0.7812, + "step": 16939 + }, + { + "epoch": 0.91, + "learning_rate": 2.0880148753076588e-05, + "loss": 0.8125, + "step": 16940 + }, + { + "epoch": 0.91, + "learning_rate": 2.0855265744447406e-05, + "loss": 0.832, + "step": 16941 + }, + { + "epoch": 0.91, + "learning_rate": 2.083039725547764e-05, + "loss": 0.7852, + "step": 16942 + }, + { + "epoch": 0.91, + "learning_rate": 2.0805543286920858e-05, + "loss": 0.8164, + "step": 16943 + }, + { + "epoch": 0.91, + "learning_rate": 2.078070383953007e-05, + "loss": 0.8281, + "step": 16944 + }, + { + "epoch": 0.91, + "learning_rate": 2.075587891405817e-05, + "loss": 0.9141, + "step": 16945 + }, + { + "epoch": 0.91, + "learning_rate": 2.073106851125739e-05, + "loss": 0.8516, + "step": 16946 + }, + { + "epoch": 0.91, + "learning_rate": 2.070627263187952e-05, + "loss": 0.793, + "step": 16947 + }, + { + "epoch": 0.91, + "learning_rate": 2.0681491276675957e-05, + "loss": 0.8398, + "step": 16948 + }, + { + "epoch": 0.91, + "learning_rate": 2.0656724446397656e-05, + "loss": 0.75, + "step": 16949 + }, + { + "epoch": 0.91, + "learning_rate": 2.0631972141795184e-05, + "loss": 0.8438, + "step": 16950 + }, + { + "epoch": 0.91, + "learning_rate": 2.0607234363618553e-05, + "loss": 0.7969, + "step": 16951 + }, + { + "epoch": 0.91, + "learning_rate": 2.0582511112617552e-05, + "loss": 0.8398, + "step": 16952 + }, + { + "epoch": 0.91, + "learning_rate": 2.0557802389541135e-05, + "loss": 0.7812, + "step": 16953 + }, + { + "epoch": 0.91, + "learning_rate": 2.053310819513826e-05, + "loss": 0.8164, + "step": 16954 + }, + { + "epoch": 0.91, + "learning_rate": 2.0508428530157107e-05, + "loss": 0.8633, + "step": 16955 + }, + { + "epoch": 0.91, + "learning_rate": 2.048376339534569e-05, + "loss": 0.7852, + "step": 16956 + }, + { + "epoch": 0.91, + "learning_rate": 2.045911279145124e-05, + "loss": 0.8359, + "step": 16957 + }, + { + "epoch": 0.91, + "learning_rate": 2.043447671922094e-05, + "loss": 0.8594, + "step": 16958 + }, + { + "epoch": 0.91, + "learning_rate": 2.0409855179401303e-05, + "loss": 0.9023, + "step": 16959 + }, + { + "epoch": 0.91, + "learning_rate": 2.03852481727384e-05, + "loss": 0.7656, + "step": 16960 + }, + { + "epoch": 0.91, + "learning_rate": 2.0360655699977802e-05, + "loss": 0.793, + "step": 16961 + }, + { + "epoch": 0.91, + "learning_rate": 2.0336077761865025e-05, + "loss": 0.8633, + "step": 16962 + }, + { + "epoch": 0.91, + "learning_rate": 2.031151435914469e-05, + "loss": 0.8008, + "step": 16963 + }, + { + "epoch": 0.91, + "learning_rate": 2.0286965492561038e-05, + "loss": 0.8945, + "step": 16964 + }, + { + "epoch": 0.91, + "learning_rate": 2.0262431162858143e-05, + "loss": 0.875, + "step": 16965 + }, + { + "epoch": 0.91, + "learning_rate": 2.0237911370779516e-05, + "loss": 0.7188, + "step": 16966 + }, + { + "epoch": 0.91, + "learning_rate": 2.0213406117068066e-05, + "loss": 0.8477, + "step": 16967 + }, + { + "epoch": 0.91, + "learning_rate": 2.0188915402466367e-05, + "loss": 0.7812, + "step": 16968 + }, + { + "epoch": 0.91, + "learning_rate": 2.01644392277166e-05, + "loss": 0.9336, + "step": 16969 + }, + { + "epoch": 0.91, + "learning_rate": 2.0139977593560565e-05, + "loss": 0.7734, + "step": 16970 + }, + { + "epoch": 0.91, + "learning_rate": 2.0115530500739386e-05, + "loss": 0.7539, + "step": 16971 + }, + { + "epoch": 0.91, + "learning_rate": 2.0091097949993976e-05, + "loss": 0.7617, + "step": 16972 + }, + { + "epoch": 0.91, + "learning_rate": 2.006667994206479e-05, + "loss": 0.8242, + "step": 16973 + }, + { + "epoch": 0.91, + "learning_rate": 2.004227647769158e-05, + "loss": 0.8125, + "step": 16974 + }, + { + "epoch": 0.91, + "learning_rate": 2.001788755761408e-05, + "loss": 0.7812, + "step": 16975 + }, + { + "epoch": 0.91, + "learning_rate": 1.9993513182571145e-05, + "loss": 0.75, + "step": 16976 + }, + { + "epoch": 0.91, + "learning_rate": 1.9969153353301518e-05, + "loss": 0.7617, + "step": 16977 + }, + { + "epoch": 0.91, + "learning_rate": 1.994480807054333e-05, + "loss": 0.7578, + "step": 16978 + }, + { + "epoch": 0.91, + "learning_rate": 1.9920477335034437e-05, + "loss": 0.8281, + "step": 16979 + }, + { + "epoch": 0.91, + "learning_rate": 1.9896161147511914e-05, + "loss": 0.7031, + "step": 16980 + }, + { + "epoch": 0.91, + "learning_rate": 1.9871859508712843e-05, + "loss": 0.6953, + "step": 16981 + }, + { + "epoch": 0.91, + "learning_rate": 1.984757241937357e-05, + "loss": 0.8164, + "step": 16982 + }, + { + "epoch": 0.91, + "learning_rate": 1.9823299880230074e-05, + "loss": 0.8203, + "step": 16983 + }, + { + "epoch": 0.91, + "learning_rate": 1.979904189201781e-05, + "loss": 0.707, + "step": 16984 + }, + { + "epoch": 0.91, + "learning_rate": 1.9774798455471978e-05, + "loss": 0.7852, + "step": 16985 + }, + { + "epoch": 0.91, + "learning_rate": 1.9750569571327205e-05, + "loss": 0.7695, + "step": 16986 + }, + { + "epoch": 0.91, + "learning_rate": 1.9726355240317682e-05, + "loss": 0.7812, + "step": 16987 + }, + { + "epoch": 0.91, + "learning_rate": 1.970215546317722e-05, + "loss": 0.8906, + "step": 16988 + }, + { + "epoch": 0.91, + "learning_rate": 1.9677970240639165e-05, + "loss": 0.8008, + "step": 16989 + }, + { + "epoch": 0.91, + "learning_rate": 1.965379957343638e-05, + "loss": 0.8516, + "step": 16990 + }, + { + "epoch": 0.91, + "learning_rate": 1.9629643462301228e-05, + "loss": 0.793, + "step": 16991 + }, + { + "epoch": 0.91, + "learning_rate": 1.9605501907965838e-05, + "loss": 0.7031, + "step": 16992 + }, + { + "epoch": 0.91, + "learning_rate": 1.95813749111618e-05, + "loss": 0.9336, + "step": 16993 + }, + { + "epoch": 0.91, + "learning_rate": 1.9557262472620128e-05, + "loss": 0.8438, + "step": 16994 + }, + { + "epoch": 0.91, + "learning_rate": 1.953316459307164e-05, + "loss": 0.9414, + "step": 16995 + }, + { + "epoch": 0.91, + "learning_rate": 1.950908127324641e-05, + "loss": 0.7969, + "step": 16996 + }, + { + "epoch": 0.91, + "learning_rate": 1.948501251387441e-05, + "loss": 0.9102, + "step": 16997 + }, + { + "epoch": 0.91, + "learning_rate": 1.9460958315684894e-05, + "loss": 0.832, + "step": 16998 + }, + { + "epoch": 0.91, + "learning_rate": 1.9436918679406834e-05, + "loss": 0.7695, + "step": 16999 + }, + { + "epoch": 0.91, + "learning_rate": 1.94128936057687e-05, + "loss": 0.8438, + "step": 17000 + }, + { + "epoch": 0.91, + "learning_rate": 1.9388883095498466e-05, + "loss": 0.7188, + "step": 17001 + }, + { + "epoch": 0.91, + "learning_rate": 1.9364887149323885e-05, + "loss": 0.8633, + "step": 17002 + }, + { + "epoch": 0.91, + "learning_rate": 1.934090576797204e-05, + "loss": 0.8672, + "step": 17003 + }, + { + "epoch": 0.91, + "learning_rate": 1.931693895216946e-05, + "loss": 0.8789, + "step": 17004 + }, + { + "epoch": 0.91, + "learning_rate": 1.9292986702642678e-05, + "loss": 0.9023, + "step": 17005 + }, + { + "epoch": 0.91, + "learning_rate": 1.9269049020117502e-05, + "loss": 0.8359, + "step": 17006 + }, + { + "epoch": 0.91, + "learning_rate": 1.924512590531913e-05, + "loss": 0.7734, + "step": 17007 + }, + { + "epoch": 0.91, + "learning_rate": 1.9221217358972697e-05, + "loss": 0.7891, + "step": 17008 + }, + { + "epoch": 0.91, + "learning_rate": 1.9197323381802635e-05, + "loss": 0.7148, + "step": 17009 + }, + { + "epoch": 0.91, + "learning_rate": 1.917344397453308e-05, + "loss": 0.8555, + "step": 17010 + }, + { + "epoch": 0.91, + "learning_rate": 1.9149579137887453e-05, + "loss": 0.8633, + "step": 17011 + }, + { + "epoch": 0.91, + "learning_rate": 1.9125728872589177e-05, + "loss": 0.7617, + "step": 17012 + }, + { + "epoch": 0.91, + "learning_rate": 1.9101893179360895e-05, + "loss": 0.832, + "step": 17013 + }, + { + "epoch": 0.91, + "learning_rate": 1.9078072058924866e-05, + "loss": 0.7656, + "step": 17014 + }, + { + "epoch": 0.91, + "learning_rate": 1.905426551200301e-05, + "loss": 0.707, + "step": 17015 + }, + { + "epoch": 0.91, + "learning_rate": 1.903047353931675e-05, + "loss": 0.832, + "step": 17016 + }, + { + "epoch": 0.91, + "learning_rate": 1.900669614158701e-05, + "loss": 0.7266, + "step": 17017 + }, + { + "epoch": 0.91, + "learning_rate": 1.8982933319534378e-05, + "loss": 0.9141, + "step": 17018 + }, + { + "epoch": 0.91, + "learning_rate": 1.8959185073878893e-05, + "loss": 0.8125, + "step": 17019 + }, + { + "epoch": 0.91, + "learning_rate": 1.8935451405340252e-05, + "loss": 0.8125, + "step": 17020 + }, + { + "epoch": 0.91, + "learning_rate": 1.891173231463761e-05, + "loss": 0.7617, + "step": 17021 + }, + { + "epoch": 0.91, + "learning_rate": 1.888802780248977e-05, + "loss": 0.793, + "step": 17022 + }, + { + "epoch": 0.91, + "learning_rate": 1.8864337869615055e-05, + "loss": 0.7109, + "step": 17023 + }, + { + "epoch": 0.91, + "learning_rate": 1.8840662516731277e-05, + "loss": 0.7812, + "step": 17024 + }, + { + "epoch": 0.92, + "learning_rate": 1.8817001744556027e-05, + "loss": 0.7812, + "step": 17025 + }, + { + "epoch": 0.92, + "learning_rate": 1.8793355553806233e-05, + "loss": 0.7422, + "step": 17026 + }, + { + "epoch": 0.92, + "learning_rate": 1.8769723945198323e-05, + "loss": 0.707, + "step": 17027 + }, + { + "epoch": 0.92, + "learning_rate": 1.8746106919448613e-05, + "loss": 0.8516, + "step": 17028 + }, + { + "epoch": 0.92, + "learning_rate": 1.8722504477272695e-05, + "loss": 0.8242, + "step": 17029 + }, + { + "epoch": 0.92, + "learning_rate": 1.8698916619385774e-05, + "loss": 0.7891, + "step": 17030 + }, + { + "epoch": 0.92, + "learning_rate": 1.8675343346502617e-05, + "loss": 0.8281, + "step": 17031 + }, + { + "epoch": 0.92, + "learning_rate": 1.86517846593377e-05, + "loss": 0.8008, + "step": 17032 + }, + { + "epoch": 0.92, + "learning_rate": 1.8628240558604847e-05, + "loss": 0.7852, + "step": 17033 + }, + { + "epoch": 0.92, + "learning_rate": 1.8604711045017485e-05, + "loss": 0.7266, + "step": 17034 + }, + { + "epoch": 0.92, + "learning_rate": 1.8581196119288656e-05, + "loss": 0.7148, + "step": 17035 + }, + { + "epoch": 0.92, + "learning_rate": 1.8557695782130955e-05, + "loss": 0.7578, + "step": 17036 + }, + { + "epoch": 0.92, + "learning_rate": 1.8534210034256537e-05, + "loss": 0.8789, + "step": 17037 + }, + { + "epoch": 0.92, + "learning_rate": 1.851073887637711e-05, + "loss": 0.8594, + "step": 17038 + }, + { + "epoch": 0.92, + "learning_rate": 1.8487282309203823e-05, + "loss": 0.7695, + "step": 17039 + }, + { + "epoch": 0.92, + "learning_rate": 1.8463840333447558e-05, + "loss": 0.7969, + "step": 17040 + }, + { + "epoch": 0.92, + "learning_rate": 1.8440412949818742e-05, + "loss": 0.7578, + "step": 17041 + }, + { + "epoch": 0.92, + "learning_rate": 1.8417000159027254e-05, + "loss": 0.6406, + "step": 17042 + }, + { + "epoch": 0.92, + "learning_rate": 1.8393601961782526e-05, + "loss": 0.7344, + "step": 17043 + }, + { + "epoch": 0.92, + "learning_rate": 1.83702183587936e-05, + "loss": 0.8438, + "step": 17044 + }, + { + "epoch": 0.92, + "learning_rate": 1.8346849350769245e-05, + "loss": 0.75, + "step": 17045 + }, + { + "epoch": 0.92, + "learning_rate": 1.8323494938417396e-05, + "loss": 0.7656, + "step": 17046 + }, + { + "epoch": 0.92, + "learning_rate": 1.830015512244587e-05, + "loss": 0.8203, + "step": 17047 + }, + { + "epoch": 0.92, + "learning_rate": 1.8276829903562e-05, + "loss": 0.8477, + "step": 17048 + }, + { + "epoch": 0.92, + "learning_rate": 1.825351928247254e-05, + "loss": 0.8516, + "step": 17049 + }, + { + "epoch": 0.92, + "learning_rate": 1.8230223259883827e-05, + "loss": 0.8828, + "step": 17050 + }, + { + "epoch": 0.92, + "learning_rate": 1.82069418365019e-05, + "loss": 0.8594, + "step": 17051 + }, + { + "epoch": 0.92, + "learning_rate": 1.8183675013032254e-05, + "loss": 0.8477, + "step": 17052 + }, + { + "epoch": 0.92, + "learning_rate": 1.816042279017993e-05, + "loss": 0.7617, + "step": 17053 + }, + { + "epoch": 0.92, + "learning_rate": 1.8137185168649483e-05, + "loss": 0.8047, + "step": 17054 + }, + { + "epoch": 0.92, + "learning_rate": 1.8113962149145235e-05, + "loss": 0.7344, + "step": 17055 + }, + { + "epoch": 0.92, + "learning_rate": 1.8090753732370847e-05, + "loss": 0.7852, + "step": 17056 + }, + { + "epoch": 0.92, + "learning_rate": 1.806755991902953e-05, + "loss": 0.7852, + "step": 17057 + }, + { + "epoch": 0.92, + "learning_rate": 1.8044380709824226e-05, + "loss": 0.8203, + "step": 17058 + }, + { + "epoch": 0.92, + "learning_rate": 1.802121610545737e-05, + "loss": 0.7734, + "step": 17059 + }, + { + "epoch": 0.92, + "learning_rate": 1.7998066106630796e-05, + "loss": 0.7969, + "step": 17060 + }, + { + "epoch": 0.92, + "learning_rate": 1.7974930714046157e-05, + "loss": 0.75, + "step": 17061 + }, + { + "epoch": 0.92, + "learning_rate": 1.795180992840445e-05, + "loss": 0.8125, + "step": 17062 + }, + { + "epoch": 0.92, + "learning_rate": 1.7928703750406395e-05, + "loss": 0.8555, + "step": 17063 + }, + { + "epoch": 0.92, + "learning_rate": 1.790561218075204e-05, + "loss": 0.7461, + "step": 17064 + }, + { + "epoch": 0.92, + "learning_rate": 1.7882535220141328e-05, + "loss": 0.7383, + "step": 17065 + }, + { + "epoch": 0.92, + "learning_rate": 1.7859472869273363e-05, + "loss": 0.9375, + "step": 17066 + }, + { + "epoch": 0.92, + "learning_rate": 1.7836425128847144e-05, + "loss": 0.8555, + "step": 17067 + }, + { + "epoch": 0.92, + "learning_rate": 1.7813391999561057e-05, + "loss": 0.8281, + "step": 17068 + }, + { + "epoch": 0.92, + "learning_rate": 1.7790373482113154e-05, + "loss": 0.8359, + "step": 17069 + }, + { + "epoch": 0.92, + "learning_rate": 1.7767369577200763e-05, + "loss": 0.8516, + "step": 17070 + }, + { + "epoch": 0.92, + "learning_rate": 1.774438028552122e-05, + "loss": 0.8125, + "step": 17071 + }, + { + "epoch": 0.92, + "learning_rate": 1.772140560777108e-05, + "loss": 0.7656, + "step": 17072 + }, + { + "epoch": 0.92, + "learning_rate": 1.76984455446465e-05, + "loss": 0.8633, + "step": 17073 + }, + { + "epoch": 0.92, + "learning_rate": 1.7675500096843266e-05, + "loss": 0.8398, + "step": 17074 + }, + { + "epoch": 0.92, + "learning_rate": 1.765256926505676e-05, + "loss": 0.6914, + "step": 17075 + }, + { + "epoch": 0.92, + "learning_rate": 1.7629653049981876e-05, + "loss": 0.8281, + "step": 17076 + }, + { + "epoch": 0.92, + "learning_rate": 1.7606751452312885e-05, + "loss": 0.875, + "step": 17077 + }, + { + "epoch": 0.92, + "learning_rate": 1.758386447274396e-05, + "loss": 0.7539, + "step": 17078 + }, + { + "epoch": 0.92, + "learning_rate": 1.75609921119686e-05, + "loss": 0.9062, + "step": 17079 + }, + { + "epoch": 0.92, + "learning_rate": 1.753813437067986e-05, + "loss": 0.8359, + "step": 17080 + }, + { + "epoch": 0.92, + "learning_rate": 1.7515291249570408e-05, + "loss": 0.7969, + "step": 17081 + }, + { + "epoch": 0.92, + "learning_rate": 1.749246274933264e-05, + "loss": 0.8516, + "step": 17082 + }, + { + "epoch": 0.92, + "learning_rate": 1.746964887065805e-05, + "loss": 0.75, + "step": 17083 + }, + { + "epoch": 0.92, + "learning_rate": 1.74468496142382e-05, + "loss": 0.793, + "step": 17084 + }, + { + "epoch": 0.92, + "learning_rate": 1.7424064980763877e-05, + "loss": 0.8242, + "step": 17085 + }, + { + "epoch": 0.92, + "learning_rate": 1.740129497092563e-05, + "loss": 0.8867, + "step": 17086 + }, + { + "epoch": 0.92, + "learning_rate": 1.7378539585413245e-05, + "loss": 0.8789, + "step": 17087 + }, + { + "epoch": 0.92, + "learning_rate": 1.7355798824916556e-05, + "loss": 0.75, + "step": 17088 + }, + { + "epoch": 0.92, + "learning_rate": 1.7333072690124517e-05, + "loss": 0.7852, + "step": 17089 + }, + { + "epoch": 0.92, + "learning_rate": 1.73103611817258e-05, + "loss": 0.7852, + "step": 17090 + }, + { + "epoch": 0.92, + "learning_rate": 1.7287664300408734e-05, + "loss": 0.8438, + "step": 17091 + }, + { + "epoch": 0.92, + "learning_rate": 1.7264982046861057e-05, + "loss": 0.7344, + "step": 17092 + }, + { + "epoch": 0.92, + "learning_rate": 1.72423144217701e-05, + "loss": 0.8516, + "step": 17093 + }, + { + "epoch": 0.92, + "learning_rate": 1.7219661425822762e-05, + "loss": 0.7344, + "step": 17094 + }, + { + "epoch": 0.92, + "learning_rate": 1.719702305970555e-05, + "loss": 0.7656, + "step": 17095 + }, + { + "epoch": 0.92, + "learning_rate": 1.7174399324104472e-05, + "loss": 0.7656, + "step": 17096 + }, + { + "epoch": 0.92, + "learning_rate": 1.7151790219705032e-05, + "loss": 0.7617, + "step": 17097 + }, + { + "epoch": 0.92, + "learning_rate": 1.712919574719246e-05, + "loss": 0.7656, + "step": 17098 + }, + { + "epoch": 0.92, + "learning_rate": 1.7106615907251434e-05, + "loss": 0.7227, + "step": 17099 + }, + { + "epoch": 0.92, + "learning_rate": 1.7084050700566068e-05, + "loss": 0.8906, + "step": 17100 + }, + { + "epoch": 0.92, + "learning_rate": 1.7061500127820263e-05, + "loss": 0.7656, + "step": 17101 + }, + { + "epoch": 0.92, + "learning_rate": 1.7038964189697415e-05, + "loss": 0.9062, + "step": 17102 + }, + { + "epoch": 0.92, + "learning_rate": 1.701644288688037e-05, + "loss": 1.0, + "step": 17103 + }, + { + "epoch": 0.92, + "learning_rate": 1.699393622005163e-05, + "loss": 0.7812, + "step": 17104 + }, + { + "epoch": 0.92, + "learning_rate": 1.697144418989316e-05, + "loss": 0.8516, + "step": 17105 + }, + { + "epoch": 0.92, + "learning_rate": 1.6948966797086572e-05, + "loss": 0.7891, + "step": 17106 + }, + { + "epoch": 0.92, + "learning_rate": 1.6926504042313106e-05, + "loss": 0.8125, + "step": 17107 + }, + { + "epoch": 0.92, + "learning_rate": 1.690405592625338e-05, + "loss": 0.7812, + "step": 17108 + }, + { + "epoch": 0.92, + "learning_rate": 1.688162244958752e-05, + "loss": 0.8594, + "step": 17109 + }, + { + "epoch": 0.92, + "learning_rate": 1.6859203612995532e-05, + "loss": 0.8047, + "step": 17110 + }, + { + "epoch": 0.92, + "learning_rate": 1.683679941715671e-05, + "loss": 0.7617, + "step": 17111 + }, + { + "epoch": 0.92, + "learning_rate": 1.6814409862749956e-05, + "loss": 0.7656, + "step": 17112 + }, + { + "epoch": 0.92, + "learning_rate": 1.6792034950453672e-05, + "loss": 0.8945, + "step": 17113 + }, + { + "epoch": 0.92, + "learning_rate": 1.676967468094609e-05, + "loss": 0.8633, + "step": 17114 + }, + { + "epoch": 0.92, + "learning_rate": 1.674732905490467e-05, + "loss": 0.8281, + "step": 17115 + }, + { + "epoch": 0.92, + "learning_rate": 1.6724998073006482e-05, + "loss": 0.8984, + "step": 17116 + }, + { + "epoch": 0.92, + "learning_rate": 1.670268173592837e-05, + "loss": 0.8164, + "step": 17117 + }, + { + "epoch": 0.92, + "learning_rate": 1.668038004434658e-05, + "loss": 0.8438, + "step": 17118 + }, + { + "epoch": 0.92, + "learning_rate": 1.6658092998936835e-05, + "loss": 0.8242, + "step": 17119 + }, + { + "epoch": 0.92, + "learning_rate": 1.6635820600374553e-05, + "loss": 0.7734, + "step": 17120 + }, + { + "epoch": 0.92, + "learning_rate": 1.661356284933474e-05, + "loss": 0.7734, + "step": 17121 + }, + { + "epoch": 0.92, + "learning_rate": 1.659131974649175e-05, + "loss": 0.8711, + "step": 17122 + }, + { + "epoch": 0.92, + "learning_rate": 1.6569091292519656e-05, + "loss": 0.8828, + "step": 17123 + }, + { + "epoch": 0.92, + "learning_rate": 1.6546877488092083e-05, + "loss": 0.7422, + "step": 17124 + }, + { + "epoch": 0.92, + "learning_rate": 1.6524678333882215e-05, + "loss": 0.8555, + "step": 17125 + }, + { + "epoch": 0.92, + "learning_rate": 1.650249383056268e-05, + "loss": 0.8281, + "step": 17126 + }, + { + "epoch": 0.92, + "learning_rate": 1.6480323978805834e-05, + "loss": 0.8555, + "step": 17127 + }, + { + "epoch": 0.92, + "learning_rate": 1.645816877928341e-05, + "loss": 0.8047, + "step": 17128 + }, + { + "epoch": 0.92, + "learning_rate": 1.643602823266682e-05, + "loss": 0.8047, + "step": 17129 + }, + { + "epoch": 0.92, + "learning_rate": 1.6413902339626975e-05, + "loss": 0.8242, + "step": 17130 + }, + { + "epoch": 0.92, + "learning_rate": 1.6391791100834387e-05, + "loss": 0.7734, + "step": 17131 + }, + { + "epoch": 0.92, + "learning_rate": 1.636969451695908e-05, + "loss": 0.8281, + "step": 17132 + }, + { + "epoch": 0.92, + "learning_rate": 1.6347612588670624e-05, + "loss": 0.8242, + "step": 17133 + }, + { + "epoch": 0.92, + "learning_rate": 1.6325545316638268e-05, + "loss": 0.8516, + "step": 17134 + }, + { + "epoch": 0.92, + "learning_rate": 1.6303492701530697e-05, + "loss": 0.7656, + "step": 17135 + }, + { + "epoch": 0.92, + "learning_rate": 1.6281454744016045e-05, + "loss": 0.8086, + "step": 17136 + }, + { + "epoch": 0.92, + "learning_rate": 1.625943144476233e-05, + "loss": 0.7891, + "step": 17137 + }, + { + "epoch": 0.92, + "learning_rate": 1.6237422804436797e-05, + "loss": 0.793, + "step": 17138 + }, + { + "epoch": 0.92, + "learning_rate": 1.6215428823706413e-05, + "loss": 0.8125, + "step": 17139 + }, + { + "epoch": 0.92, + "learning_rate": 1.61934495032377e-05, + "loss": 0.7734, + "step": 17140 + }, + { + "epoch": 0.92, + "learning_rate": 1.6171484843696683e-05, + "loss": 0.8047, + "step": 17141 + }, + { + "epoch": 0.92, + "learning_rate": 1.6149534845748992e-05, + "loss": 0.8047, + "step": 17142 + }, + { + "epoch": 0.92, + "learning_rate": 1.6127599510059655e-05, + "loss": 0.6719, + "step": 17143 + }, + { + "epoch": 0.92, + "learning_rate": 1.6105678837293526e-05, + "loss": 0.8047, + "step": 17144 + }, + { + "epoch": 0.92, + "learning_rate": 1.608377282811485e-05, + "loss": 0.8828, + "step": 17145 + }, + { + "epoch": 0.92, + "learning_rate": 1.606188148318738e-05, + "loss": 0.707, + "step": 17146 + }, + { + "epoch": 0.92, + "learning_rate": 1.6040004803174635e-05, + "loss": 0.8086, + "step": 17147 + }, + { + "epoch": 0.92, + "learning_rate": 1.6018142788739364e-05, + "loss": 0.7617, + "step": 17148 + }, + { + "epoch": 0.92, + "learning_rate": 1.5996295440544205e-05, + "loss": 0.7734, + "step": 17149 + }, + { + "epoch": 0.92, + "learning_rate": 1.5974462759251186e-05, + "loss": 0.793, + "step": 17150 + }, + { + "epoch": 0.92, + "learning_rate": 1.5952644745521884e-05, + "loss": 0.793, + "step": 17151 + }, + { + "epoch": 0.92, + "learning_rate": 1.5930841400017383e-05, + "loss": 0.7461, + "step": 17152 + }, + { + "epoch": 0.92, + "learning_rate": 1.590905272339843e-05, + "loss": 0.8789, + "step": 17153 + }, + { + "epoch": 0.92, + "learning_rate": 1.5887278716325392e-05, + "loss": 0.8867, + "step": 17154 + }, + { + "epoch": 0.92, + "learning_rate": 1.5865519379458015e-05, + "loss": 0.7695, + "step": 17155 + }, + { + "epoch": 0.92, + "learning_rate": 1.584377471345566e-05, + "loss": 0.7734, + "step": 17156 + }, + { + "epoch": 0.92, + "learning_rate": 1.5822044718977357e-05, + "loss": 0.8164, + "step": 17157 + }, + { + "epoch": 0.92, + "learning_rate": 1.5800329396681522e-05, + "loss": 0.75, + "step": 17158 + }, + { + "epoch": 0.92, + "learning_rate": 1.5778628747226186e-05, + "loss": 0.8242, + "step": 17159 + }, + { + "epoch": 0.92, + "learning_rate": 1.5756942771268933e-05, + "loss": 0.8672, + "step": 17160 + }, + { + "epoch": 0.92, + "learning_rate": 1.573527146946707e-05, + "loss": 0.8711, + "step": 17161 + }, + { + "epoch": 0.92, + "learning_rate": 1.5713614842477075e-05, + "loss": 0.8047, + "step": 17162 + }, + { + "epoch": 0.92, + "learning_rate": 1.5691972890955365e-05, + "loss": 0.7656, + "step": 17163 + }, + { + "epoch": 0.92, + "learning_rate": 1.56703456155578e-05, + "loss": 0.7617, + "step": 17164 + }, + { + "epoch": 0.92, + "learning_rate": 1.56487330169397e-05, + "loss": 0.7852, + "step": 17165 + }, + { + "epoch": 0.92, + "learning_rate": 1.5627135095755917e-05, + "loss": 0.7656, + "step": 17166 + }, + { + "epoch": 0.92, + "learning_rate": 1.560555185266105e-05, + "loss": 0.8125, + "step": 17167 + }, + { + "epoch": 0.92, + "learning_rate": 1.5583983288309123e-05, + "loss": 0.9375, + "step": 17168 + }, + { + "epoch": 0.92, + "learning_rate": 1.5562429403353672e-05, + "loss": 0.8594, + "step": 17169 + }, + { + "epoch": 0.92, + "learning_rate": 1.5540890198447956e-05, + "loss": 0.7891, + "step": 17170 + }, + { + "epoch": 0.92, + "learning_rate": 1.5519365674244555e-05, + "loss": 0.918, + "step": 17171 + }, + { + "epoch": 0.92, + "learning_rate": 1.5497855831395845e-05, + "loss": 0.7617, + "step": 17172 + }, + { + "epoch": 0.92, + "learning_rate": 1.547636067055358e-05, + "loss": 0.875, + "step": 17173 + }, + { + "epoch": 0.92, + "learning_rate": 1.545488019236918e-05, + "loss": 0.7617, + "step": 17174 + }, + { + "epoch": 0.92, + "learning_rate": 1.5433414397493516e-05, + "loss": 0.832, + "step": 17175 + }, + { + "epoch": 0.92, + "learning_rate": 1.541196328657707e-05, + "loss": 0.8047, + "step": 17176 + }, + { + "epoch": 0.92, + "learning_rate": 1.5390526860270038e-05, + "loss": 0.7148, + "step": 17177 + }, + { + "epoch": 0.92, + "learning_rate": 1.5369105119221848e-05, + "loss": 0.8125, + "step": 17178 + }, + { + "epoch": 0.92, + "learning_rate": 1.534769806408154e-05, + "loss": 0.8555, + "step": 17179 + }, + { + "epoch": 0.92, + "learning_rate": 1.5326305695498143e-05, + "loss": 0.8242, + "step": 17180 + }, + { + "epoch": 0.92, + "learning_rate": 1.5304928014119758e-05, + "loss": 0.9141, + "step": 17181 + }, + { + "epoch": 0.92, + "learning_rate": 1.5283565020594083e-05, + "loss": 0.7812, + "step": 17182 + }, + { + "epoch": 0.92, + "learning_rate": 1.52622167155686e-05, + "loss": 0.8438, + "step": 17183 + }, + { + "epoch": 0.92, + "learning_rate": 1.5240883099690294e-05, + "loss": 0.7891, + "step": 17184 + }, + { + "epoch": 0.92, + "learning_rate": 1.5219564173605537e-05, + "loss": 0.8438, + "step": 17185 + }, + { + "epoch": 0.92, + "learning_rate": 1.5198259937960368e-05, + "loss": 0.8086, + "step": 17186 + }, + { + "epoch": 0.92, + "learning_rate": 1.517697039340038e-05, + "loss": 0.7969, + "step": 17187 + }, + { + "epoch": 0.92, + "learning_rate": 1.5155695540570836e-05, + "loss": 0.7812, + "step": 17188 + }, + { + "epoch": 0.92, + "learning_rate": 1.5134435380116273e-05, + "loss": 0.793, + "step": 17189 + }, + { + "epoch": 0.92, + "learning_rate": 1.511318991268107e-05, + "loss": 0.8047, + "step": 17190 + }, + { + "epoch": 0.92, + "learning_rate": 1.5091959138908873e-05, + "loss": 0.7617, + "step": 17191 + }, + { + "epoch": 0.92, + "learning_rate": 1.507074305944317e-05, + "loss": 0.8125, + "step": 17192 + }, + { + "epoch": 0.92, + "learning_rate": 1.5049541674926948e-05, + "loss": 0.8125, + "step": 17193 + }, + { + "epoch": 0.92, + "learning_rate": 1.5028354986002467e-05, + "loss": 0.8398, + "step": 17194 + }, + { + "epoch": 0.92, + "learning_rate": 1.500718299331194e-05, + "loss": 0.7578, + "step": 17195 + }, + { + "epoch": 0.92, + "learning_rate": 1.4986025697496853e-05, + "loss": 0.75, + "step": 17196 + }, + { + "epoch": 0.92, + "learning_rate": 1.4964883099198412e-05, + "loss": 0.7812, + "step": 17197 + }, + { + "epoch": 0.92, + "learning_rate": 1.4943755199057163e-05, + "loss": 0.6562, + "step": 17198 + }, + { + "epoch": 0.92, + "learning_rate": 1.4922641997713482e-05, + "loss": 0.7656, + "step": 17199 + }, + { + "epoch": 0.92, + "learning_rate": 1.490154349580719e-05, + "loss": 0.9492, + "step": 17200 + }, + { + "epoch": 0.92, + "learning_rate": 1.4880459693977611e-05, + "loss": 0.7969, + "step": 17201 + }, + { + "epoch": 0.92, + "learning_rate": 1.4859390592863508e-05, + "loss": 0.7305, + "step": 17202 + }, + { + "epoch": 0.92, + "learning_rate": 1.483833619310354e-05, + "loss": 0.8477, + "step": 17203 + }, + { + "epoch": 0.92, + "learning_rate": 1.4817296495335642e-05, + "loss": 0.8008, + "step": 17204 + }, + { + "epoch": 0.92, + "learning_rate": 1.479627150019741e-05, + "loss": 0.7969, + "step": 17205 + }, + { + "epoch": 0.92, + "learning_rate": 1.4775261208325896e-05, + "loss": 0.8047, + "step": 17206 + }, + { + "epoch": 0.92, + "learning_rate": 1.4754265620357865e-05, + "loss": 0.7461, + "step": 17207 + }, + { + "epoch": 0.92, + "learning_rate": 1.4733284736929587e-05, + "loss": 0.8242, + "step": 17208 + }, + { + "epoch": 0.92, + "learning_rate": 1.4712318558676719e-05, + "loss": 0.8555, + "step": 17209 + }, + { + "epoch": 0.92, + "learning_rate": 1.4691367086234697e-05, + "loss": 0.7422, + "step": 17210 + }, + { + "epoch": 0.93, + "learning_rate": 1.4670430320238403e-05, + "loss": 0.793, + "step": 17211 + }, + { + "epoch": 0.93, + "learning_rate": 1.4649508261322276e-05, + "loss": 0.9258, + "step": 17212 + }, + { + "epoch": 0.93, + "learning_rate": 1.462860091012036e-05, + "loss": 0.8281, + "step": 17213 + }, + { + "epoch": 0.93, + "learning_rate": 1.4607708267266152e-05, + "loss": 0.7617, + "step": 17214 + }, + { + "epoch": 0.93, + "learning_rate": 1.4586830333392754e-05, + "loss": 0.7734, + "step": 17215 + }, + { + "epoch": 0.93, + "learning_rate": 1.4565967109132994e-05, + "loss": 0.8008, + "step": 17216 + }, + { + "epoch": 0.93, + "learning_rate": 1.4545118595118977e-05, + "loss": 0.875, + "step": 17217 + }, + { + "epoch": 0.93, + "learning_rate": 1.4524284791982423e-05, + "loss": 0.8711, + "step": 17218 + }, + { + "epoch": 0.93, + "learning_rate": 1.4503465700354768e-05, + "loss": 0.8516, + "step": 17219 + }, + { + "epoch": 0.93, + "learning_rate": 1.4482661320866896e-05, + "loss": 0.8594, + "step": 17220 + }, + { + "epoch": 0.93, + "learning_rate": 1.4461871654149251e-05, + "loss": 0.8008, + "step": 17221 + }, + { + "epoch": 0.93, + "learning_rate": 1.4441096700831658e-05, + "loss": 0.918, + "step": 17222 + }, + { + "epoch": 0.93, + "learning_rate": 1.4420336461543892e-05, + "loss": 0.8008, + "step": 17223 + }, + { + "epoch": 0.93, + "learning_rate": 1.4399590936915008e-05, + "loss": 0.7305, + "step": 17224 + }, + { + "epoch": 0.93, + "learning_rate": 1.4378860127573556e-05, + "loss": 0.8398, + "step": 17225 + }, + { + "epoch": 0.93, + "learning_rate": 1.4358144034147812e-05, + "loss": 0.9297, + "step": 17226 + }, + { + "epoch": 0.93, + "learning_rate": 1.4337442657265553e-05, + "loss": 0.75, + "step": 17227 + }, + { + "epoch": 0.93, + "learning_rate": 1.4316755997554109e-05, + "loss": 0.7383, + "step": 17228 + }, + { + "epoch": 0.93, + "learning_rate": 1.4296084055640368e-05, + "loss": 0.8672, + "step": 17229 + }, + { + "epoch": 0.93, + "learning_rate": 1.4275426832150661e-05, + "loss": 0.9141, + "step": 17230 + }, + { + "epoch": 0.93, + "learning_rate": 1.4254784327711102e-05, + "loss": 0.8242, + "step": 17231 + }, + { + "epoch": 0.93, + "learning_rate": 1.4234156542947074e-05, + "loss": 0.8438, + "step": 17232 + }, + { + "epoch": 0.93, + "learning_rate": 1.4213543478483748e-05, + "loss": 0.7695, + "step": 17233 + }, + { + "epoch": 0.93, + "learning_rate": 1.4192945134945844e-05, + "loss": 0.8086, + "step": 17234 + }, + { + "epoch": 0.93, + "learning_rate": 1.4172361512957476e-05, + "loss": 0.8516, + "step": 17235 + }, + { + "epoch": 0.93, + "learning_rate": 1.4151792613142423e-05, + "loss": 0.7422, + "step": 17236 + }, + { + "epoch": 0.93, + "learning_rate": 1.4131238436123905e-05, + "loss": 0.8203, + "step": 17237 + }, + { + "epoch": 0.93, + "learning_rate": 1.4110698982524929e-05, + "loss": 0.7383, + "step": 17238 + }, + { + "epoch": 0.93, + "learning_rate": 1.4090174252967713e-05, + "loss": 0.8555, + "step": 17239 + }, + { + "epoch": 0.93, + "learning_rate": 1.4069664248074432e-05, + "loss": 0.7695, + "step": 17240 + }, + { + "epoch": 0.93, + "learning_rate": 1.4049168968466474e-05, + "loss": 0.8047, + "step": 17241 + }, + { + "epoch": 0.93, + "learning_rate": 1.4028688414764957e-05, + "loss": 0.6914, + "step": 17242 + }, + { + "epoch": 0.93, + "learning_rate": 1.4008222587590546e-05, + "loss": 0.832, + "step": 17243 + }, + { + "epoch": 0.93, + "learning_rate": 1.3987771487563361e-05, + "loss": 0.8477, + "step": 17244 + }, + { + "epoch": 0.93, + "learning_rate": 1.396733511530307e-05, + "loss": 0.7031, + "step": 17245 + }, + { + "epoch": 0.93, + "learning_rate": 1.394691347142918e-05, + "loss": 0.8203, + "step": 17246 + }, + { + "epoch": 0.93, + "learning_rate": 1.3926506556560358e-05, + "loss": 0.8477, + "step": 17247 + }, + { + "epoch": 0.93, + "learning_rate": 1.3906114371315004e-05, + "loss": 0.6875, + "step": 17248 + }, + { + "epoch": 0.93, + "learning_rate": 1.388573691631112e-05, + "loss": 0.7539, + "step": 17249 + }, + { + "epoch": 0.93, + "learning_rate": 1.386537419216627e-05, + "loss": 0.8789, + "step": 17250 + }, + { + "epoch": 0.93, + "learning_rate": 1.3845026199497401e-05, + "loss": 0.7852, + "step": 17251 + }, + { + "epoch": 0.93, + "learning_rate": 1.3824692938921134e-05, + "loss": 0.7695, + "step": 17252 + }, + { + "epoch": 0.93, + "learning_rate": 1.38043744110537e-05, + "loss": 0.7188, + "step": 17253 + }, + { + "epoch": 0.93, + "learning_rate": 1.378407061651077e-05, + "loss": 0.7969, + "step": 17254 + }, + { + "epoch": 0.93, + "learning_rate": 1.3763781555907628e-05, + "loss": 0.7734, + "step": 17255 + }, + { + "epoch": 0.93, + "learning_rate": 1.374350722985912e-05, + "loss": 0.8477, + "step": 17256 + }, + { + "epoch": 0.93, + "learning_rate": 1.372324763897953e-05, + "loss": 0.6836, + "step": 17257 + }, + { + "epoch": 0.93, + "learning_rate": 1.3703002783882924e-05, + "loss": 0.7109, + "step": 17258 + }, + { + "epoch": 0.93, + "learning_rate": 1.3682772665182697e-05, + "loss": 0.9414, + "step": 17259 + }, + { + "epoch": 0.93, + "learning_rate": 1.3662557283491972e-05, + "loss": 0.7734, + "step": 17260 + }, + { + "epoch": 0.93, + "learning_rate": 1.3642356639423259e-05, + "loss": 0.8047, + "step": 17261 + }, + { + "epoch": 0.93, + "learning_rate": 1.3622170733588679e-05, + "loss": 0.7422, + "step": 17262 + }, + { + "epoch": 0.93, + "learning_rate": 1.3601999566600076e-05, + "loss": 0.8477, + "step": 17263 + }, + { + "epoch": 0.93, + "learning_rate": 1.3581843139068573e-05, + "loss": 0.8359, + "step": 17264 + }, + { + "epoch": 0.93, + "learning_rate": 1.3561701451604957e-05, + "loss": 0.8125, + "step": 17265 + }, + { + "epoch": 0.93, + "learning_rate": 1.3541574504819686e-05, + "loss": 0.7773, + "step": 17266 + }, + { + "epoch": 0.93, + "learning_rate": 1.3521462299322662e-05, + "loss": 0.75, + "step": 17267 + }, + { + "epoch": 0.93, + "learning_rate": 1.3501364835723284e-05, + "loss": 0.8633, + "step": 17268 + }, + { + "epoch": 0.93, + "learning_rate": 1.3481282114630622e-05, + "loss": 0.8711, + "step": 17269 + }, + { + "epoch": 0.93, + "learning_rate": 1.3461214136653244e-05, + "loss": 0.7539, + "step": 17270 + }, + { + "epoch": 0.93, + "learning_rate": 1.3441160902399218e-05, + "loss": 0.8242, + "step": 17271 + }, + { + "epoch": 0.93, + "learning_rate": 1.3421122412476283e-05, + "loss": 0.832, + "step": 17272 + }, + { + "epoch": 0.93, + "learning_rate": 1.340109866749173e-05, + "loss": 0.7812, + "step": 17273 + }, + { + "epoch": 0.93, + "learning_rate": 1.3381089668052237e-05, + "loss": 0.7148, + "step": 17274 + }, + { + "epoch": 0.93, + "learning_rate": 1.3361095414764158e-05, + "loss": 0.793, + "step": 17275 + }, + { + "epoch": 0.93, + "learning_rate": 1.3341115908233337e-05, + "loss": 0.9531, + "step": 17276 + }, + { + "epoch": 0.93, + "learning_rate": 1.3321151149065403e-05, + "loss": 0.7695, + "step": 17277 + }, + { + "epoch": 0.93, + "learning_rate": 1.330120113786515e-05, + "loss": 0.9062, + "step": 17278 + }, + { + "epoch": 0.93, + "learning_rate": 1.3281265875237313e-05, + "loss": 0.7773, + "step": 17279 + }, + { + "epoch": 0.93, + "learning_rate": 1.3261345361785804e-05, + "loss": 0.8164, + "step": 17280 + }, + { + "epoch": 0.93, + "learning_rate": 1.3241439598114357e-05, + "loss": 0.875, + "step": 17281 + }, + { + "epoch": 0.93, + "learning_rate": 1.3221548584826326e-05, + "loss": 0.8594, + "step": 17282 + }, + { + "epoch": 0.93, + "learning_rate": 1.3201672322524282e-05, + "loss": 0.8008, + "step": 17283 + }, + { + "epoch": 0.93, + "learning_rate": 1.318181081181058e-05, + "loss": 0.7969, + "step": 17284 + }, + { + "epoch": 0.93, + "learning_rate": 1.3161964053287122e-05, + "loss": 0.8125, + "step": 17285 + }, + { + "epoch": 0.93, + "learning_rate": 1.3142132047555321e-05, + "loss": 0.7578, + "step": 17286 + }, + { + "epoch": 0.93, + "learning_rate": 1.3122314795216194e-05, + "loss": 0.8594, + "step": 17287 + }, + { + "epoch": 0.93, + "learning_rate": 1.310251229687015e-05, + "loss": 0.8711, + "step": 17288 + }, + { + "epoch": 0.93, + "learning_rate": 1.308272455311743e-05, + "loss": 0.7461, + "step": 17289 + }, + { + "epoch": 0.93, + "learning_rate": 1.3062951564557613e-05, + "loss": 0.8516, + "step": 17290 + }, + { + "epoch": 0.93, + "learning_rate": 1.3043193331789771e-05, + "loss": 0.8828, + "step": 17291 + }, + { + "epoch": 0.93, + "learning_rate": 1.3023449855412761e-05, + "loss": 0.8906, + "step": 17292 + }, + { + "epoch": 0.93, + "learning_rate": 1.3003721136024826e-05, + "loss": 0.8672, + "step": 17293 + }, + { + "epoch": 0.93, + "learning_rate": 1.2984007174223878e-05, + "loss": 0.7344, + "step": 17294 + }, + { + "epoch": 0.93, + "learning_rate": 1.296430797060727e-05, + "loss": 0.7734, + "step": 17295 + }, + { + "epoch": 0.93, + "learning_rate": 1.2944623525771859e-05, + "loss": 0.625, + "step": 17296 + }, + { + "epoch": 0.93, + "learning_rate": 1.2924953840314335e-05, + "loss": 0.7695, + "step": 17297 + }, + { + "epoch": 0.93, + "learning_rate": 1.290529891483061e-05, + "loss": 0.8945, + "step": 17298 + }, + { + "epoch": 0.93, + "learning_rate": 1.288565874991643e-05, + "loss": 0.75, + "step": 17299 + }, + { + "epoch": 0.93, + "learning_rate": 1.2866033346166762e-05, + "loss": 0.7891, + "step": 17300 + }, + { + "epoch": 0.93, + "learning_rate": 1.2846422704176409e-05, + "loss": 0.7539, + "step": 17301 + }, + { + "epoch": 0.93, + "learning_rate": 1.2826826824539728e-05, + "loss": 0.8438, + "step": 17302 + }, + { + "epoch": 0.93, + "learning_rate": 1.2807245707850468e-05, + "loss": 0.7891, + "step": 17303 + }, + { + "epoch": 0.93, + "learning_rate": 1.2787679354701986e-05, + "loss": 0.8398, + "step": 17304 + }, + { + "epoch": 0.93, + "learning_rate": 1.2768127765687199e-05, + "loss": 0.793, + "step": 17305 + }, + { + "epoch": 0.93, + "learning_rate": 1.274859094139863e-05, + "loss": 0.8867, + "step": 17306 + }, + { + "epoch": 0.93, + "learning_rate": 1.2729068882428252e-05, + "loss": 0.7812, + "step": 17307 + }, + { + "epoch": 0.93, + "learning_rate": 1.2709561589367647e-05, + "loss": 0.8008, + "step": 17308 + }, + { + "epoch": 0.93, + "learning_rate": 1.2690069062808063e-05, + "loss": 0.7266, + "step": 17309 + }, + { + "epoch": 0.93, + "learning_rate": 1.2670591303340085e-05, + "loss": 0.8359, + "step": 17310 + }, + { + "epoch": 0.93, + "learning_rate": 1.2651128311553905e-05, + "loss": 0.8438, + "step": 17311 + }, + { + "epoch": 0.93, + "learning_rate": 1.2631680088039444e-05, + "loss": 0.8008, + "step": 17312 + }, + { + "epoch": 0.93, + "learning_rate": 1.261224663338606e-05, + "loss": 0.8594, + "step": 17313 + }, + { + "epoch": 0.93, + "learning_rate": 1.259282794818245e-05, + "loss": 0.7461, + "step": 17314 + }, + { + "epoch": 0.93, + "learning_rate": 1.2573424033017256e-05, + "loss": 0.8945, + "step": 17315 + }, + { + "epoch": 0.93, + "learning_rate": 1.2554034888478449e-05, + "loss": 0.7656, + "step": 17316 + }, + { + "epoch": 0.93, + "learning_rate": 1.2534660515153507e-05, + "loss": 0.7617, + "step": 17317 + }, + { + "epoch": 0.93, + "learning_rate": 1.2515300913629568e-05, + "loss": 0.75, + "step": 17318 + }, + { + "epoch": 0.93, + "learning_rate": 1.2495956084493332e-05, + "loss": 0.8945, + "step": 17319 + }, + { + "epoch": 0.93, + "learning_rate": 1.2476626028330995e-05, + "loss": 0.7969, + "step": 17320 + }, + { + "epoch": 0.93, + "learning_rate": 1.2457310745728257e-05, + "loss": 0.8945, + "step": 17321 + }, + { + "epoch": 0.93, + "learning_rate": 1.2438010237270536e-05, + "loss": 0.8477, + "step": 17322 + }, + { + "epoch": 0.93, + "learning_rate": 1.2418724503542645e-05, + "loss": 0.8672, + "step": 17323 + }, + { + "epoch": 0.93, + "learning_rate": 1.2399453545129002e-05, + "loss": 0.8867, + "step": 17324 + }, + { + "epoch": 0.93, + "learning_rate": 1.2380197362613643e-05, + "loss": 0.8828, + "step": 17325 + }, + { + "epoch": 0.93, + "learning_rate": 1.2360955956579989e-05, + "loss": 0.8945, + "step": 17326 + }, + { + "epoch": 0.93, + "learning_rate": 1.2341729327611184e-05, + "loss": 0.7812, + "step": 17327 + }, + { + "epoch": 0.93, + "learning_rate": 1.2322517476289819e-05, + "loss": 0.7812, + "step": 17328 + }, + { + "epoch": 0.93, + "learning_rate": 1.2303320403198148e-05, + "loss": 0.7617, + "step": 17329 + }, + { + "epoch": 0.93, + "learning_rate": 1.2284138108917875e-05, + "loss": 0.793, + "step": 17330 + }, + { + "epoch": 0.93, + "learning_rate": 1.2264970594030145e-05, + "loss": 0.8633, + "step": 17331 + }, + { + "epoch": 0.93, + "learning_rate": 1.2245817859116049e-05, + "loss": 0.8672, + "step": 17332 + }, + { + "epoch": 0.93, + "learning_rate": 1.2226679904755844e-05, + "loss": 0.7891, + "step": 17333 + }, + { + "epoch": 0.93, + "learning_rate": 1.2207556731529401e-05, + "loss": 0.8281, + "step": 17334 + }, + { + "epoch": 0.93, + "learning_rate": 1.2188448340016367e-05, + "loss": 0.793, + "step": 17335 + }, + { + "epoch": 0.93, + "learning_rate": 1.2169354730795724e-05, + "loss": 0.8164, + "step": 17336 + }, + { + "epoch": 0.93, + "learning_rate": 1.2150275904446007e-05, + "loss": 0.832, + "step": 17337 + }, + { + "epoch": 0.93, + "learning_rate": 1.2131211861545476e-05, + "loss": 0.7734, + "step": 17338 + }, + { + "epoch": 0.93, + "learning_rate": 1.2112162602671728e-05, + "loss": 0.8711, + "step": 17339 + }, + { + "epoch": 0.93, + "learning_rate": 1.209312812840213e-05, + "loss": 0.707, + "step": 17340 + }, + { + "epoch": 0.93, + "learning_rate": 1.2074108439313335e-05, + "loss": 0.8008, + "step": 17341 + }, + { + "epoch": 0.93, + "learning_rate": 1.2055103535981882e-05, + "loss": 0.8672, + "step": 17342 + }, + { + "epoch": 0.93, + "learning_rate": 1.2036113418983586e-05, + "loss": 0.7656, + "step": 17343 + }, + { + "epoch": 0.93, + "learning_rate": 1.2017138088893874e-05, + "loss": 0.8008, + "step": 17344 + }, + { + "epoch": 0.93, + "learning_rate": 1.1998177546287903e-05, + "loss": 0.75, + "step": 17345 + }, + { + "epoch": 0.93, + "learning_rate": 1.1979231791740041e-05, + "loss": 0.75, + "step": 17346 + }, + { + "epoch": 0.93, + "learning_rate": 1.196030082582461e-05, + "loss": 0.75, + "step": 17347 + }, + { + "epoch": 0.93, + "learning_rate": 1.1941384649115095e-05, + "loss": 0.8125, + "step": 17348 + }, + { + "epoch": 0.93, + "learning_rate": 1.192248326218487e-05, + "loss": 0.8203, + "step": 17349 + }, + { + "epoch": 0.93, + "learning_rate": 1.1903596665606586e-05, + "loss": 0.8594, + "step": 17350 + }, + { + "epoch": 0.93, + "learning_rate": 1.1884724859952679e-05, + "loss": 0.8633, + "step": 17351 + }, + { + "epoch": 0.93, + "learning_rate": 1.1865867845794964e-05, + "loss": 0.6953, + "step": 17352 + }, + { + "epoch": 0.93, + "learning_rate": 1.1847025623704876e-05, + "loss": 0.8125, + "step": 17353 + }, + { + "epoch": 0.93, + "learning_rate": 1.1828198194253348e-05, + "loss": 0.8438, + "step": 17354 + }, + { + "epoch": 0.93, + "learning_rate": 1.1809385558011033e-05, + "loss": 0.7266, + "step": 17355 + }, + { + "epoch": 0.93, + "learning_rate": 1.1790587715547973e-05, + "loss": 0.8281, + "step": 17356 + }, + { + "epoch": 0.93, + "learning_rate": 1.1771804667433716e-05, + "loss": 0.7773, + "step": 17357 + }, + { + "epoch": 0.93, + "learning_rate": 1.1753036414237473e-05, + "loss": 0.7344, + "step": 17358 + }, + { + "epoch": 0.93, + "learning_rate": 1.173428295652812e-05, + "loss": 0.7969, + "step": 17359 + }, + { + "epoch": 0.93, + "learning_rate": 1.1715544294873814e-05, + "loss": 0.7617, + "step": 17360 + }, + { + "epoch": 0.93, + "learning_rate": 1.1696820429842491e-05, + "loss": 0.7969, + "step": 17361 + }, + { + "epoch": 0.93, + "learning_rate": 1.167811136200142e-05, + "loss": 0.8555, + "step": 17362 + }, + { + "epoch": 0.93, + "learning_rate": 1.1659417091917646e-05, + "loss": 0.8711, + "step": 17363 + }, + { + "epoch": 0.93, + "learning_rate": 1.1640737620157605e-05, + "loss": 0.8164, + "step": 17364 + }, + { + "epoch": 0.93, + "learning_rate": 1.1622072947287454e-05, + "loss": 0.7656, + "step": 17365 + }, + { + "epoch": 0.93, + "learning_rate": 1.1603423073872687e-05, + "loss": 0.7539, + "step": 17366 + }, + { + "epoch": 0.93, + "learning_rate": 1.1584788000478464e-05, + "loss": 0.7695, + "step": 17367 + }, + { + "epoch": 0.93, + "learning_rate": 1.1566167727669551e-05, + "loss": 0.832, + "step": 17368 + }, + { + "epoch": 0.93, + "learning_rate": 1.1547562256010169e-05, + "loss": 0.7812, + "step": 17369 + }, + { + "epoch": 0.93, + "learning_rate": 1.1528971586064085e-05, + "loss": 0.8047, + "step": 17370 + }, + { + "epoch": 0.93, + "learning_rate": 1.1510395718394684e-05, + "loss": 0.7031, + "step": 17371 + }, + { + "epoch": 0.93, + "learning_rate": 1.149183465356496e-05, + "loss": 0.793, + "step": 17372 + }, + { + "epoch": 0.93, + "learning_rate": 1.1473288392137238e-05, + "loss": 0.6875, + "step": 17373 + }, + { + "epoch": 0.93, + "learning_rate": 1.1454756934673627e-05, + "loss": 0.793, + "step": 17374 + }, + { + "epoch": 0.93, + "learning_rate": 1.1436240281735678e-05, + "loss": 0.8867, + "step": 17375 + }, + { + "epoch": 0.93, + "learning_rate": 1.1417738433884495e-05, + "loss": 0.7266, + "step": 17376 + }, + { + "epoch": 0.93, + "learning_rate": 1.1399251391680687e-05, + "loss": 0.7578, + "step": 17377 + }, + { + "epoch": 0.93, + "learning_rate": 1.1380779155684473e-05, + "loss": 0.8711, + "step": 17378 + }, + { + "epoch": 0.93, + "learning_rate": 1.1362321726455793e-05, + "loss": 0.7852, + "step": 17379 + }, + { + "epoch": 0.93, + "learning_rate": 1.1343879104553757e-05, + "loss": 0.8398, + "step": 17380 + }, + { + "epoch": 0.93, + "learning_rate": 1.1325451290537359e-05, + "loss": 0.7539, + "step": 17381 + }, + { + "epoch": 0.93, + "learning_rate": 1.1307038284964988e-05, + "loss": 0.8125, + "step": 17382 + }, + { + "epoch": 0.93, + "learning_rate": 1.1288640088394697e-05, + "loss": 0.7852, + "step": 17383 + }, + { + "epoch": 0.93, + "learning_rate": 1.1270256701383818e-05, + "loss": 0.7852, + "step": 17384 + }, + { + "epoch": 0.93, + "learning_rate": 1.1251888124489573e-05, + "loss": 0.7617, + "step": 17385 + }, + { + "epoch": 0.93, + "learning_rate": 1.1233534358268626e-05, + "loss": 0.8047, + "step": 17386 + }, + { + "epoch": 0.93, + "learning_rate": 1.1215195403276979e-05, + "loss": 0.8516, + "step": 17387 + }, + { + "epoch": 0.93, + "learning_rate": 1.1196871260070574e-05, + "loss": 0.7812, + "step": 17388 + }, + { + "epoch": 0.93, + "learning_rate": 1.1178561929204577e-05, + "loss": 0.8945, + "step": 17389 + }, + { + "epoch": 0.93, + "learning_rate": 1.1160267411233771e-05, + "loss": 0.918, + "step": 17390 + }, + { + "epoch": 0.93, + "learning_rate": 1.1141987706712709e-05, + "loss": 0.8867, + "step": 17391 + }, + { + "epoch": 0.93, + "learning_rate": 1.1123722816195226e-05, + "loss": 0.8906, + "step": 17392 + }, + { + "epoch": 0.93, + "learning_rate": 1.1105472740234712e-05, + "loss": 0.8086, + "step": 17393 + }, + { + "epoch": 0.93, + "learning_rate": 1.1087237479384338e-05, + "loss": 0.8281, + "step": 17394 + }, + { + "epoch": 0.93, + "learning_rate": 1.1069017034196716e-05, + "loss": 0.7109, + "step": 17395 + }, + { + "epoch": 0.93, + "learning_rate": 1.1050811405223904e-05, + "loss": 0.7578, + "step": 17396 + }, + { + "epoch": 0.94, + "learning_rate": 1.1032620593017461e-05, + "loss": 0.8633, + "step": 17397 + }, + { + "epoch": 0.94, + "learning_rate": 1.1014444598128947e-05, + "loss": 0.875, + "step": 17398 + }, + { + "epoch": 0.94, + "learning_rate": 1.0996283421108978e-05, + "loss": 0.7344, + "step": 17399 + }, + { + "epoch": 0.94, + "learning_rate": 1.0978137062507832e-05, + "loss": 0.8281, + "step": 17400 + }, + { + "epoch": 0.94, + "learning_rate": 1.096000552287546e-05, + "loss": 0.7656, + "step": 17401 + }, + { + "epoch": 0.94, + "learning_rate": 1.0941888802761423e-05, + "loss": 0.7539, + "step": 17402 + }, + { + "epoch": 0.94, + "learning_rate": 1.0923786902714561e-05, + "loss": 0.8398, + "step": 17403 + }, + { + "epoch": 0.94, + "learning_rate": 1.0905699823283489e-05, + "loss": 0.8008, + "step": 17404 + }, + { + "epoch": 0.94, + "learning_rate": 1.0887627565016268e-05, + "loss": 0.7852, + "step": 17405 + }, + { + "epoch": 0.94, + "learning_rate": 1.0869570128460627e-05, + "loss": 0.875, + "step": 17406 + }, + { + "epoch": 0.94, + "learning_rate": 1.0851527514163629e-05, + "loss": 0.8164, + "step": 17407 + }, + { + "epoch": 0.94, + "learning_rate": 1.0833499722672225e-05, + "loss": 0.8008, + "step": 17408 + }, + { + "epoch": 0.94, + "learning_rate": 1.0815486754532477e-05, + "loss": 0.8633, + "step": 17409 + }, + { + "epoch": 0.94, + "learning_rate": 1.0797488610290396e-05, + "loss": 0.7578, + "step": 17410 + }, + { + "epoch": 0.94, + "learning_rate": 1.0779505290491377e-05, + "loss": 0.8047, + "step": 17411 + }, + { + "epoch": 0.94, + "learning_rate": 1.076153679568037e-05, + "loss": 0.8203, + "step": 17412 + }, + { + "epoch": 0.94, + "learning_rate": 1.074358312640178e-05, + "loss": 0.7812, + "step": 17413 + }, + { + "epoch": 0.94, + "learning_rate": 1.0725644283199775e-05, + "loss": 0.7969, + "step": 17414 + }, + { + "epoch": 0.94, + "learning_rate": 1.0707720266617927e-05, + "loss": 0.8164, + "step": 17415 + }, + { + "epoch": 0.94, + "learning_rate": 1.0689811077199353e-05, + "loss": 0.7695, + "step": 17416 + }, + { + "epoch": 0.94, + "learning_rate": 1.0671916715486785e-05, + "loss": 0.7422, + "step": 17417 + }, + { + "epoch": 0.94, + "learning_rate": 1.0654037182022569e-05, + "loss": 0.832, + "step": 17418 + }, + { + "epoch": 0.94, + "learning_rate": 1.0636172477348382e-05, + "loss": 0.875, + "step": 17419 + }, + { + "epoch": 0.94, + "learning_rate": 1.061832260200557e-05, + "loss": 0.7695, + "step": 17420 + }, + { + "epoch": 0.94, + "learning_rate": 1.06004875565352e-05, + "loss": 0.793, + "step": 17421 + }, + { + "epoch": 0.94, + "learning_rate": 1.058266734147767e-05, + "loss": 0.7344, + "step": 17422 + }, + { + "epoch": 0.94, + "learning_rate": 1.0564861957372884e-05, + "loss": 0.832, + "step": 17423 + }, + { + "epoch": 0.94, + "learning_rate": 1.0547071404760467e-05, + "loss": 0.7734, + "step": 17424 + }, + { + "epoch": 0.94, + "learning_rate": 1.0529295684179652e-05, + "loss": 0.7461, + "step": 17425 + }, + { + "epoch": 0.94, + "learning_rate": 1.0511534796168953e-05, + "loss": 0.7891, + "step": 17426 + }, + { + "epoch": 0.94, + "learning_rate": 1.0493788741266552e-05, + "loss": 0.7617, + "step": 17427 + }, + { + "epoch": 0.94, + "learning_rate": 1.0476057520010352e-05, + "loss": 0.8359, + "step": 17428 + }, + { + "epoch": 0.94, + "learning_rate": 1.0458341132937588e-05, + "loss": 0.8789, + "step": 17429 + }, + { + "epoch": 0.94, + "learning_rate": 1.0440639580585166e-05, + "loss": 0.7891, + "step": 17430 + }, + { + "epoch": 0.94, + "learning_rate": 1.0422952863489432e-05, + "loss": 0.7852, + "step": 17431 + }, + { + "epoch": 0.94, + "learning_rate": 1.0405280982186404e-05, + "loss": 0.7266, + "step": 17432 + }, + { + "epoch": 0.94, + "learning_rate": 1.0387623937211598e-05, + "loss": 0.7422, + "step": 17433 + }, + { + "epoch": 0.94, + "learning_rate": 1.0369981729100086e-05, + "loss": 0.707, + "step": 17434 + }, + { + "epoch": 0.94, + "learning_rate": 1.035235435838644e-05, + "loss": 0.8398, + "step": 17435 + }, + { + "epoch": 0.94, + "learning_rate": 1.0334741825604843e-05, + "loss": 0.8203, + "step": 17436 + }, + { + "epoch": 0.94, + "learning_rate": 1.0317144131289036e-05, + "loss": 0.793, + "step": 17437 + }, + { + "epoch": 0.94, + "learning_rate": 1.0299561275972313e-05, + "loss": 0.8281, + "step": 17438 + }, + { + "epoch": 0.94, + "learning_rate": 1.0281993260187417e-05, + "loss": 0.8125, + "step": 17439 + }, + { + "epoch": 0.94, + "learning_rate": 1.0264440084466753e-05, + "loss": 0.7852, + "step": 17440 + }, + { + "epoch": 0.94, + "learning_rate": 1.0246901749342286e-05, + "loss": 0.707, + "step": 17441 + }, + { + "epoch": 0.94, + "learning_rate": 1.0229378255345479e-05, + "loss": 0.8477, + "step": 17442 + }, + { + "epoch": 0.94, + "learning_rate": 1.0211869603007185e-05, + "loss": 0.6719, + "step": 17443 + }, + { + "epoch": 0.94, + "learning_rate": 1.01943757928582e-05, + "loss": 0.8672, + "step": 17444 + }, + { + "epoch": 0.94, + "learning_rate": 1.0176896825428494e-05, + "loss": 0.832, + "step": 17445 + }, + { + "epoch": 0.94, + "learning_rate": 1.0159432701247807e-05, + "loss": 0.8438, + "step": 17446 + }, + { + "epoch": 0.94, + "learning_rate": 1.014198342084538e-05, + "loss": 0.7656, + "step": 17447 + }, + { + "epoch": 0.94, + "learning_rate": 1.0124548984749904e-05, + "loss": 0.7773, + "step": 17448 + }, + { + "epoch": 0.94, + "learning_rate": 1.0107129393489789e-05, + "loss": 0.7461, + "step": 17449 + }, + { + "epoch": 0.94, + "learning_rate": 1.008972464759278e-05, + "loss": 0.7852, + "step": 17450 + }, + { + "epoch": 0.94, + "learning_rate": 1.0072334747586454e-05, + "loss": 0.7852, + "step": 17451 + }, + { + "epoch": 0.94, + "learning_rate": 1.0054959693997611e-05, + "loss": 0.7148, + "step": 17452 + }, + { + "epoch": 0.94, + "learning_rate": 1.0037599487352889e-05, + "loss": 0.8945, + "step": 17453 + }, + { + "epoch": 0.94, + "learning_rate": 1.0020254128178418e-05, + "loss": 0.8203, + "step": 17454 + }, + { + "epoch": 0.94, + "learning_rate": 1.0002923616999671e-05, + "loss": 0.8477, + "step": 17455 + }, + { + "epoch": 0.94, + "learning_rate": 9.985607954341892e-06, + "loss": 0.8828, + "step": 17456 + }, + { + "epoch": 0.94, + "learning_rate": 9.968307140729826e-06, + "loss": 0.7539, + "step": 17457 + }, + { + "epoch": 0.94, + "learning_rate": 9.951021176687725e-06, + "loss": 0.7773, + "step": 17458 + }, + { + "epoch": 0.94, + "learning_rate": 9.933750062739334e-06, + "loss": 0.7031, + "step": 17459 + }, + { + "epoch": 0.94, + "learning_rate": 9.916493799408122e-06, + "loss": 0.7461, + "step": 17460 + }, + { + "epoch": 0.94, + "learning_rate": 9.899252387217006e-06, + "loss": 0.7656, + "step": 17461 + }, + { + "epoch": 0.94, + "learning_rate": 9.882025826688401e-06, + "loss": 0.7812, + "step": 17462 + }, + { + "epoch": 0.94, + "learning_rate": 9.864814118344334e-06, + "loss": 0.8008, + "step": 17463 + }, + { + "epoch": 0.94, + "learning_rate": 9.847617262706444e-06, + "loss": 0.6914, + "step": 17464 + }, + { + "epoch": 0.94, + "learning_rate": 9.83043526029581e-06, + "loss": 0.7344, + "step": 17465 + }, + { + "epoch": 0.94, + "learning_rate": 9.813268111633078e-06, + "loss": 0.793, + "step": 17466 + }, + { + "epoch": 0.94, + "learning_rate": 9.796115817238438e-06, + "loss": 0.8008, + "step": 17467 + }, + { + "epoch": 0.94, + "learning_rate": 9.778978377631808e-06, + "loss": 0.7734, + "step": 17468 + }, + { + "epoch": 0.94, + "learning_rate": 9.761855793332386e-06, + "loss": 0.7969, + "step": 17469 + }, + { + "epoch": 0.94, + "learning_rate": 9.744748064859088e-06, + "loss": 0.7734, + "step": 17470 + }, + { + "epoch": 0.94, + "learning_rate": 9.727655192730279e-06, + "loss": 0.8398, + "step": 17471 + }, + { + "epoch": 0.94, + "learning_rate": 9.710577177463986e-06, + "loss": 0.7812, + "step": 17472 + }, + { + "epoch": 0.94, + "learning_rate": 9.693514019577743e-06, + "loss": 0.8477, + "step": 17473 + }, + { + "epoch": 0.94, + "learning_rate": 9.676465719588579e-06, + "loss": 0.7891, + "step": 17474 + }, + { + "epoch": 0.94, + "learning_rate": 9.659432278013081e-06, + "loss": 0.7812, + "step": 17475 + }, + { + "epoch": 0.94, + "learning_rate": 9.642413695367448e-06, + "loss": 0.793, + "step": 17476 + }, + { + "epoch": 0.94, + "learning_rate": 9.625409972167487e-06, + "loss": 0.6797, + "step": 17477 + }, + { + "epoch": 0.94, + "learning_rate": 9.608421108928344e-06, + "loss": 0.8359, + "step": 17478 + }, + { + "epoch": 0.94, + "learning_rate": 9.591447106164885e-06, + "loss": 0.8945, + "step": 17479 + }, + { + "epoch": 0.94, + "learning_rate": 9.574487964391477e-06, + "loss": 0.8203, + "step": 17480 + }, + { + "epoch": 0.94, + "learning_rate": 9.557543684122095e-06, + "loss": 0.8047, + "step": 17481 + }, + { + "epoch": 0.94, + "learning_rate": 9.54061426587005e-06, + "loss": 0.793, + "step": 17482 + }, + { + "epoch": 0.94, + "learning_rate": 9.523699710148492e-06, + "loss": 0.7969, + "step": 17483 + }, + { + "epoch": 0.94, + "learning_rate": 9.506800017470007e-06, + "loss": 0.75, + "step": 17484 + }, + { + "epoch": 0.94, + "learning_rate": 9.489915188346632e-06, + "loss": 0.8086, + "step": 17485 + }, + { + "epoch": 0.94, + "learning_rate": 9.473045223290067e-06, + "loss": 0.7461, + "step": 17486 + }, + { + "epoch": 0.94, + "learning_rate": 9.456190122811458e-06, + "loss": 0.7227, + "step": 17487 + }, + { + "epoch": 0.94, + "learning_rate": 9.439349887421677e-06, + "loss": 0.8086, + "step": 17488 + }, + { + "epoch": 0.94, + "learning_rate": 9.422524517630981e-06, + "loss": 0.8555, + "step": 17489 + }, + { + "epoch": 0.94, + "learning_rate": 9.40571401394924e-06, + "loss": 0.7812, + "step": 17490 + }, + { + "epoch": 0.94, + "learning_rate": 9.388918376885936e-06, + "loss": 0.793, + "step": 17491 + }, + { + "epoch": 0.94, + "learning_rate": 9.372137606949938e-06, + "loss": 0.8438, + "step": 17492 + }, + { + "epoch": 0.94, + "learning_rate": 9.355371704649729e-06, + "loss": 0.7383, + "step": 17493 + }, + { + "epoch": 0.94, + "learning_rate": 9.338620670493458e-06, + "loss": 0.7344, + "step": 17494 + }, + { + "epoch": 0.94, + "learning_rate": 9.321884504988776e-06, + "loss": 0.7617, + "step": 17495 + }, + { + "epoch": 0.94, + "learning_rate": 9.30516320864272e-06, + "loss": 0.8867, + "step": 17496 + }, + { + "epoch": 0.94, + "learning_rate": 9.288456781962107e-06, + "loss": 0.8203, + "step": 17497 + }, + { + "epoch": 0.94, + "learning_rate": 9.271765225453088e-06, + "loss": 0.7773, + "step": 17498 + }, + { + "epoch": 0.94, + "learning_rate": 9.255088539621537e-06, + "loss": 0.7578, + "step": 17499 + }, + { + "epoch": 0.94, + "learning_rate": 9.238426724972825e-06, + "loss": 0.8594, + "step": 17500 + }, + { + "epoch": 0.94, + "learning_rate": 9.22177978201183e-06, + "loss": 0.7773, + "step": 17501 + }, + { + "epoch": 0.94, + "learning_rate": 9.205147711243033e-06, + "loss": 0.75, + "step": 17502 + }, + { + "epoch": 0.94, + "learning_rate": 9.188530513170424e-06, + "loss": 0.7969, + "step": 17503 + }, + { + "epoch": 0.94, + "learning_rate": 9.171928188297595e-06, + "loss": 0.7383, + "step": 17504 + }, + { + "epoch": 0.94, + "learning_rate": 9.155340737127593e-06, + "loss": 0.7891, + "step": 17505 + }, + { + "epoch": 0.94, + "learning_rate": 9.138768160163014e-06, + "loss": 0.7305, + "step": 17506 + }, + { + "epoch": 0.94, + "learning_rate": 9.122210457906233e-06, + "loss": 0.707, + "step": 17507 + }, + { + "epoch": 0.94, + "learning_rate": 9.10566763085896e-06, + "loss": 0.75, + "step": 17508 + }, + { + "epoch": 0.94, + "learning_rate": 9.089139679522352e-06, + "loss": 0.7266, + "step": 17509 + }, + { + "epoch": 0.94, + "learning_rate": 9.072626604397395e-06, + "loss": 0.8359, + "step": 17510 + }, + { + "epoch": 0.94, + "learning_rate": 9.05612840598452e-06, + "loss": 0.8828, + "step": 17511 + }, + { + "epoch": 0.94, + "learning_rate": 9.039645084783499e-06, + "loss": 0.8008, + "step": 17512 + }, + { + "epoch": 0.94, + "learning_rate": 9.02317664129404e-06, + "loss": 0.7461, + "step": 17513 + }, + { + "epoch": 0.94, + "learning_rate": 9.006723076015021e-06, + "loss": 0.7461, + "step": 17514 + }, + { + "epoch": 0.94, + "learning_rate": 8.990284389445214e-06, + "loss": 0.8477, + "step": 17515 + }, + { + "epoch": 0.94, + "learning_rate": 8.973860582082549e-06, + "loss": 0.8164, + "step": 17516 + }, + { + "epoch": 0.94, + "learning_rate": 8.957451654424909e-06, + "loss": 0.7773, + "step": 17517 + }, + { + "epoch": 0.94, + "learning_rate": 8.941057606969394e-06, + "loss": 0.7812, + "step": 17518 + }, + { + "epoch": 0.94, + "learning_rate": 8.92467844021294e-06, + "loss": 0.7891, + "step": 17519 + }, + { + "epoch": 0.94, + "learning_rate": 8.90831415465182e-06, + "loss": 0.707, + "step": 17520 + }, + { + "epoch": 0.94, + "learning_rate": 8.89196475078191e-06, + "loss": 0.793, + "step": 17521 + }, + { + "epoch": 0.94, + "learning_rate": 8.875630229098598e-06, + "loss": 0.7617, + "step": 17522 + }, + { + "epoch": 0.94, + "learning_rate": 8.85931059009698e-06, + "loss": 0.7773, + "step": 17523 + }, + { + "epoch": 0.94, + "learning_rate": 8.843005834271612e-06, + "loss": 0.7578, + "step": 17524 + }, + { + "epoch": 0.94, + "learning_rate": 8.826715962116428e-06, + "loss": 0.8164, + "step": 17525 + }, + { + "epoch": 0.94, + "learning_rate": 8.8104409741252e-06, + "loss": 0.875, + "step": 17526 + }, + { + "epoch": 0.94, + "learning_rate": 8.794180870791146e-06, + "loss": 0.8164, + "step": 17527 + }, + { + "epoch": 0.94, + "learning_rate": 8.777935652606872e-06, + "loss": 0.8555, + "step": 17528 + }, + { + "epoch": 0.94, + "learning_rate": 8.76170532006465e-06, + "loss": 0.8672, + "step": 17529 + }, + { + "epoch": 0.94, + "learning_rate": 8.745489873656476e-06, + "loss": 0.7305, + "step": 17530 + }, + { + "epoch": 0.94, + "learning_rate": 8.729289313873623e-06, + "loss": 0.8359, + "step": 17531 + }, + { + "epoch": 0.94, + "learning_rate": 8.713103641207032e-06, + "loss": 0.8008, + "step": 17532 + }, + { + "epoch": 0.94, + "learning_rate": 8.696932856147144e-06, + "loss": 0.8047, + "step": 17533 + }, + { + "epoch": 0.94, + "learning_rate": 8.680776959184066e-06, + "loss": 0.7773, + "step": 17534 + }, + { + "epoch": 0.94, + "learning_rate": 8.664635950807298e-06, + "loss": 0.8359, + "step": 17535 + }, + { + "epoch": 0.94, + "learning_rate": 8.648509831506057e-06, + "loss": 0.8633, + "step": 17536 + }, + { + "epoch": 0.94, + "learning_rate": 8.632398601768953e-06, + "loss": 0.7891, + "step": 17537 + }, + { + "epoch": 0.94, + "learning_rate": 8.616302262084208e-06, + "loss": 0.8047, + "step": 17538 + }, + { + "epoch": 0.94, + "learning_rate": 8.600220812939596e-06, + "loss": 0.8203, + "step": 17539 + }, + { + "epoch": 0.94, + "learning_rate": 8.584154254822451e-06, + "loss": 0.6523, + "step": 17540 + }, + { + "epoch": 0.94, + "learning_rate": 8.568102588219661e-06, + "loss": 0.7891, + "step": 17541 + }, + { + "epoch": 0.94, + "learning_rate": 8.55206581361756e-06, + "loss": 0.7539, + "step": 17542 + }, + { + "epoch": 0.94, + "learning_rate": 8.536043931502258e-06, + "loss": 0.8398, + "step": 17543 + }, + { + "epoch": 0.94, + "learning_rate": 8.520036942359199e-06, + "loss": 0.7461, + "step": 17544 + }, + { + "epoch": 0.94, + "learning_rate": 8.504044846673387e-06, + "loss": 0.7734, + "step": 17545 + }, + { + "epoch": 0.94, + "learning_rate": 8.488067644929432e-06, + "loss": 0.832, + "step": 17546 + }, + { + "epoch": 0.94, + "learning_rate": 8.472105337611668e-06, + "loss": 0.7734, + "step": 17547 + }, + { + "epoch": 0.94, + "learning_rate": 8.4561579252036e-06, + "loss": 0.8672, + "step": 17548 + }, + { + "epoch": 0.94, + "learning_rate": 8.440225408188617e-06, + "loss": 0.9062, + "step": 17549 + }, + { + "epoch": 0.94, + "learning_rate": 8.424307787049502e-06, + "loss": 0.8008, + "step": 17550 + }, + { + "epoch": 0.94, + "learning_rate": 8.408405062268586e-06, + "loss": 0.8281, + "step": 17551 + }, + { + "epoch": 0.94, + "learning_rate": 8.392517234327712e-06, + "loss": 0.8164, + "step": 17552 + }, + { + "epoch": 0.94, + "learning_rate": 8.376644303708436e-06, + "loss": 0.6836, + "step": 17553 + }, + { + "epoch": 0.94, + "learning_rate": 8.360786270891817e-06, + "loss": 0.8555, + "step": 17554 + }, + { + "epoch": 0.94, + "learning_rate": 8.344943136358196e-06, + "loss": 0.832, + "step": 17555 + }, + { + "epoch": 0.94, + "learning_rate": 8.329114900587853e-06, + "loss": 0.8672, + "step": 17556 + }, + { + "epoch": 0.94, + "learning_rate": 8.31330156406035e-06, + "loss": 0.7812, + "step": 17557 + }, + { + "epoch": 0.94, + "learning_rate": 8.297503127254969e-06, + "loss": 0.8008, + "step": 17558 + }, + { + "epoch": 0.94, + "learning_rate": 8.281719590650327e-06, + "loss": 0.7539, + "step": 17559 + }, + { + "epoch": 0.94, + "learning_rate": 8.265950954724765e-06, + "loss": 0.9102, + "step": 17560 + }, + { + "epoch": 0.94, + "learning_rate": 8.250197219956179e-06, + "loss": 0.7461, + "step": 17561 + }, + { + "epoch": 0.94, + "learning_rate": 8.234458386821908e-06, + "loss": 0.8711, + "step": 17562 + }, + { + "epoch": 0.94, + "learning_rate": 8.218734455798904e-06, + "loss": 0.8086, + "step": 17563 + }, + { + "epoch": 0.94, + "learning_rate": 8.20302542736373e-06, + "loss": 0.8906, + "step": 17564 + }, + { + "epoch": 0.94, + "learning_rate": 8.187331301992174e-06, + "loss": 0.8125, + "step": 17565 + }, + { + "epoch": 0.94, + "learning_rate": 8.17165208016013e-06, + "loss": 0.8711, + "step": 17566 + }, + { + "epoch": 0.94, + "learning_rate": 8.155987762342553e-06, + "loss": 0.7891, + "step": 17567 + }, + { + "epoch": 0.94, + "learning_rate": 8.140338349014176e-06, + "loss": 0.7773, + "step": 17568 + }, + { + "epoch": 0.94, + "learning_rate": 8.124703840649172e-06, + "loss": 0.7969, + "step": 17569 + }, + { + "epoch": 0.94, + "learning_rate": 8.109084237721387e-06, + "loss": 0.7578, + "step": 17570 + }, + { + "epoch": 0.94, + "learning_rate": 8.093479540704163e-06, + "loss": 0.8438, + "step": 17571 + }, + { + "epoch": 0.94, + "learning_rate": 8.077889750070232e-06, + "loss": 0.7734, + "step": 17572 + }, + { + "epoch": 0.94, + "learning_rate": 8.062314866292165e-06, + "loss": 0.8594, + "step": 17573 + }, + { + "epoch": 0.94, + "learning_rate": 8.046754889841913e-06, + "loss": 0.7617, + "step": 17574 + }, + { + "epoch": 0.94, + "learning_rate": 8.031209821190877e-06, + "loss": 0.6953, + "step": 17575 + }, + { + "epoch": 0.94, + "learning_rate": 8.015679660810238e-06, + "loss": 0.7734, + "step": 17576 + }, + { + "epoch": 0.94, + "learning_rate": 8.000164409170619e-06, + "loss": 0.8164, + "step": 17577 + }, + { + "epoch": 0.94, + "learning_rate": 7.984664066742086e-06, + "loss": 0.8008, + "step": 17578 + }, + { + "epoch": 0.94, + "learning_rate": 7.96917863399449e-06, + "loss": 0.7695, + "step": 17579 + }, + { + "epoch": 0.94, + "learning_rate": 7.953708111396952e-06, + "loss": 0.8945, + "step": 17580 + }, + { + "epoch": 0.94, + "learning_rate": 7.938252499418319e-06, + "loss": 0.8359, + "step": 17581 + }, + { + "epoch": 0.94, + "learning_rate": 7.922811798526997e-06, + "loss": 0.7852, + "step": 17582 + }, + { + "epoch": 0.95, + "learning_rate": 7.907386009190887e-06, + "loss": 0.8281, + "step": 17583 + }, + { + "epoch": 0.95, + "learning_rate": 7.891975131877338e-06, + "loss": 0.8555, + "step": 17584 + }, + { + "epoch": 0.95, + "learning_rate": 7.876579167053421e-06, + "loss": 0.8281, + "step": 17585 + }, + { + "epoch": 0.95, + "learning_rate": 7.861198115185764e-06, + "loss": 0.8477, + "step": 17586 + }, + { + "epoch": 0.95, + "learning_rate": 7.84583197674038e-06, + "loss": 0.7539, + "step": 17587 + }, + { + "epoch": 0.95, + "learning_rate": 7.830480752182844e-06, + "loss": 0.7266, + "step": 17588 + }, + { + "epoch": 0.95, + "learning_rate": 7.815144441978505e-06, + "loss": 0.7969, + "step": 17589 + }, + { + "epoch": 0.95, + "learning_rate": 7.799823046591992e-06, + "loss": 0.7227, + "step": 17590 + }, + { + "epoch": 0.95, + "learning_rate": 7.784516566487599e-06, + "loss": 0.8086, + "step": 17591 + }, + { + "epoch": 0.95, + "learning_rate": 7.769225002129177e-06, + "loss": 0.875, + "step": 17592 + }, + { + "epoch": 0.95, + "learning_rate": 7.75394835398019e-06, + "loss": 0.8906, + "step": 17593 + }, + { + "epoch": 0.95, + "learning_rate": 7.738686622503487e-06, + "loss": 0.7227, + "step": 17594 + }, + { + "epoch": 0.95, + "learning_rate": 7.723439808161536e-06, + "loss": 0.7188, + "step": 17595 + }, + { + "epoch": 0.95, + "learning_rate": 7.708207911416355e-06, + "loss": 0.7812, + "step": 17596 + }, + { + "epoch": 0.95, + "learning_rate": 7.692990932729627e-06, + "loss": 0.8516, + "step": 17597 + }, + { + "epoch": 0.95, + "learning_rate": 7.677788872562375e-06, + "loss": 0.7305, + "step": 17598 + }, + { + "epoch": 0.95, + "learning_rate": 7.662601731375341e-06, + "loss": 0.7539, + "step": 17599 + }, + { + "epoch": 0.95, + "learning_rate": 7.647429509628657e-06, + "loss": 0.7891, + "step": 17600 + }, + { + "epoch": 0.95, + "learning_rate": 7.632272207782176e-06, + "loss": 0.793, + "step": 17601 + }, + { + "epoch": 0.95, + "learning_rate": 7.617129826295199e-06, + "loss": 0.7461, + "step": 17602 + }, + { + "epoch": 0.95, + "learning_rate": 7.602002365626526e-06, + "loss": 0.8242, + "step": 17603 + }, + { + "epoch": 0.95, + "learning_rate": 7.5868898262346776e-06, + "loss": 0.7383, + "step": 17604 + }, + { + "epoch": 0.95, + "learning_rate": 7.571792208577455e-06, + "loss": 0.7734, + "step": 17605 + }, + { + "epoch": 0.95, + "learning_rate": 7.556709513112547e-06, + "loss": 0.793, + "step": 17606 + }, + { + "epoch": 0.95, + "learning_rate": 7.541641740296867e-06, + "loss": 0.8633, + "step": 17607 + }, + { + "epoch": 0.95, + "learning_rate": 7.526588890587105e-06, + "loss": 0.8633, + "step": 17608 + }, + { + "epoch": 0.95, + "learning_rate": 7.511550964439396e-06, + "loss": 0.7773, + "step": 17609 + }, + { + "epoch": 0.95, + "learning_rate": 7.496527962309429e-06, + "loss": 0.7305, + "step": 17610 + }, + { + "epoch": 0.95, + "learning_rate": 7.481519884652399e-06, + "loss": 0.9023, + "step": 17611 + }, + { + "epoch": 0.95, + "learning_rate": 7.466526731923162e-06, + "loss": 0.7852, + "step": 17612 + }, + { + "epoch": 0.95, + "learning_rate": 7.451548504576078e-06, + "loss": 0.8242, + "step": 17613 + }, + { + "epoch": 0.95, + "learning_rate": 7.4365852030649495e-06, + "loss": 0.8594, + "step": 17614 + }, + { + "epoch": 0.95, + "learning_rate": 7.421636827843248e-06, + "loss": 0.8164, + "step": 17615 + }, + { + "epoch": 0.95, + "learning_rate": 7.406703379364055e-06, + "loss": 0.707, + "step": 17616 + }, + { + "epoch": 0.95, + "learning_rate": 7.391784858079786e-06, + "loss": 0.8594, + "step": 17617 + }, + { + "epoch": 0.95, + "learning_rate": 7.376881264442581e-06, + "loss": 0.7969, + "step": 17618 + }, + { + "epoch": 0.95, + "learning_rate": 7.361992598904022e-06, + "loss": 0.8008, + "step": 17619 + }, + { + "epoch": 0.95, + "learning_rate": 7.347118861915303e-06, + "loss": 0.8633, + "step": 17620 + }, + { + "epoch": 0.95, + "learning_rate": 7.332260053927176e-06, + "loss": 0.9141, + "step": 17621 + }, + { + "epoch": 0.95, + "learning_rate": 7.3174161753898905e-06, + "loss": 0.9219, + "step": 17622 + }, + { + "epoch": 0.95, + "learning_rate": 7.302587226753255e-06, + "loss": 0.7422, + "step": 17623 + }, + { + "epoch": 0.95, + "learning_rate": 7.287773208466631e-06, + "loss": 0.8359, + "step": 17624 + }, + { + "epoch": 0.95, + "learning_rate": 7.272974120978937e-06, + "loss": 0.8906, + "step": 17625 + }, + { + "epoch": 0.95, + "learning_rate": 7.258189964738704e-06, + "loss": 0.7852, + "step": 17626 + }, + { + "epoch": 0.95, + "learning_rate": 7.2434207401937955e-06, + "loss": 0.6953, + "step": 17627 + }, + { + "epoch": 0.95, + "learning_rate": 7.228666447791854e-06, + "loss": 0.7461, + "step": 17628 + }, + { + "epoch": 0.95, + "learning_rate": 7.213927087980077e-06, + "loss": 0.8203, + "step": 17629 + }, + { + "epoch": 0.95, + "learning_rate": 7.19920266120494e-06, + "loss": 0.8711, + "step": 17630 + }, + { + "epoch": 0.95, + "learning_rate": 7.184493167912698e-06, + "loss": 0.7656, + "step": 17631 + }, + { + "epoch": 0.95, + "learning_rate": 7.169798608549161e-06, + "loss": 0.7148, + "step": 17632 + }, + { + "epoch": 0.95, + "learning_rate": 7.155118983559583e-06, + "loss": 0.7227, + "step": 17633 + }, + { + "epoch": 0.95, + "learning_rate": 7.140454293388776e-06, + "loss": 0.7148, + "step": 17634 + }, + { + "epoch": 0.95, + "learning_rate": 7.125804538481162e-06, + "loss": 0.8008, + "step": 17635 + }, + { + "epoch": 0.95, + "learning_rate": 7.111169719280719e-06, + "loss": 0.7812, + "step": 17636 + }, + { + "epoch": 0.95, + "learning_rate": 7.0965498362308145e-06, + "loss": 0.8086, + "step": 17637 + }, + { + "epoch": 0.95, + "learning_rate": 7.081944889774538e-06, + "loss": 0.8789, + "step": 17638 + }, + { + "epoch": 0.95, + "learning_rate": 7.067354880354538e-06, + "loss": 0.8086, + "step": 17639 + }, + { + "epoch": 0.95, + "learning_rate": 7.052779808412846e-06, + "loss": 0.7539, + "step": 17640 + }, + { + "epoch": 0.95, + "learning_rate": 7.0382196743911105e-06, + "loss": 0.7969, + "step": 17641 + }, + { + "epoch": 0.95, + "learning_rate": 7.0236744787306465e-06, + "loss": 0.7305, + "step": 17642 + }, + { + "epoch": 0.95, + "learning_rate": 7.00914422187221e-06, + "loss": 0.8203, + "step": 17643 + }, + { + "epoch": 0.95, + "learning_rate": 6.994628904256006e-06, + "loss": 0.7891, + "step": 17644 + }, + { + "epoch": 0.95, + "learning_rate": 6.980128526322071e-06, + "loss": 0.8516, + "step": 17645 + }, + { + "epoch": 0.95, + "learning_rate": 6.965643088509666e-06, + "loss": 0.7617, + "step": 17646 + }, + { + "epoch": 0.95, + "learning_rate": 6.9511725912578265e-06, + "loss": 0.8555, + "step": 17647 + }, + { + "epoch": 0.95, + "learning_rate": 6.93671703500498e-06, + "loss": 0.8828, + "step": 17648 + }, + { + "epoch": 0.95, + "learning_rate": 6.922276420189277e-06, + "loss": 0.7656, + "step": 17649 + }, + { + "epoch": 0.95, + "learning_rate": 6.907850747248201e-06, + "loss": 0.8281, + "step": 17650 + }, + { + "epoch": 0.95, + "learning_rate": 6.893440016619013e-06, + "loss": 0.8125, + "step": 17651 + }, + { + "epoch": 0.95, + "learning_rate": 6.8790442287383625e-06, + "loss": 0.8047, + "step": 17652 + }, + { + "epoch": 0.95, + "learning_rate": 6.864663384042457e-06, + "loss": 0.7617, + "step": 17653 + }, + { + "epoch": 0.95, + "learning_rate": 6.850297482967116e-06, + "loss": 0.7734, + "step": 17654 + }, + { + "epoch": 0.95, + "learning_rate": 6.8359465259476005e-06, + "loss": 0.8125, + "step": 17655 + }, + { + "epoch": 0.95, + "learning_rate": 6.821610513418952e-06, + "loss": 0.8438, + "step": 17656 + }, + { + "epoch": 0.95, + "learning_rate": 6.807289445815434e-06, + "loss": 0.7891, + "step": 17657 + }, + { + "epoch": 0.95, + "learning_rate": 6.792983323571145e-06, + "loss": 0.7969, + "step": 17658 + }, + { + "epoch": 0.95, + "learning_rate": 6.778692147119514e-06, + "loss": 0.8516, + "step": 17659 + }, + { + "epoch": 0.95, + "learning_rate": 6.764415916893696e-06, + "loss": 0.8125, + "step": 17660 + }, + { + "epoch": 0.95, + "learning_rate": 6.750154633326177e-06, + "loss": 0.75, + "step": 17661 + }, + { + "epoch": 0.95, + "learning_rate": 6.73590829684928e-06, + "loss": 0.7578, + "step": 17662 + }, + { + "epoch": 0.95, + "learning_rate": 6.721676907894603e-06, + "loss": 0.8008, + "step": 17663 + }, + { + "epoch": 0.95, + "learning_rate": 6.7074604668934666e-06, + "loss": 0.832, + "step": 17664 + }, + { + "epoch": 0.95, + "learning_rate": 6.693258974276639e-06, + "loss": 0.8008, + "step": 17665 + }, + { + "epoch": 0.95, + "learning_rate": 6.679072430474442e-06, + "loss": 0.8047, + "step": 17666 + }, + { + "epoch": 0.95, + "learning_rate": 6.664900835916921e-06, + "loss": 0.7969, + "step": 17667 + }, + { + "epoch": 0.95, + "learning_rate": 6.650744191033286e-06, + "loss": 0.7773, + "step": 17668 + }, + { + "epoch": 0.95, + "learning_rate": 6.636602496252753e-06, + "loss": 0.7891, + "step": 17669 + }, + { + "epoch": 0.95, + "learning_rate": 6.622475752003754e-06, + "loss": 0.8086, + "step": 17670 + }, + { + "epoch": 0.95, + "learning_rate": 6.608363958714336e-06, + "loss": 0.8359, + "step": 17671 + }, + { + "epoch": 0.95, + "learning_rate": 6.594267116812269e-06, + "loss": 0.7539, + "step": 17672 + }, + { + "epoch": 0.95, + "learning_rate": 6.5801852267246e-06, + "loss": 0.8203, + "step": 17673 + }, + { + "epoch": 0.95, + "learning_rate": 6.566118288878098e-06, + "loss": 0.832, + "step": 17674 + }, + { + "epoch": 0.95, + "learning_rate": 6.552066303699089e-06, + "loss": 0.7109, + "step": 17675 + }, + { + "epoch": 0.95, + "learning_rate": 6.538029271613344e-06, + "loss": 0.7461, + "step": 17676 + }, + { + "epoch": 0.95, + "learning_rate": 6.5240071930461886e-06, + "loss": 0.7266, + "step": 17677 + }, + { + "epoch": 0.95, + "learning_rate": 6.510000068422617e-06, + "loss": 0.7773, + "step": 17678 + }, + { + "epoch": 0.95, + "learning_rate": 6.496007898167122e-06, + "loss": 0.7461, + "step": 17679 + }, + { + "epoch": 0.95, + "learning_rate": 6.482030682703644e-06, + "loss": 0.8594, + "step": 17680 + }, + { + "epoch": 0.95, + "learning_rate": 6.468068422455675e-06, + "loss": 0.8281, + "step": 17681 + }, + { + "epoch": 0.95, + "learning_rate": 6.454121117846434e-06, + "loss": 0.8164, + "step": 17682 + }, + { + "epoch": 0.95, + "learning_rate": 6.4401887692985274e-06, + "loss": 0.8633, + "step": 17683 + }, + { + "epoch": 0.95, + "learning_rate": 6.426271377234172e-06, + "loss": 0.8086, + "step": 17684 + }, + { + "epoch": 0.95, + "learning_rate": 6.412368942075031e-06, + "loss": 0.8242, + "step": 17685 + }, + { + "epoch": 0.95, + "learning_rate": 6.398481464242489e-06, + "loss": 0.7695, + "step": 17686 + }, + { + "epoch": 0.95, + "learning_rate": 6.384608944157322e-06, + "loss": 0.875, + "step": 17687 + }, + { + "epoch": 0.95, + "learning_rate": 6.3707513822400255e-06, + "loss": 0.7773, + "step": 17688 + }, + { + "epoch": 0.95, + "learning_rate": 6.356908778910319e-06, + "loss": 0.793, + "step": 17689 + }, + { + "epoch": 0.95, + "learning_rate": 6.3430811345878695e-06, + "loss": 0.7969, + "step": 17690 + }, + { + "epoch": 0.95, + "learning_rate": 6.329268449691617e-06, + "loss": 0.8672, + "step": 17691 + }, + { + "epoch": 0.95, + "learning_rate": 6.3154707246401714e-06, + "loss": 0.7852, + "step": 17692 + }, + { + "epoch": 0.95, + "learning_rate": 6.301687959851588e-06, + "loss": 0.7734, + "step": 17693 + }, + { + "epoch": 0.95, + "learning_rate": 6.287920155743531e-06, + "loss": 0.7695, + "step": 17694 + }, + { + "epoch": 0.95, + "learning_rate": 6.2741673127333366e-06, + "loss": 0.8047, + "step": 17695 + }, + { + "epoch": 0.95, + "learning_rate": 6.260429431237613e-06, + "loss": 0.8438, + "step": 17696 + }, + { + "epoch": 0.95, + "learning_rate": 6.246706511672695e-06, + "loss": 0.7227, + "step": 17697 + }, + { + "epoch": 0.95, + "learning_rate": 6.232998554454472e-06, + "loss": 0.8125, + "step": 17698 + }, + { + "epoch": 0.95, + "learning_rate": 6.2193055599983895e-06, + "loss": 0.9023, + "step": 17699 + }, + { + "epoch": 0.95, + "learning_rate": 6.205627528719227e-06, + "loss": 0.8984, + "step": 17700 + }, + { + "epoch": 0.95, + "learning_rate": 6.191964461031596e-06, + "loss": 0.8711, + "step": 17701 + }, + { + "epoch": 0.95, + "learning_rate": 6.178316357349556e-06, + "loss": 0.9023, + "step": 17702 + }, + { + "epoch": 0.95, + "learning_rate": 6.164683218086609e-06, + "loss": 0.7617, + "step": 17703 + }, + { + "epoch": 0.95, + "learning_rate": 6.1510650436559235e-06, + "loss": 0.7734, + "step": 17704 + }, + { + "epoch": 0.95, + "learning_rate": 6.137461834470115e-06, + "loss": 0.8906, + "step": 17705 + }, + { + "epoch": 0.95, + "learning_rate": 6.12387359094152e-06, + "loss": 0.8008, + "step": 17706 + }, + { + "epoch": 0.95, + "learning_rate": 6.110300313481809e-06, + "loss": 0.8281, + "step": 17707 + }, + { + "epoch": 0.95, + "learning_rate": 6.096742002502375e-06, + "loss": 0.7305, + "step": 17708 + }, + { + "epoch": 0.95, + "learning_rate": 6.083198658414002e-06, + "loss": 0.7461, + "step": 17709 + }, + { + "epoch": 0.95, + "learning_rate": 6.069670281627138e-06, + "loss": 0.7852, + "step": 17710 + }, + { + "epoch": 0.95, + "learning_rate": 6.056156872551733e-06, + "loss": 0.8516, + "step": 17711 + }, + { + "epoch": 0.95, + "learning_rate": 6.042658431597292e-06, + "loss": 0.8164, + "step": 17712 + }, + { + "epoch": 0.95, + "learning_rate": 6.029174959172823e-06, + "loss": 0.8555, + "step": 17713 + }, + { + "epoch": 0.95, + "learning_rate": 6.015706455686998e-06, + "loss": 0.793, + "step": 17714 + }, + { + "epoch": 0.95, + "learning_rate": 6.0022529215478795e-06, + "loss": 0.8125, + "step": 17715 + }, + { + "epoch": 0.95, + "learning_rate": 5.988814357163197e-06, + "loss": 0.8594, + "step": 17716 + }, + { + "epoch": 0.95, + "learning_rate": 5.97539076294018e-06, + "loss": 0.7852, + "step": 17717 + }, + { + "epoch": 0.95, + "learning_rate": 5.961982139285616e-06, + "loss": 0.832, + "step": 17718 + }, + { + "epoch": 0.95, + "learning_rate": 5.948588486605788e-06, + "loss": 0.7969, + "step": 17719 + }, + { + "epoch": 0.95, + "learning_rate": 5.9352098053065965e-06, + "loss": 0.8125, + "step": 17720 + }, + { + "epoch": 0.95, + "learning_rate": 5.921846095793438e-06, + "loss": 0.7812, + "step": 17721 + }, + { + "epoch": 0.95, + "learning_rate": 5.908497358471321e-06, + "loss": 0.8359, + "step": 17722 + }, + { + "epoch": 0.95, + "learning_rate": 5.8951635937447015e-06, + "loss": 0.8672, + "step": 17723 + }, + { + "epoch": 0.95, + "learning_rate": 5.8818448020176994e-06, + "loss": 0.7695, + "step": 17724 + }, + { + "epoch": 0.95, + "learning_rate": 5.868540983693882e-06, + "loss": 0.918, + "step": 17725 + }, + { + "epoch": 0.95, + "learning_rate": 5.855252139176426e-06, + "loss": 0.7344, + "step": 17726 + }, + { + "epoch": 0.95, + "learning_rate": 5.8419782688679535e-06, + "loss": 0.7422, + "step": 17727 + }, + { + "epoch": 0.95, + "learning_rate": 5.828719373170755e-06, + "loss": 0.9023, + "step": 17728 + }, + { + "epoch": 0.95, + "learning_rate": 5.815475452486618e-06, + "loss": 0.7266, + "step": 17729 + }, + { + "epoch": 0.95, + "learning_rate": 5.802246507216891e-06, + "loss": 0.7734, + "step": 17730 + }, + { + "epoch": 0.95, + "learning_rate": 5.789032537762473e-06, + "loss": 0.8203, + "step": 17731 + }, + { + "epoch": 0.95, + "learning_rate": 5.775833544523712e-06, + "loss": 0.7344, + "step": 17732 + }, + { + "epoch": 0.95, + "learning_rate": 5.762649527900677e-06, + "loss": 0.8828, + "step": 17733 + }, + { + "epoch": 0.95, + "learning_rate": 5.749480488292769e-06, + "loss": 0.7266, + "step": 17734 + }, + { + "epoch": 0.95, + "learning_rate": 5.73632642609917e-06, + "loss": 0.7773, + "step": 17735 + }, + { + "epoch": 0.95, + "learning_rate": 5.723187341718394e-06, + "loss": 0.8203, + "step": 17736 + }, + { + "epoch": 0.95, + "learning_rate": 5.7100632355486795e-06, + "loss": 0.8008, + "step": 17737 + }, + { + "epoch": 0.95, + "learning_rate": 5.696954107987706e-06, + "loss": 0.8164, + "step": 17738 + }, + { + "epoch": 0.95, + "learning_rate": 5.683859959432714e-06, + "loss": 0.9258, + "step": 17739 + }, + { + "epoch": 0.95, + "learning_rate": 5.670780790280439e-06, + "loss": 0.8086, + "step": 17740 + }, + { + "epoch": 0.95, + "learning_rate": 5.657716600927343e-06, + "loss": 0.8047, + "step": 17741 + }, + { + "epoch": 0.95, + "learning_rate": 5.644667391769276e-06, + "loss": 0.7539, + "step": 17742 + }, + { + "epoch": 0.95, + "learning_rate": 5.6316331632015885e-06, + "loss": 0.6875, + "step": 17743 + }, + { + "epoch": 0.95, + "learning_rate": 5.618613915619353e-06, + "loss": 0.8516, + "step": 17744 + }, + { + "epoch": 0.95, + "learning_rate": 5.605609649417087e-06, + "loss": 0.7734, + "step": 17745 + }, + { + "epoch": 0.95, + "learning_rate": 5.5926203649888095e-06, + "loss": 0.8828, + "step": 17746 + }, + { + "epoch": 0.95, + "learning_rate": 5.579646062728206e-06, + "loss": 0.8789, + "step": 17747 + }, + { + "epoch": 0.95, + "learning_rate": 5.566686743028349e-06, + "loss": 0.7383, + "step": 17748 + }, + { + "epoch": 0.95, + "learning_rate": 5.553742406282036e-06, + "loss": 0.8477, + "step": 17749 + }, + { + "epoch": 0.95, + "learning_rate": 5.5408130528814545e-06, + "loss": 0.7852, + "step": 17750 + }, + { + "epoch": 0.95, + "learning_rate": 5.527898683218458e-06, + "loss": 0.8359, + "step": 17751 + }, + { + "epoch": 0.95, + "learning_rate": 5.5149992976843976e-06, + "loss": 0.7422, + "step": 17752 + }, + { + "epoch": 0.95, + "learning_rate": 5.502114896670129e-06, + "loss": 0.8281, + "step": 17753 + }, + { + "epoch": 0.95, + "learning_rate": 5.489245480566174e-06, + "loss": 0.8555, + "step": 17754 + }, + { + "epoch": 0.95, + "learning_rate": 5.476391049762386e-06, + "loss": 0.8984, + "step": 17755 + }, + { + "epoch": 0.95, + "learning_rate": 5.463551604648398e-06, + "loss": 0.7812, + "step": 17756 + }, + { + "epoch": 0.95, + "learning_rate": 5.450727145613232e-06, + "loss": 0.8906, + "step": 17757 + }, + { + "epoch": 0.95, + "learning_rate": 5.437917673045523e-06, + "loss": 0.793, + "step": 17758 + }, + { + "epoch": 0.95, + "learning_rate": 5.4251231873334585e-06, + "loss": 0.8438, + "step": 17759 + }, + { + "epoch": 0.95, + "learning_rate": 5.412343688864729e-06, + "loss": 0.7852, + "step": 17760 + }, + { + "epoch": 0.95, + "learning_rate": 5.399579178026692e-06, + "loss": 0.7461, + "step": 17761 + }, + { + "epoch": 0.95, + "learning_rate": 5.386829655206038e-06, + "loss": 0.7539, + "step": 17762 + }, + { + "epoch": 0.95, + "learning_rate": 5.374095120789124e-06, + "loss": 0.875, + "step": 17763 + }, + { + "epoch": 0.95, + "learning_rate": 5.361375575161865e-06, + "loss": 0.8516, + "step": 17764 + }, + { + "epoch": 0.95, + "learning_rate": 5.348671018709783e-06, + "loss": 0.7227, + "step": 17765 + }, + { + "epoch": 0.95, + "learning_rate": 5.33598145181774e-06, + "loss": 0.7617, + "step": 17766 + }, + { + "epoch": 0.95, + "learning_rate": 5.323306874870315e-06, + "loss": 0.8516, + "step": 17767 + }, + { + "epoch": 0.95, + "learning_rate": 5.310647288251702e-06, + "loss": 0.6914, + "step": 17768 + }, + { + "epoch": 0.96, + "learning_rate": 5.298002692345428e-06, + "loss": 0.7109, + "step": 17769 + }, + { + "epoch": 0.96, + "learning_rate": 5.285373087534628e-06, + "loss": 0.7031, + "step": 17770 + }, + { + "epoch": 0.96, + "learning_rate": 5.272758474202055e-06, + "loss": 0.8594, + "step": 17771 + }, + { + "epoch": 0.96, + "learning_rate": 5.260158852730012e-06, + "loss": 0.7656, + "step": 17772 + }, + { + "epoch": 0.96, + "learning_rate": 5.2475742235002515e-06, + "loss": 0.8477, + "step": 17773 + }, + { + "epoch": 0.96, + "learning_rate": 5.235004586894243e-06, + "loss": 0.832, + "step": 17774 + }, + { + "epoch": 0.96, + "learning_rate": 5.2224499432927394e-06, + "loss": 0.7227, + "step": 17775 + }, + { + "epoch": 0.96, + "learning_rate": 5.209910293076214e-06, + "loss": 0.625, + "step": 17776 + }, + { + "epoch": 0.96, + "learning_rate": 5.1973856366248075e-06, + "loss": 0.6641, + "step": 17777 + }, + { + "epoch": 0.96, + "learning_rate": 5.184875974317882e-06, + "loss": 0.7969, + "step": 17778 + }, + { + "epoch": 0.96, + "learning_rate": 5.172381306534579e-06, + "loss": 0.7617, + "step": 17779 + }, + { + "epoch": 0.96, + "learning_rate": 5.159901633653541e-06, + "loss": 0.7383, + "step": 17780 + }, + { + "epoch": 0.96, + "learning_rate": 5.147436956052964e-06, + "loss": 0.8008, + "step": 17781 + }, + { + "epoch": 0.96, + "learning_rate": 5.134987274110547e-06, + "loss": 0.7422, + "step": 17782 + }, + { + "epoch": 0.96, + "learning_rate": 5.122552588203488e-06, + "loss": 0.8242, + "step": 17783 + }, + { + "epoch": 0.96, + "learning_rate": 5.110132898708708e-06, + "loss": 0.8125, + "step": 17784 + }, + { + "epoch": 0.96, + "learning_rate": 5.097728206002572e-06, + "loss": 0.8125, + "step": 17785 + }, + { + "epoch": 0.96, + "learning_rate": 5.0853385104608355e-06, + "loss": 0.6602, + "step": 17786 + }, + { + "epoch": 0.96, + "learning_rate": 5.072963812459086e-06, + "loss": 0.8555, + "step": 17787 + }, + { + "epoch": 0.96, + "learning_rate": 5.060604112372302e-06, + "loss": 0.8203, + "step": 17788 + }, + { + "epoch": 0.96, + "learning_rate": 5.048259410574962e-06, + "loss": 0.7891, + "step": 17789 + }, + { + "epoch": 0.96, + "learning_rate": 5.035929707441156e-06, + "loss": 0.8164, + "step": 17790 + }, + { + "epoch": 0.96, + "learning_rate": 5.023615003344639e-06, + "loss": 0.832, + "step": 17791 + }, + { + "epoch": 0.96, + "learning_rate": 5.011315298658448e-06, + "loss": 0.9258, + "step": 17792 + }, + { + "epoch": 0.96, + "learning_rate": 4.999030593755283e-06, + "loss": 0.8633, + "step": 17793 + }, + { + "epoch": 0.96, + "learning_rate": 4.986760889007513e-06, + "loss": 0.7344, + "step": 17794 + }, + { + "epoch": 0.96, + "learning_rate": 4.974506184786953e-06, + "loss": 0.7656, + "step": 17795 + }, + { + "epoch": 0.96, + "learning_rate": 4.962266481464861e-06, + "loss": 0.8008, + "step": 17796 + }, + { + "epoch": 0.96, + "learning_rate": 4.950041779412218e-06, + "loss": 0.793, + "step": 17797 + }, + { + "epoch": 0.96, + "learning_rate": 4.937832078999449e-06, + "loss": 0.7812, + "step": 17798 + }, + { + "epoch": 0.96, + "learning_rate": 4.925637380596593e-06, + "loss": 0.7969, + "step": 17799 + }, + { + "epoch": 0.96, + "learning_rate": 4.91345768457313e-06, + "loss": 0.7773, + "step": 17800 + }, + { + "epoch": 0.96, + "learning_rate": 4.901292991298156e-06, + "loss": 0.8945, + "step": 17801 + }, + { + "epoch": 0.96, + "learning_rate": 4.88914330114032e-06, + "loss": 0.9453, + "step": 17802 + }, + { + "epoch": 0.96, + "learning_rate": 4.877008614467771e-06, + "loss": 0.8203, + "step": 17803 + }, + { + "epoch": 0.96, + "learning_rate": 4.86488893164827e-06, + "loss": 0.7734, + "step": 17804 + }, + { + "epoch": 0.96, + "learning_rate": 4.852784253049025e-06, + "loss": 0.6758, + "step": 17805 + }, + { + "epoch": 0.96, + "learning_rate": 4.840694579036853e-06, + "loss": 0.7656, + "step": 17806 + }, + { + "epoch": 0.96, + "learning_rate": 4.828619909978238e-06, + "loss": 0.8398, + "step": 17807 + }, + { + "epoch": 0.96, + "learning_rate": 4.8165602462389435e-06, + "loss": 0.7773, + "step": 17808 + }, + { + "epoch": 0.96, + "learning_rate": 4.8045155881844e-06, + "loss": 0.7891, + "step": 17809 + }, + { + "epoch": 0.96, + "learning_rate": 4.792485936179702e-06, + "loss": 0.7383, + "step": 17810 + }, + { + "epoch": 0.96, + "learning_rate": 4.7804712905893385e-06, + "loss": 0.7695, + "step": 17811 + }, + { + "epoch": 0.96, + "learning_rate": 4.768471651777406e-06, + "loss": 0.6992, + "step": 17812 + }, + { + "epoch": 0.96, + "learning_rate": 4.756487020107503e-06, + "loss": 0.7266, + "step": 17813 + }, + { + "epoch": 0.96, + "learning_rate": 4.744517395942838e-06, + "loss": 0.7266, + "step": 17814 + }, + { + "epoch": 0.96, + "learning_rate": 4.732562779646121e-06, + "loss": 0.7695, + "step": 17815 + }, + { + "epoch": 0.96, + "learning_rate": 4.720623171579619e-06, + "loss": 0.7422, + "step": 17816 + }, + { + "epoch": 0.96, + "learning_rate": 4.708698572105097e-06, + "loss": 0.6523, + "step": 17817 + }, + { + "epoch": 0.96, + "learning_rate": 4.696788981583988e-06, + "loss": 0.7539, + "step": 17818 + }, + { + "epoch": 0.96, + "learning_rate": 4.684894400377116e-06, + "loss": 0.8984, + "step": 17819 + }, + { + "epoch": 0.96, + "learning_rate": 4.673014828844968e-06, + "loss": 0.793, + "step": 17820 + }, + { + "epoch": 0.96, + "learning_rate": 4.6611502673475356e-06, + "loss": 0.7969, + "step": 17821 + }, + { + "epoch": 0.96, + "learning_rate": 4.649300716244365e-06, + "loss": 0.793, + "step": 17822 + }, + { + "epoch": 0.96, + "learning_rate": 4.637466175894445e-06, + "loss": 0.7852, + "step": 17823 + }, + { + "epoch": 0.96, + "learning_rate": 4.625646646656545e-06, + "loss": 0.7148, + "step": 17824 + }, + { + "epoch": 0.96, + "learning_rate": 4.613842128888712e-06, + "loss": 0.7852, + "step": 17825 + }, + { + "epoch": 0.96, + "learning_rate": 4.602052622948716e-06, + "loss": 0.7617, + "step": 17826 + }, + { + "epoch": 0.96, + "learning_rate": 4.590278129193881e-06, + "loss": 0.7773, + "step": 17827 + }, + { + "epoch": 0.96, + "learning_rate": 4.578518647980867e-06, + "loss": 0.6992, + "step": 17828 + }, + { + "epoch": 0.96, + "learning_rate": 4.5667741796661115e-06, + "loss": 0.8555, + "step": 17829 + }, + { + "epoch": 0.96, + "learning_rate": 4.55504472460555e-06, + "loss": 0.8125, + "step": 17830 + }, + { + "epoch": 0.96, + "learning_rate": 4.5433302831545675e-06, + "loss": 0.7734, + "step": 17831 + }, + { + "epoch": 0.96, + "learning_rate": 4.5316308556681e-06, + "loss": 0.8438, + "step": 17832 + }, + { + "epoch": 0.96, + "learning_rate": 4.519946442500755e-06, + "loss": 0.8555, + "step": 17833 + }, + { + "epoch": 0.96, + "learning_rate": 4.508277044006636e-06, + "loss": 0.8516, + "step": 17834 + }, + { + "epoch": 0.96, + "learning_rate": 4.496622660539296e-06, + "loss": 0.6289, + "step": 17835 + }, + { + "epoch": 0.96, + "learning_rate": 4.484983292451894e-06, + "loss": 0.8672, + "step": 17836 + }, + { + "epoch": 0.96, + "learning_rate": 4.4733589400972054e-06, + "loss": 0.832, + "step": 17837 + }, + { + "epoch": 0.96, + "learning_rate": 4.4617496038274475e-06, + "loss": 0.7852, + "step": 17838 + }, + { + "epoch": 0.96, + "learning_rate": 4.4501552839943945e-06, + "loss": 0.8125, + "step": 17839 + }, + { + "epoch": 0.96, + "learning_rate": 4.438575980949488e-06, + "loss": 0.7461, + "step": 17840 + }, + { + "epoch": 0.96, + "learning_rate": 4.4270116950435035e-06, + "loss": 0.7734, + "step": 17841 + }, + { + "epoch": 0.96, + "learning_rate": 4.4154624266269376e-06, + "loss": 0.918, + "step": 17842 + }, + { + "epoch": 0.96, + "learning_rate": 4.403928176049788e-06, + "loss": 0.8086, + "step": 17843 + }, + { + "epoch": 0.96, + "learning_rate": 4.392408943661552e-06, + "loss": 0.7578, + "step": 17844 + }, + { + "epoch": 0.96, + "learning_rate": 4.38090472981123e-06, + "loss": 0.8477, + "step": 17845 + }, + { + "epoch": 0.96, + "learning_rate": 4.369415534847543e-06, + "loss": 0.7617, + "step": 17846 + }, + { + "epoch": 0.96, + "learning_rate": 4.357941359118656e-06, + "loss": 0.8242, + "step": 17847 + }, + { + "epoch": 0.96, + "learning_rate": 4.346482202972235e-06, + "loss": 0.8242, + "step": 17848 + }, + { + "epoch": 0.96, + "learning_rate": 4.335038066755448e-06, + "loss": 0.793, + "step": 17849 + }, + { + "epoch": 0.96, + "learning_rate": 4.323608950815239e-06, + "loss": 0.7852, + "step": 17850 + }, + { + "epoch": 0.96, + "learning_rate": 4.3121948554979415e-06, + "loss": 0.8984, + "step": 17851 + }, + { + "epoch": 0.96, + "learning_rate": 4.3007957811492805e-06, + "loss": 0.918, + "step": 17852 + }, + { + "epoch": 0.96, + "learning_rate": 4.289411728114811e-06, + "loss": 0.7461, + "step": 17853 + }, + { + "epoch": 0.96, + "learning_rate": 4.278042696739481e-06, + "loss": 0.6562, + "step": 17854 + }, + { + "epoch": 0.96, + "learning_rate": 4.266688687367792e-06, + "loss": 0.8164, + "step": 17855 + }, + { + "epoch": 0.96, + "learning_rate": 4.255349700343858e-06, + "loss": 0.7969, + "step": 17856 + }, + { + "epoch": 0.96, + "learning_rate": 4.2440257360112364e-06, + "loss": 0.7422, + "step": 17857 + }, + { + "epoch": 0.96, + "learning_rate": 4.232716794713154e-06, + "loss": 0.7539, + "step": 17858 + }, + { + "epoch": 0.96, + "learning_rate": 4.221422876792169e-06, + "loss": 0.8398, + "step": 17859 + }, + { + "epoch": 0.96, + "learning_rate": 4.210143982590675e-06, + "loss": 0.832, + "step": 17860 + }, + { + "epoch": 0.96, + "learning_rate": 4.1988801124503424e-06, + "loss": 0.8633, + "step": 17861 + }, + { + "epoch": 0.96, + "learning_rate": 4.187631266712566e-06, + "loss": 0.832, + "step": 17862 + }, + { + "epoch": 0.96, + "learning_rate": 4.1763974457182385e-06, + "loss": 0.7461, + "step": 17863 + }, + { + "epoch": 0.96, + "learning_rate": 4.165178649807755e-06, + "loss": 0.7969, + "step": 17864 + }, + { + "epoch": 0.96, + "learning_rate": 4.153974879321065e-06, + "loss": 0.6875, + "step": 17865 + }, + { + "epoch": 0.96, + "learning_rate": 4.142786134597676e-06, + "loss": 0.8008, + "step": 17866 + }, + { + "epoch": 0.96, + "learning_rate": 4.131612415976705e-06, + "loss": 0.7305, + "step": 17867 + }, + { + "epoch": 0.96, + "learning_rate": 4.120453723796713e-06, + "loss": 0.8555, + "step": 17868 + }, + { + "epoch": 0.96, + "learning_rate": 4.109310058395766e-06, + "loss": 0.8398, + "step": 17869 + }, + { + "epoch": 0.96, + "learning_rate": 4.0981814201117016e-06, + "loss": 0.8672, + "step": 17870 + }, + { + "epoch": 0.96, + "learning_rate": 4.087067809281697e-06, + "loss": 0.8242, + "step": 17871 + }, + { + "epoch": 0.96, + "learning_rate": 4.075969226242482e-06, + "loss": 0.7148, + "step": 17872 + }, + { + "epoch": 0.96, + "learning_rate": 4.064885671330398e-06, + "loss": 0.8125, + "step": 17873 + }, + { + "epoch": 0.96, + "learning_rate": 4.0538171448814e-06, + "loss": 0.8555, + "step": 17874 + }, + { + "epoch": 0.96, + "learning_rate": 4.042763647230774e-06, + "loss": 0.8281, + "step": 17875 + }, + { + "epoch": 0.96, + "learning_rate": 4.03172517871353e-06, + "loss": 0.7422, + "step": 17876 + }, + { + "epoch": 0.96, + "learning_rate": 4.020701739664179e-06, + "loss": 0.8867, + "step": 17877 + }, + { + "epoch": 0.96, + "learning_rate": 4.009693330416786e-06, + "loss": 0.8477, + "step": 17878 + }, + { + "epoch": 0.96, + "learning_rate": 3.998699951304918e-06, + "loss": 0.832, + "step": 17879 + }, + { + "epoch": 0.96, + "learning_rate": 3.987721602661698e-06, + "loss": 0.8398, + "step": 17880 + }, + { + "epoch": 0.96, + "learning_rate": 3.976758284819804e-06, + "loss": 0.7578, + "step": 17881 + }, + { + "epoch": 0.96, + "learning_rate": 3.965809998111469e-06, + "loss": 0.7617, + "step": 17882 + }, + { + "epoch": 0.96, + "learning_rate": 3.9548767428685405e-06, + "loss": 0.8047, + "step": 17883 + }, + { + "epoch": 0.96, + "learning_rate": 3.9439585194221975e-06, + "loss": 0.7344, + "step": 17884 + }, + { + "epoch": 0.96, + "learning_rate": 3.933055328103341e-06, + "loss": 0.8008, + "step": 17885 + }, + { + "epoch": 0.96, + "learning_rate": 3.922167169242485e-06, + "loss": 0.875, + "step": 17886 + }, + { + "epoch": 0.96, + "learning_rate": 3.9112940431694225e-06, + "loss": 0.8438, + "step": 17887 + }, + { + "epoch": 0.96, + "learning_rate": 3.900435950213721e-06, + "loss": 0.7969, + "step": 17888 + }, + { + "epoch": 0.96, + "learning_rate": 3.889592890704396e-06, + "loss": 0.8281, + "step": 17889 + }, + { + "epoch": 0.96, + "learning_rate": 3.878764864970019e-06, + "loss": 0.6836, + "step": 17890 + }, + { + "epoch": 0.96, + "learning_rate": 3.867951873338771e-06, + "loss": 0.7539, + "step": 17891 + }, + { + "epoch": 0.96, + "learning_rate": 3.857153916138223e-06, + "loss": 0.75, + "step": 17892 + }, + { + "epoch": 0.96, + "learning_rate": 3.846370993695669e-06, + "loss": 0.9102, + "step": 17893 + }, + { + "epoch": 0.96, + "learning_rate": 3.835603106337904e-06, + "loss": 0.7695, + "step": 17894 + }, + { + "epoch": 0.96, + "learning_rate": 3.8248502543911104e-06, + "loss": 0.7109, + "step": 17895 + }, + { + "epoch": 0.96, + "learning_rate": 3.8141124381811944e-06, + "loss": 0.8203, + "step": 17896 + }, + { + "epoch": 0.96, + "learning_rate": 3.8033896580335627e-06, + "loss": 0.8398, + "step": 17897 + }, + { + "epoch": 0.96, + "learning_rate": 3.7926819142731215e-06, + "loss": 0.9141, + "step": 17898 + }, + { + "epoch": 0.96, + "learning_rate": 3.78198920722439e-06, + "loss": 0.7891, + "step": 17899 + }, + { + "epoch": 0.96, + "learning_rate": 3.7713115372113305e-06, + "loss": 0.8047, + "step": 17900 + }, + { + "epoch": 0.96, + "learning_rate": 3.760648904557573e-06, + "loss": 0.7578, + "step": 17901 + }, + { + "epoch": 0.96, + "learning_rate": 3.750001309586193e-06, + "loss": 0.8359, + "step": 17902 + }, + { + "epoch": 0.96, + "learning_rate": 3.739368752619876e-06, + "loss": 0.8828, + "step": 17903 + }, + { + "epoch": 0.96, + "learning_rate": 3.7287512339808093e-06, + "loss": 0.75, + "step": 17904 + }, + { + "epoch": 0.96, + "learning_rate": 3.7181487539907356e-06, + "loss": 0.8555, + "step": 17905 + }, + { + "epoch": 0.96, + "learning_rate": 3.7075613129709527e-06, + "loss": 0.7461, + "step": 17906 + }, + { + "epoch": 0.96, + "learning_rate": 3.6969889112422604e-06, + "loss": 0.8359, + "step": 17907 + }, + { + "epoch": 0.96, + "learning_rate": 3.686431549125069e-06, + "loss": 0.6953, + "step": 17908 + }, + { + "epoch": 0.96, + "learning_rate": 3.6758892269392883e-06, + "loss": 0.8203, + "step": 17909 + }, + { + "epoch": 0.96, + "learning_rate": 3.665361945004442e-06, + "loss": 0.7891, + "step": 17910 + }, + { + "epoch": 0.96, + "learning_rate": 3.65484970363944e-06, + "loss": 0.8867, + "step": 17911 + }, + { + "epoch": 0.96, + "learning_rate": 3.644352503162918e-06, + "loss": 0.875, + "step": 17912 + }, + { + "epoch": 0.96, + "learning_rate": 3.6338703438929533e-06, + "loss": 0.668, + "step": 17913 + }, + { + "epoch": 0.96, + "learning_rate": 3.6234032261471817e-06, + "loss": 0.8047, + "step": 17914 + }, + { + "epoch": 0.96, + "learning_rate": 3.612951150242738e-06, + "loss": 0.7617, + "step": 17915 + }, + { + "epoch": 0.96, + "learning_rate": 3.602514116496536e-06, + "loss": 0.7383, + "step": 17916 + }, + { + "epoch": 0.96, + "learning_rate": 3.592092125224655e-06, + "loss": 0.9453, + "step": 17917 + }, + { + "epoch": 0.96, + "learning_rate": 3.581685176742955e-06, + "loss": 0.8086, + "step": 17918 + }, + { + "epoch": 0.96, + "learning_rate": 3.571293271366849e-06, + "loss": 0.8359, + "step": 17919 + }, + { + "epoch": 0.96, + "learning_rate": 3.560916409411308e-06, + "loss": 0.7969, + "step": 17920 + }, + { + "epoch": 0.96, + "learning_rate": 3.5505545911906354e-06, + "loss": 0.8281, + "step": 17921 + }, + { + "epoch": 0.96, + "learning_rate": 3.5402078170189145e-06, + "loss": 0.75, + "step": 17922 + }, + { + "epoch": 0.96, + "learning_rate": 3.529876087209671e-06, + "loss": 0.8008, + "step": 17923 + }, + { + "epoch": 0.96, + "learning_rate": 3.519559402075989e-06, + "loss": 0.7773, + "step": 17924 + }, + { + "epoch": 0.96, + "learning_rate": 3.5092577619305068e-06, + "loss": 0.7695, + "step": 17925 + }, + { + "epoch": 0.96, + "learning_rate": 3.4989711670854186e-06, + "loss": 0.7344, + "step": 17926 + }, + { + "epoch": 0.96, + "learning_rate": 3.4886996178523645e-06, + "loss": 0.8711, + "step": 17927 + }, + { + "epoch": 0.96, + "learning_rate": 3.4784431145426507e-06, + "loss": 0.7305, + "step": 17928 + }, + { + "epoch": 0.96, + "learning_rate": 3.4682016574671404e-06, + "loss": 0.7578, + "step": 17929 + }, + { + "epoch": 0.96, + "learning_rate": 3.4579752469361403e-06, + "loss": 0.8047, + "step": 17930 + }, + { + "epoch": 0.96, + "learning_rate": 3.4477638832595138e-06, + "loss": 0.8477, + "step": 17931 + }, + { + "epoch": 0.96, + "learning_rate": 3.43756756674668e-06, + "loss": 0.7852, + "step": 17932 + }, + { + "epoch": 0.96, + "learning_rate": 3.427386297706725e-06, + "loss": 0.7812, + "step": 17933 + }, + { + "epoch": 0.96, + "learning_rate": 3.4172200764480686e-06, + "loss": 0.8125, + "step": 17934 + }, + { + "epoch": 0.96, + "learning_rate": 3.4070689032787983e-06, + "loss": 0.8711, + "step": 17935 + }, + { + "epoch": 0.96, + "learning_rate": 3.396932778506612e-06, + "loss": 0.8047, + "step": 17936 + }, + { + "epoch": 0.96, + "learning_rate": 3.386811702438597e-06, + "loss": 0.7773, + "step": 17937 + }, + { + "epoch": 0.96, + "learning_rate": 3.376705675381453e-06, + "loss": 0.8086, + "step": 17938 + }, + { + "epoch": 0.96, + "learning_rate": 3.3666146976414347e-06, + "loss": 0.7656, + "step": 17939 + }, + { + "epoch": 0.96, + "learning_rate": 3.3565387695244086e-06, + "loss": 0.707, + "step": 17940 + }, + { + "epoch": 0.96, + "learning_rate": 3.34647789133552e-06, + "loss": 0.8672, + "step": 17941 + }, + { + "epoch": 0.96, + "learning_rate": 3.3364320633798573e-06, + "loss": 0.8594, + "step": 17942 + }, + { + "epoch": 0.96, + "learning_rate": 3.3264012859616775e-06, + "loss": 0.7773, + "step": 17943 + }, + { + "epoch": 0.96, + "learning_rate": 3.3163855593850713e-06, + "loss": 0.8477, + "step": 17944 + }, + { + "epoch": 0.96, + "learning_rate": 3.3063848839534615e-06, + "loss": 0.7695, + "step": 17945 + }, + { + "epoch": 0.96, + "learning_rate": 3.29639925996994e-06, + "loss": 0.75, + "step": 17946 + }, + { + "epoch": 0.96, + "learning_rate": 3.286428687737153e-06, + "loss": 0.8984, + "step": 17947 + }, + { + "epoch": 0.96, + "learning_rate": 3.276473167557137e-06, + "loss": 0.8398, + "step": 17948 + }, + { + "epoch": 0.96, + "learning_rate": 3.2665326997315948e-06, + "loss": 0.8086, + "step": 17949 + }, + { + "epoch": 0.96, + "learning_rate": 3.2566072845618412e-06, + "loss": 0.7539, + "step": 17950 + }, + { + "epoch": 0.96, + "learning_rate": 3.2466969223485242e-06, + "loss": 0.8359, + "step": 17951 + }, + { + "epoch": 0.96, + "learning_rate": 3.236801613392126e-06, + "loss": 0.7812, + "step": 17952 + }, + { + "epoch": 0.96, + "learning_rate": 3.226921357992352e-06, + "loss": 0.7695, + "step": 17953 + }, + { + "epoch": 0.96, + "learning_rate": 3.2170561564486834e-06, + "loss": 0.8945, + "step": 17954 + }, + { + "epoch": 0.97, + "learning_rate": 3.2072060090600483e-06, + "loss": 0.8906, + "step": 17955 + }, + { + "epoch": 0.97, + "learning_rate": 3.197370916124931e-06, + "loss": 0.8164, + "step": 17956 + }, + { + "epoch": 0.97, + "learning_rate": 3.1875508779414252e-06, + "loss": 0.7305, + "step": 17957 + }, + { + "epoch": 0.97, + "learning_rate": 3.1777458948069606e-06, + "loss": 0.8086, + "step": 17958 + }, + { + "epoch": 0.97, + "learning_rate": 3.167955967018854e-06, + "loss": 0.8086, + "step": 17959 + }, + { + "epoch": 0.97, + "learning_rate": 3.1581810948736466e-06, + "loss": 0.9414, + "step": 17960 + }, + { + "epoch": 0.97, + "learning_rate": 3.1484212786676014e-06, + "loss": 0.793, + "step": 17961 + }, + { + "epoch": 0.97, + "learning_rate": 3.138676518696426e-06, + "loss": 0.8633, + "step": 17962 + }, + { + "epoch": 0.97, + "learning_rate": 3.12894681525544e-06, + "loss": 0.7148, + "step": 17963 + }, + { + "epoch": 0.97, + "learning_rate": 3.1192321686395187e-06, + "loss": 0.8633, + "step": 17964 + }, + { + "epoch": 0.97, + "learning_rate": 3.1095325791430374e-06, + "loss": 0.875, + "step": 17965 + }, + { + "epoch": 0.97, + "learning_rate": 3.0998480470598724e-06, + "loss": 0.8125, + "step": 17966 + }, + { + "epoch": 0.97, + "learning_rate": 3.0901785726835664e-06, + "loss": 0.8477, + "step": 17967 + }, + { + "epoch": 0.97, + "learning_rate": 3.080524156307052e-06, + "loss": 0.832, + "step": 17968 + }, + { + "epoch": 0.97, + "learning_rate": 3.0708847982229838e-06, + "loss": 0.8008, + "step": 17969 + }, + { + "epoch": 0.97, + "learning_rate": 3.0612604987234062e-06, + "loss": 0.7539, + "step": 17970 + }, + { + "epoch": 0.97, + "learning_rate": 3.0516512580999744e-06, + "loss": 0.9375, + "step": 17971 + }, + { + "epoch": 0.97, + "learning_rate": 3.0420570766439003e-06, + "loss": 0.8164, + "step": 17972 + }, + { + "epoch": 0.97, + "learning_rate": 3.032477954645951e-06, + "loss": 0.8008, + "step": 17973 + }, + { + "epoch": 0.97, + "learning_rate": 3.0229138923963393e-06, + "loss": 0.8008, + "step": 17974 + }, + { + "epoch": 0.97, + "learning_rate": 3.013364890184889e-06, + "loss": 0.7969, + "step": 17975 + }, + { + "epoch": 0.97, + "learning_rate": 3.0038309483009783e-06, + "loss": 0.7695, + "step": 17976 + }, + { + "epoch": 0.97, + "learning_rate": 2.9943120670335443e-06, + "loss": 0.7852, + "step": 17977 + }, + { + "epoch": 0.97, + "learning_rate": 2.9848082466710223e-06, + "loss": 0.7891, + "step": 17978 + }, + { + "epoch": 0.97, + "learning_rate": 2.97531948750146e-06, + "loss": 0.8164, + "step": 17979 + }, + { + "epoch": 0.97, + "learning_rate": 2.9658457898122936e-06, + "loss": 0.7852, + "step": 17980 + }, + { + "epoch": 0.97, + "learning_rate": 2.9563871538906827e-06, + "loss": 0.8594, + "step": 17981 + }, + { + "epoch": 0.97, + "learning_rate": 2.9469435800232314e-06, + "loss": 0.7734, + "step": 17982 + }, + { + "epoch": 0.97, + "learning_rate": 2.9375150684960994e-06, + "loss": 0.7734, + "step": 17983 + }, + { + "epoch": 0.97, + "learning_rate": 2.928101619595003e-06, + "loss": 0.8555, + "step": 17984 + }, + { + "epoch": 0.97, + "learning_rate": 2.918703233605269e-06, + "loss": 0.7617, + "step": 17985 + }, + { + "epoch": 0.97, + "learning_rate": 2.909319910811614e-06, + "loss": 0.793, + "step": 17986 + }, + { + "epoch": 0.97, + "learning_rate": 2.8999516514984224e-06, + "loss": 0.7422, + "step": 17987 + }, + { + "epoch": 0.97, + "learning_rate": 2.890598455949578e-06, + "loss": 0.8086, + "step": 17988 + }, + { + "epoch": 0.97, + "learning_rate": 2.881260324448465e-06, + "loss": 0.8008, + "step": 17989 + }, + { + "epoch": 0.97, + "learning_rate": 2.871937257278134e-06, + "loss": 0.7617, + "step": 17990 + }, + { + "epoch": 0.97, + "learning_rate": 2.862629254721083e-06, + "loss": 0.7305, + "step": 17991 + }, + { + "epoch": 0.97, + "learning_rate": 2.853336317059363e-06, + "loss": 0.793, + "step": 17992 + }, + { + "epoch": 0.97, + "learning_rate": 2.844058444574582e-06, + "loss": 0.7969, + "step": 17993 + }, + { + "epoch": 0.97, + "learning_rate": 2.8347956375479046e-06, + "loss": 0.7188, + "step": 17994 + }, + { + "epoch": 0.97, + "learning_rate": 2.8255478962599946e-06, + "loss": 0.8008, + "step": 17995 + }, + { + "epoch": 0.97, + "learning_rate": 2.816315220991128e-06, + "loss": 0.7578, + "step": 17996 + }, + { + "epoch": 0.97, + "learning_rate": 2.807097612021081e-06, + "loss": 0.8633, + "step": 17997 + }, + { + "epoch": 0.97, + "learning_rate": 2.7978950696291305e-06, + "loss": 0.8281, + "step": 17998 + }, + { + "epoch": 0.97, + "learning_rate": 2.788707594094164e-06, + "loss": 0.8945, + "step": 17999 + }, + { + "epoch": 0.97, + "learning_rate": 2.779535185694626e-06, + "loss": 0.6758, + "step": 18000 + }, + { + "epoch": 0.97, + "learning_rate": 2.770377844708405e-06, + "loss": 0.8047, + "step": 18001 + }, + { + "epoch": 0.97, + "learning_rate": 2.7612355714130568e-06, + "loss": 0.8633, + "step": 18002 + }, + { + "epoch": 0.97, + "learning_rate": 2.752108366085637e-06, + "loss": 0.8086, + "step": 18003 + }, + { + "epoch": 0.97, + "learning_rate": 2.7429962290027033e-06, + "loss": 0.8828, + "step": 18004 + }, + { + "epoch": 0.97, + "learning_rate": 2.733899160440312e-06, + "loss": 0.6562, + "step": 18005 + }, + { + "epoch": 0.97, + "learning_rate": 2.724817160674242e-06, + "loss": 0.7773, + "step": 18006 + }, + { + "epoch": 0.97, + "learning_rate": 2.715750229979663e-06, + "loss": 0.8398, + "step": 18007 + }, + { + "epoch": 0.97, + "learning_rate": 2.706698368631355e-06, + "loss": 0.7578, + "step": 18008 + }, + { + "epoch": 0.97, + "learning_rate": 2.697661576903598e-06, + "loss": 0.6562, + "step": 18009 + }, + { + "epoch": 0.97, + "learning_rate": 2.6886398550702294e-06, + "loss": 0.8789, + "step": 18010 + }, + { + "epoch": 0.97, + "learning_rate": 2.679633203404641e-06, + "loss": 0.5938, + "step": 18011 + }, + { + "epoch": 0.97, + "learning_rate": 2.670641622179726e-06, + "loss": 0.7305, + "step": 18012 + }, + { + "epoch": 0.97, + "learning_rate": 2.661665111668099e-06, + "loss": 0.832, + "step": 18013 + }, + { + "epoch": 0.97, + "learning_rate": 2.6527036721415987e-06, + "loss": 0.8711, + "step": 18014 + }, + { + "epoch": 0.97, + "learning_rate": 2.6437573038718966e-06, + "loss": 0.7227, + "step": 18015 + }, + { + "epoch": 0.97, + "learning_rate": 2.634826007130053e-06, + "loss": 0.8828, + "step": 18016 + }, + { + "epoch": 0.97, + "learning_rate": 2.6259097821867973e-06, + "loss": 0.7266, + "step": 18017 + }, + { + "epoch": 0.97, + "learning_rate": 2.6170086293121896e-06, + "loss": 0.7617, + "step": 18018 + }, + { + "epoch": 0.97, + "learning_rate": 2.6081225487760708e-06, + "loss": 0.8633, + "step": 18019 + }, + { + "epoch": 0.97, + "learning_rate": 2.599251540847614e-06, + "loss": 0.7969, + "step": 18020 + }, + { + "epoch": 0.97, + "learning_rate": 2.5903956057957145e-06, + "loss": 0.8164, + "step": 18021 + }, + { + "epoch": 0.97, + "learning_rate": 2.58155474388877e-06, + "loss": 0.7969, + "step": 18022 + }, + { + "epoch": 0.97, + "learning_rate": 2.572728955394621e-06, + "loss": 0.8164, + "step": 18023 + }, + { + "epoch": 0.97, + "learning_rate": 2.5639182405807205e-06, + "loss": 0.7617, + "step": 18024 + }, + { + "epoch": 0.97, + "learning_rate": 2.5551225997141324e-06, + "loss": 0.9297, + "step": 18025 + }, + { + "epoch": 0.97, + "learning_rate": 2.5463420330613108e-06, + "loss": 0.6992, + "step": 18026 + }, + { + "epoch": 0.97, + "learning_rate": 2.53757654088832e-06, + "loss": 0.7773, + "step": 18027 + }, + { + "epoch": 0.97, + "learning_rate": 2.528826123460892e-06, + "loss": 0.7969, + "step": 18028 + }, + { + "epoch": 0.97, + "learning_rate": 2.5200907810440933e-06, + "loss": 0.8008, + "step": 18029 + }, + { + "epoch": 0.97, + "learning_rate": 2.511370513902711e-06, + "loss": 0.7539, + "step": 18030 + }, + { + "epoch": 0.97, + "learning_rate": 2.5026653223009233e-06, + "loss": 0.8164, + "step": 18031 + }, + { + "epoch": 0.97, + "learning_rate": 2.4939752065025745e-06, + "loss": 0.8555, + "step": 18032 + }, + { + "epoch": 0.97, + "learning_rate": 2.4853001667710097e-06, + "loss": 0.7188, + "step": 18033 + }, + { + "epoch": 0.97, + "learning_rate": 2.4766402033690183e-06, + "loss": 0.8398, + "step": 18034 + }, + { + "epoch": 0.97, + "learning_rate": 2.4679953165591682e-06, + "loss": 0.9414, + "step": 18035 + }, + { + "epoch": 0.97, + "learning_rate": 2.4593655066033616e-06, + "loss": 0.8008, + "step": 18036 + }, + { + "epoch": 0.97, + "learning_rate": 2.450750773763055e-06, + "loss": 0.832, + "step": 18037 + }, + { + "epoch": 0.97, + "learning_rate": 2.442151118299374e-06, + "loss": 0.7812, + "step": 18038 + }, + { + "epoch": 0.97, + "learning_rate": 2.433566540472942e-06, + "loss": 0.7695, + "step": 18039 + }, + { + "epoch": 0.97, + "learning_rate": 2.4249970405437747e-06, + "loss": 0.7188, + "step": 18040 + }, + { + "epoch": 0.97, + "learning_rate": 2.4164426187716636e-06, + "loss": 0.8516, + "step": 18041 + }, + { + "epoch": 0.97, + "learning_rate": 2.4079032754158457e-06, + "loss": 0.7031, + "step": 18042 + }, + { + "epoch": 0.97, + "learning_rate": 2.3993790107350032e-06, + "loss": 0.8672, + "step": 18043 + }, + { + "epoch": 0.97, + "learning_rate": 2.3908698249875405e-06, + "loss": 0.8203, + "step": 18044 + }, + { + "epoch": 0.97, + "learning_rate": 2.3823757184312512e-06, + "loss": 0.8125, + "step": 18045 + }, + { + "epoch": 0.97, + "learning_rate": 2.373896691323596e-06, + "loss": 0.7617, + "step": 18046 + }, + { + "epoch": 0.97, + "learning_rate": 2.365432743921425e-06, + "loss": 0.8477, + "step": 18047 + }, + { + "epoch": 0.97, + "learning_rate": 2.3569838764813112e-06, + "loss": 0.9141, + "step": 18048 + }, + { + "epoch": 0.97, + "learning_rate": 2.3485500892592717e-06, + "loss": 0.7617, + "step": 18049 + }, + { + "epoch": 0.97, + "learning_rate": 2.3401313825107685e-06, + "loss": 0.7734, + "step": 18050 + }, + { + "epoch": 0.97, + "learning_rate": 2.3317277564910423e-06, + "loss": 0.7695, + "step": 18051 + }, + { + "epoch": 0.97, + "learning_rate": 2.323339211454778e-06, + "loss": 0.7617, + "step": 18052 + }, + { + "epoch": 0.97, + "learning_rate": 2.3149657476560505e-06, + "loss": 0.7969, + "step": 18053 + }, + { + "epoch": 0.97, + "learning_rate": 2.306607365348656e-06, + "loss": 0.7773, + "step": 18054 + }, + { + "epoch": 0.97, + "learning_rate": 2.2982640647858934e-06, + "loss": 0.7539, + "step": 18055 + }, + { + "epoch": 0.97, + "learning_rate": 2.289935846220559e-06, + "loss": 0.7461, + "step": 18056 + }, + { + "epoch": 0.97, + "learning_rate": 2.2816227099050623e-06, + "loss": 0.8242, + "step": 18057 + }, + { + "epoch": 0.97, + "learning_rate": 2.273324656091369e-06, + "loss": 0.7812, + "step": 18058 + }, + { + "epoch": 0.97, + "learning_rate": 2.265041685030833e-06, + "loss": 0.8281, + "step": 18059 + }, + { + "epoch": 0.97, + "learning_rate": 2.256773796974476e-06, + "loss": 0.8555, + "step": 18060 + }, + { + "epoch": 0.97, + "learning_rate": 2.2485209921728757e-06, + "loss": 0.7695, + "step": 18061 + }, + { + "epoch": 0.97, + "learning_rate": 2.240283270876109e-06, + "loss": 0.7539, + "step": 18062 + }, + { + "epoch": 0.97, + "learning_rate": 2.23206063333381e-06, + "loss": 0.8047, + "step": 18063 + }, + { + "epoch": 0.97, + "learning_rate": 2.2238530797951684e-06, + "loss": 0.8125, + "step": 18064 + }, + { + "epoch": 0.97, + "learning_rate": 2.215660610508874e-06, + "loss": 0.7852, + "step": 18065 + }, + { + "epoch": 0.97, + "learning_rate": 2.2074832257231727e-06, + "loss": 0.7344, + "step": 18066 + }, + { + "epoch": 0.97, + "learning_rate": 2.199320925685866e-06, + "loss": 0.7344, + "step": 18067 + }, + { + "epoch": 0.97, + "learning_rate": 2.1911737106443676e-06, + "loss": 0.8594, + "step": 18068 + }, + { + "epoch": 0.97, + "learning_rate": 2.1830415808454797e-06, + "loss": 0.793, + "step": 18069 + }, + { + "epoch": 0.97, + "learning_rate": 2.174924536535672e-06, + "loss": 0.8594, + "step": 18070 + }, + { + "epoch": 0.97, + "learning_rate": 2.1668225779608587e-06, + "loss": 0.8281, + "step": 18071 + }, + { + "epoch": 0.97, + "learning_rate": 2.1587357053666768e-06, + "loss": 0.832, + "step": 18072 + }, + { + "epoch": 0.97, + "learning_rate": 2.150663918998097e-06, + "loss": 0.793, + "step": 18073 + }, + { + "epoch": 0.97, + "learning_rate": 2.142607219099757e-06, + "loss": 0.8359, + "step": 18074 + }, + { + "epoch": 0.97, + "learning_rate": 2.134565605915739e-06, + "loss": 0.8164, + "step": 18075 + }, + { + "epoch": 0.97, + "learning_rate": 2.126539079689849e-06, + "loss": 0.8594, + "step": 18076 + }, + { + "epoch": 0.97, + "learning_rate": 2.1185276406651686e-06, + "loss": 0.8438, + "step": 18077 + }, + { + "epoch": 0.97, + "learning_rate": 2.1105312890846163e-06, + "loss": 0.7148, + "step": 18078 + }, + { + "epoch": 0.97, + "learning_rate": 2.102550025190386e-06, + "loss": 0.7422, + "step": 18079 + }, + { + "epoch": 0.97, + "learning_rate": 2.0945838492243964e-06, + "loss": 0.8594, + "step": 18080 + }, + { + "epoch": 0.97, + "learning_rate": 2.086632761428009e-06, + "loss": 0.7891, + "step": 18081 + }, + { + "epoch": 0.97, + "learning_rate": 2.078696762042254e-06, + "loss": 0.8594, + "step": 18082 + }, + { + "epoch": 0.97, + "learning_rate": 2.070775851307549e-06, + "loss": 0.7969, + "step": 18083 + }, + { + "epoch": 0.97, + "learning_rate": 2.0628700294639257e-06, + "loss": 0.7461, + "step": 18084 + }, + { + "epoch": 0.97, + "learning_rate": 2.054979296751025e-06, + "loss": 0.6719, + "step": 18085 + }, + { + "epoch": 0.97, + "learning_rate": 2.0471036534078225e-06, + "loss": 0.7578, + "step": 18086 + }, + { + "epoch": 0.97, + "learning_rate": 2.0392430996730715e-06, + "loss": 0.8008, + "step": 18087 + }, + { + "epoch": 0.97, + "learning_rate": 2.031397635785026e-06, + "loss": 0.75, + "step": 18088 + }, + { + "epoch": 0.97, + "learning_rate": 2.0235672619813293e-06, + "loss": 0.7227, + "step": 18089 + }, + { + "epoch": 0.97, + "learning_rate": 2.0157519784992915e-06, + "loss": 0.7617, + "step": 18090 + }, + { + "epoch": 0.97, + "learning_rate": 2.007951785575779e-06, + "loss": 0.8086, + "step": 18091 + }, + { + "epoch": 0.97, + "learning_rate": 2.000166683447102e-06, + "loss": 0.7969, + "step": 18092 + }, + { + "epoch": 0.97, + "learning_rate": 1.9923966723492393e-06, + "loss": 0.7656, + "step": 18093 + }, + { + "epoch": 0.97, + "learning_rate": 1.9846417525175573e-06, + "loss": 0.7773, + "step": 18094 + }, + { + "epoch": 0.97, + "learning_rate": 1.976901924187202e-06, + "loss": 0.7852, + "step": 18095 + }, + { + "epoch": 0.97, + "learning_rate": 1.9691771875925966e-06, + "loss": 0.8711, + "step": 18096 + }, + { + "epoch": 0.97, + "learning_rate": 1.961467542967832e-06, + "loss": 0.8828, + "step": 18097 + }, + { + "epoch": 0.97, + "learning_rate": 1.953772990546554e-06, + "loss": 0.7461, + "step": 18098 + }, + { + "epoch": 0.97, + "learning_rate": 1.9460935305620207e-06, + "loss": 0.8281, + "step": 18099 + }, + { + "epoch": 0.97, + "learning_rate": 1.9384291632467686e-06, + "loss": 0.7852, + "step": 18100 + }, + { + "epoch": 0.97, + "learning_rate": 1.930779888833223e-06, + "loss": 0.8438, + "step": 18101 + }, + { + "epoch": 0.97, + "learning_rate": 1.923145707553087e-06, + "loss": 0.7891, + "step": 18102 + }, + { + "epoch": 0.97, + "learning_rate": 1.9155266196376754e-06, + "loss": 0.9102, + "step": 18103 + }, + { + "epoch": 0.97, + "learning_rate": 1.907922625318026e-06, + "loss": 0.8359, + "step": 18104 + }, + { + "epoch": 0.97, + "learning_rate": 1.900333724824399e-06, + "loss": 0.8789, + "step": 18105 + }, + { + "epoch": 0.97, + "learning_rate": 1.8927599183867772e-06, + "loss": 0.8359, + "step": 18106 + }, + { + "epoch": 0.97, + "learning_rate": 1.8852012062347547e-06, + "loss": 0.8164, + "step": 18107 + }, + { + "epoch": 0.97, + "learning_rate": 1.8776575885973701e-06, + "loss": 0.8945, + "step": 18108 + }, + { + "epoch": 0.97, + "learning_rate": 1.8701290657031633e-06, + "loss": 0.7305, + "step": 18109 + }, + { + "epoch": 0.97, + "learning_rate": 1.8626156377803406e-06, + "loss": 0.8008, + "step": 18110 + }, + { + "epoch": 0.97, + "learning_rate": 1.8551173050565528e-06, + "loss": 0.7344, + "step": 18111 + }, + { + "epoch": 0.97, + "learning_rate": 1.8476340677590075e-06, + "loss": 0.8203, + "step": 18112 + }, + { + "epoch": 0.97, + "learning_rate": 1.8401659261144676e-06, + "loss": 0.793, + "step": 18113 + }, + { + "epoch": 0.97, + "learning_rate": 1.8327128803492522e-06, + "loss": 0.8555, + "step": 18114 + }, + { + "epoch": 0.97, + "learning_rate": 1.8252749306892913e-06, + "loss": 0.8359, + "step": 18115 + }, + { + "epoch": 0.97, + "learning_rate": 1.8178520773598496e-06, + "loss": 0.7617, + "step": 18116 + }, + { + "epoch": 0.97, + "learning_rate": 1.8104443205859133e-06, + "loss": 0.8125, + "step": 18117 + }, + { + "epoch": 0.97, + "learning_rate": 1.80305166059197e-06, + "loss": 0.8633, + "step": 18118 + }, + { + "epoch": 0.97, + "learning_rate": 1.795674097602118e-06, + "loss": 0.8359, + "step": 18119 + }, + { + "epoch": 0.97, + "learning_rate": 1.7883116318397896e-06, + "loss": 0.8555, + "step": 18120 + }, + { + "epoch": 0.97, + "learning_rate": 1.7809642635281397e-06, + "loss": 0.8164, + "step": 18121 + }, + { + "epoch": 0.97, + "learning_rate": 1.7736319928898236e-06, + "loss": 0.8398, + "step": 18122 + }, + { + "epoch": 0.97, + "learning_rate": 1.7663148201470524e-06, + "loss": 0.8008, + "step": 18123 + }, + { + "epoch": 0.97, + "learning_rate": 1.7590127455215931e-06, + "loss": 0.7969, + "step": 18124 + }, + { + "epoch": 0.97, + "learning_rate": 1.7517257692346022e-06, + "loss": 0.832, + "step": 18125 + }, + { + "epoch": 0.97, + "learning_rate": 1.7444538915070141e-06, + "loss": 0.7969, + "step": 18126 + }, + { + "epoch": 0.97, + "learning_rate": 1.7371971125591523e-06, + "loss": 0.7812, + "step": 18127 + }, + { + "epoch": 0.97, + "learning_rate": 1.7299554326108967e-06, + "loss": 0.7383, + "step": 18128 + }, + { + "epoch": 0.97, + "learning_rate": 1.7227288518816831e-06, + "loss": 0.793, + "step": 18129 + }, + { + "epoch": 0.97, + "learning_rate": 1.715517370590558e-06, + "loss": 0.7852, + "step": 18130 + }, + { + "epoch": 0.97, + "learning_rate": 1.708320988956069e-06, + "loss": 0.8477, + "step": 18131 + }, + { + "epoch": 0.97, + "learning_rate": 1.7011397071962087e-06, + "loss": 0.9297, + "step": 18132 + }, + { + "epoch": 0.97, + "learning_rate": 1.6939735255285803e-06, + "loss": 0.75, + "step": 18133 + }, + { + "epoch": 0.97, + "learning_rate": 1.686822444170455e-06, + "loss": 0.7617, + "step": 18134 + }, + { + "epoch": 0.97, + "learning_rate": 1.6796864633384923e-06, + "loss": 0.832, + "step": 18135 + }, + { + "epoch": 0.97, + "learning_rate": 1.6725655832489083e-06, + "loss": 0.75, + "step": 18136 + }, + { + "epoch": 0.97, + "learning_rate": 1.665459804117475e-06, + "loss": 0.6836, + "step": 18137 + }, + { + "epoch": 0.97, + "learning_rate": 1.6583691261595201e-06, + "loss": 0.8242, + "step": 18138 + }, + { + "epoch": 0.97, + "learning_rate": 1.6512935495899828e-06, + "loss": 0.7656, + "step": 18139 + }, + { + "epoch": 0.97, + "learning_rate": 1.6442330746231916e-06, + "loss": 0.8945, + "step": 18140 + }, + { + "epoch": 0.98, + "learning_rate": 1.6371877014731972e-06, + "loss": 0.8242, + "step": 18141 + }, + { + "epoch": 0.98, + "learning_rate": 1.6301574303533851e-06, + "loss": 0.8789, + "step": 18142 + }, + { + "epoch": 0.98, + "learning_rate": 1.6231422614769175e-06, + "loss": 0.8516, + "step": 18143 + }, + { + "epoch": 0.98, + "learning_rate": 1.6161421950562361e-06, + "loss": 0.7422, + "step": 18144 + }, + { + "epoch": 0.98, + "learning_rate": 1.609157231303615e-06, + "loss": 0.7461, + "step": 18145 + }, + { + "epoch": 0.98, + "learning_rate": 1.6021873704306079e-06, + "loss": 0.8555, + "step": 18146 + }, + { + "epoch": 0.98, + "learning_rate": 1.595232612648434e-06, + "loss": 0.8398, + "step": 18147 + }, + { + "epoch": 0.98, + "learning_rate": 1.588292958167925e-06, + "loss": 0.8086, + "step": 18148 + }, + { + "epoch": 0.98, + "learning_rate": 1.5813684071993573e-06, + "loss": 0.8047, + "step": 18149 + }, + { + "epoch": 0.98, + "learning_rate": 1.5744589599524517e-06, + "loss": 0.75, + "step": 18150 + }, + { + "epoch": 0.98, + "learning_rate": 1.567564616636763e-06, + "loss": 0.7422, + "step": 18151 + }, + { + "epoch": 0.98, + "learning_rate": 1.5606853774610686e-06, + "loss": 0.7539, + "step": 18152 + }, + { + "epoch": 0.98, + "learning_rate": 1.5538212426338682e-06, + "loss": 0.668, + "step": 18153 + }, + { + "epoch": 0.98, + "learning_rate": 1.5469722123632179e-06, + "loss": 0.7773, + "step": 18154 + }, + { + "epoch": 0.98, + "learning_rate": 1.5401382868566182e-06, + "loss": 0.8086, + "step": 18155 + }, + { + "epoch": 0.98, + "learning_rate": 1.5333194663211814e-06, + "loss": 0.7461, + "step": 18156 + }, + { + "epoch": 0.98, + "learning_rate": 1.5265157509635198e-06, + "loss": 0.75, + "step": 18157 + }, + { + "epoch": 0.98, + "learning_rate": 1.5197271409898571e-06, + "loss": 0.7305, + "step": 18158 + }, + { + "epoch": 0.98, + "learning_rate": 1.5129536366058627e-06, + "loss": 0.7305, + "step": 18159 + }, + { + "epoch": 0.98, + "learning_rate": 1.506195238016761e-06, + "loss": 0.8086, + "step": 18160 + }, + { + "epoch": 0.98, + "learning_rate": 1.4994519454274436e-06, + "loss": 0.9141, + "step": 18161 + }, + { + "epoch": 0.98, + "learning_rate": 1.4927237590421916e-06, + "loss": 0.7461, + "step": 18162 + }, + { + "epoch": 0.98, + "learning_rate": 1.486010679064953e-06, + "loss": 0.7695, + "step": 18163 + }, + { + "epoch": 0.98, + "learning_rate": 1.4793127056990652e-06, + "loss": 0.8633, + "step": 18164 + }, + { + "epoch": 0.98, + "learning_rate": 1.4726298391475879e-06, + "loss": 0.8203, + "step": 18165 + }, + { + "epoch": 0.98, + "learning_rate": 1.4659620796129702e-06, + "loss": 0.8906, + "step": 18166 + }, + { + "epoch": 0.98, + "learning_rate": 1.459309427297273e-06, + "loss": 0.7148, + "step": 18167 + }, + { + "epoch": 0.98, + "learning_rate": 1.4526718824021678e-06, + "loss": 0.8203, + "step": 18168 + }, + { + "epoch": 0.98, + "learning_rate": 1.446049445128661e-06, + "loss": 0.8086, + "step": 18169 + }, + { + "epoch": 0.98, + "learning_rate": 1.4394421156775915e-06, + "loss": 0.875, + "step": 18170 + }, + { + "epoch": 0.98, + "learning_rate": 1.4328498942490775e-06, + "loss": 0.7734, + "step": 18171 + }, + { + "epoch": 0.98, + "learning_rate": 1.4262727810428476e-06, + "loss": 0.6797, + "step": 18172 + }, + { + "epoch": 0.98, + "learning_rate": 1.4197107762583538e-06, + "loss": 0.8281, + "step": 18173 + }, + { + "epoch": 0.98, + "learning_rate": 1.413163880094326e-06, + "loss": 0.8203, + "step": 18174 + }, + { + "epoch": 0.98, + "learning_rate": 1.406632092749216e-06, + "loss": 0.7891, + "step": 18175 + }, + { + "epoch": 0.98, + "learning_rate": 1.4001154144209215e-06, + "loss": 0.8086, + "step": 18176 + }, + { + "epoch": 0.98, + "learning_rate": 1.3936138453068957e-06, + "loss": 0.8438, + "step": 18177 + }, + { + "epoch": 0.98, + "learning_rate": 1.3871273856042588e-06, + "loss": 0.6914, + "step": 18178 + }, + { + "epoch": 0.98, + "learning_rate": 1.3806560355095198e-06, + "loss": 0.7734, + "step": 18179 + }, + { + "epoch": 0.98, + "learning_rate": 1.3741997952186892e-06, + "loss": 0.832, + "step": 18180 + }, + { + "epoch": 0.98, + "learning_rate": 1.3677586649276097e-06, + "loss": 0.8359, + "step": 18181 + }, + { + "epoch": 0.98, + "learning_rate": 1.3613326448312369e-06, + "loss": 0.8203, + "step": 18182 + }, + { + "epoch": 0.98, + "learning_rate": 1.3549217351245258e-06, + "loss": 0.8516, + "step": 18183 + }, + { + "epoch": 0.98, + "learning_rate": 1.3485259360015434e-06, + "loss": 0.6133, + "step": 18184 + }, + { + "epoch": 0.98, + "learning_rate": 1.3421452476563012e-06, + "loss": 0.8047, + "step": 18185 + }, + { + "epoch": 0.98, + "learning_rate": 1.335779670281978e-06, + "loss": 0.8555, + "step": 18186 + }, + { + "epoch": 0.98, + "learning_rate": 1.3294292040715861e-06, + "loss": 0.7344, + "step": 18187 + }, + { + "epoch": 0.98, + "learning_rate": 1.3230938492174716e-06, + "loss": 0.7539, + "step": 18188 + }, + { + "epoch": 0.98, + "learning_rate": 1.316773605911703e-06, + "loss": 0.7266, + "step": 18189 + }, + { + "epoch": 0.98, + "learning_rate": 1.310468474345794e-06, + "loss": 0.8164, + "step": 18190 + }, + { + "epoch": 0.98, + "learning_rate": 1.3041784547108137e-06, + "loss": 0.8047, + "step": 18191 + }, + { + "epoch": 0.98, + "learning_rate": 1.2979035471972767e-06, + "loss": 0.9375, + "step": 18192 + }, + { + "epoch": 0.98, + "learning_rate": 1.2916437519954749e-06, + "loss": 0.8203, + "step": 18193 + }, + { + "epoch": 0.98, + "learning_rate": 1.2853990692949791e-06, + "loss": 0.7383, + "step": 18194 + }, + { + "epoch": 0.98, + "learning_rate": 1.2791694992850822e-06, + "loss": 0.7578, + "step": 18195 + }, + { + "epoch": 0.98, + "learning_rate": 1.2729550421545777e-06, + "loss": 0.8086, + "step": 18196 + }, + { + "epoch": 0.98, + "learning_rate": 1.266755698091704e-06, + "loss": 0.875, + "step": 18197 + }, + { + "epoch": 0.98, + "learning_rate": 1.2605714672844216e-06, + "loss": 0.7852, + "step": 18198 + }, + { + "epoch": 0.98, + "learning_rate": 1.2544023499200808e-06, + "loss": 0.8594, + "step": 18199 + }, + { + "epoch": 0.98, + "learning_rate": 1.2482483461856432e-06, + "loss": 0.7617, + "step": 18200 + }, + { + "epoch": 0.98, + "learning_rate": 1.2421094562675706e-06, + "loss": 0.7812, + "step": 18201 + }, + { + "epoch": 0.98, + "learning_rate": 1.235985680351881e-06, + "loss": 0.8398, + "step": 18202 + }, + { + "epoch": 0.98, + "learning_rate": 1.2298770186242037e-06, + "loss": 0.8281, + "step": 18203 + }, + { + "epoch": 0.98, + "learning_rate": 1.2237834712696128e-06, + "loss": 0.7422, + "step": 18204 + }, + { + "epoch": 0.98, + "learning_rate": 1.217705038472794e-06, + "loss": 0.8203, + "step": 18205 + }, + { + "epoch": 0.98, + "learning_rate": 1.2116417204178775e-06, + "loss": 0.8203, + "step": 18206 + }, + { + "epoch": 0.98, + "learning_rate": 1.2055935172886612e-06, + "loss": 0.7188, + "step": 18207 + }, + { + "epoch": 0.98, + "learning_rate": 1.199560429268387e-06, + "loss": 0.7188, + "step": 18208 + }, + { + "epoch": 0.98, + "learning_rate": 1.193542456539909e-06, + "loss": 0.7461, + "step": 18209 + }, + { + "epoch": 0.98, + "learning_rate": 1.1875395992855808e-06, + "loss": 0.8086, + "step": 18210 + }, + { + "epoch": 0.98, + "learning_rate": 1.181551857687313e-06, + "loss": 0.7695, + "step": 18211 + }, + { + "epoch": 0.98, + "learning_rate": 1.1755792319265157e-06, + "loss": 0.7734, + "step": 18212 + }, + { + "epoch": 0.98, + "learning_rate": 1.1696217221842665e-06, + "loss": 0.8203, + "step": 18213 + }, + { + "epoch": 0.98, + "learning_rate": 1.1636793286409765e-06, + "loss": 0.8633, + "step": 18214 + }, + { + "epoch": 0.98, + "learning_rate": 1.1577520514768346e-06, + "loss": 0.7695, + "step": 18215 + }, + { + "epoch": 0.98, + "learning_rate": 1.1518398908713646e-06, + "loss": 0.6758, + "step": 18216 + }, + { + "epoch": 0.98, + "learning_rate": 1.1459428470038114e-06, + "loss": 0.8164, + "step": 18217 + }, + { + "epoch": 0.98, + "learning_rate": 1.14006092005281e-06, + "loss": 0.8867, + "step": 18218 + }, + { + "epoch": 0.98, + "learning_rate": 1.1341941101966068e-06, + "loss": 0.7383, + "step": 18219 + }, + { + "epoch": 0.98, + "learning_rate": 1.1283424176130597e-06, + "loss": 0.7852, + "step": 18220 + }, + { + "epoch": 0.98, + "learning_rate": 1.1225058424794154e-06, + "loss": 0.7852, + "step": 18221 + }, + { + "epoch": 0.98, + "learning_rate": 1.1166843849725327e-06, + "loss": 0.9219, + "step": 18222 + }, + { + "epoch": 0.98, + "learning_rate": 1.1108780452688261e-06, + "loss": 0.7617, + "step": 18223 + }, + { + "epoch": 0.98, + "learning_rate": 1.1050868235443211e-06, + "loss": 0.8828, + "step": 18224 + }, + { + "epoch": 0.98, + "learning_rate": 1.0993107199744335e-06, + "loss": 0.8477, + "step": 18225 + }, + { + "epoch": 0.98, + "learning_rate": 1.093549734734245e-06, + "loss": 0.8516, + "step": 18226 + }, + { + "epoch": 0.98, + "learning_rate": 1.0878038679982827e-06, + "loss": 0.8008, + "step": 18227 + }, + { + "epoch": 0.98, + "learning_rate": 1.0820731199407408e-06, + "loss": 0.8203, + "step": 18228 + }, + { + "epoch": 0.98, + "learning_rate": 1.0763574907352025e-06, + "loss": 0.8086, + "step": 18229 + }, + { + "epoch": 0.98, + "learning_rate": 1.0706569805549182e-06, + "loss": 0.7656, + "step": 18230 + }, + { + "epoch": 0.98, + "learning_rate": 1.0649715895725832e-06, + "loss": 0.832, + "step": 18231 + }, + { + "epoch": 0.98, + "learning_rate": 1.0593013179605038e-06, + "loss": 0.7617, + "step": 18232 + }, + { + "epoch": 0.98, + "learning_rate": 1.0536461658905427e-06, + "loss": 0.7773, + "step": 18233 + }, + { + "epoch": 0.98, + "learning_rate": 1.048006133534063e-06, + "loss": 0.7773, + "step": 18234 + }, + { + "epoch": 0.98, + "learning_rate": 1.0423812210619278e-06, + "loss": 0.8398, + "step": 18235 + }, + { + "epoch": 0.98, + "learning_rate": 1.0367714286446673e-06, + "loss": 0.8594, + "step": 18236 + }, + { + "epoch": 0.98, + "learning_rate": 1.0311767564522567e-06, + "loss": 0.7734, + "step": 18237 + }, + { + "epoch": 0.98, + "learning_rate": 1.0255972046541163e-06, + "loss": 0.8086, + "step": 18238 + }, + { + "epoch": 0.98, + "learning_rate": 1.0200327734194437e-06, + "loss": 0.8789, + "step": 18239 + }, + { + "epoch": 0.98, + "learning_rate": 1.014483462916882e-06, + "loss": 0.7031, + "step": 18240 + }, + { + "epoch": 0.98, + "learning_rate": 1.008949273314519e-06, + "loss": 0.7852, + "step": 18241 + }, + { + "epoch": 0.98, + "learning_rate": 1.0034302047800537e-06, + "loss": 0.7656, + "step": 18242 + }, + { + "epoch": 0.98, + "learning_rate": 9.979262574807967e-07, + "loss": 0.8164, + "step": 18243 + }, + { + "epoch": 0.98, + "learning_rate": 9.924374315835038e-07, + "loss": 0.7539, + "step": 18244 + }, + { + "epoch": 0.98, + "learning_rate": 9.869637272544862e-07, + "loss": 0.8477, + "step": 18245 + }, + { + "epoch": 0.98, + "learning_rate": 9.815051446596113e-07, + "loss": 0.7227, + "step": 18246 + }, + { + "epoch": 0.98, + "learning_rate": 9.760616839643577e-07, + "loss": 0.8242, + "step": 18247 + }, + { + "epoch": 0.98, + "learning_rate": 9.706333453335936e-07, + "loss": 0.832, + "step": 18248 + }, + { + "epoch": 0.98, + "learning_rate": 9.65220128931854e-07, + "loss": 0.7539, + "step": 18249 + }, + { + "epoch": 0.98, + "learning_rate": 9.598220349231746e-07, + "loss": 0.8008, + "step": 18250 + }, + { + "epoch": 0.98, + "learning_rate": 9.54439063471202e-07, + "loss": 0.7891, + "step": 18251 + }, + { + "epoch": 0.98, + "learning_rate": 9.490712147389168e-07, + "loss": 0.7148, + "step": 18252 + }, + { + "epoch": 0.98, + "learning_rate": 9.437184888891337e-07, + "loss": 0.7148, + "step": 18253 + }, + { + "epoch": 0.98, + "learning_rate": 9.383808860839449e-07, + "loss": 0.9023, + "step": 18254 + }, + { + "epoch": 0.98, + "learning_rate": 9.3305840648511e-07, + "loss": 0.8047, + "step": 18255 + }, + { + "epoch": 0.98, + "learning_rate": 9.277510502539999e-07, + "loss": 0.7656, + "step": 18256 + }, + { + "epoch": 0.98, + "learning_rate": 9.224588175513749e-07, + "loss": 0.75, + "step": 18257 + }, + { + "epoch": 0.98, + "learning_rate": 9.171817085376066e-07, + "loss": 0.8828, + "step": 18258 + }, + { + "epoch": 0.98, + "learning_rate": 9.119197233726229e-07, + "loss": 0.7852, + "step": 18259 + }, + { + "epoch": 0.98, + "learning_rate": 9.066728622158516e-07, + "loss": 0.7656, + "step": 18260 + }, + { + "epoch": 0.98, + "learning_rate": 9.014411252263876e-07, + "loss": 0.7695, + "step": 18261 + }, + { + "epoch": 0.98, + "learning_rate": 8.9622451256266e-07, + "loss": 0.8477, + "step": 18262 + }, + { + "epoch": 0.98, + "learning_rate": 8.910230243828199e-07, + "loss": 0.7539, + "step": 18263 + }, + { + "epoch": 0.98, + "learning_rate": 8.858366608444079e-07, + "loss": 0.8164, + "step": 18264 + }, + { + "epoch": 0.98, + "learning_rate": 8.806654221046873e-07, + "loss": 0.7188, + "step": 18265 + }, + { + "epoch": 0.98, + "learning_rate": 8.755093083203103e-07, + "loss": 0.8203, + "step": 18266 + }, + { + "epoch": 0.98, + "learning_rate": 8.703683196475409e-07, + "loss": 0.8281, + "step": 18267 + }, + { + "epoch": 0.98, + "learning_rate": 8.65242456242088e-07, + "loss": 0.6953, + "step": 18268 + }, + { + "epoch": 0.98, + "learning_rate": 8.601317182594381e-07, + "loss": 0.8242, + "step": 18269 + }, + { + "epoch": 0.98, + "learning_rate": 8.550361058543566e-07, + "loss": 0.7695, + "step": 18270 + }, + { + "epoch": 0.98, + "learning_rate": 8.499556191812197e-07, + "loss": 0.7969, + "step": 18271 + }, + { + "epoch": 0.98, + "learning_rate": 8.448902583940709e-07, + "loss": 0.8359, + "step": 18272 + }, + { + "epoch": 0.98, + "learning_rate": 8.398400236463988e-07, + "loss": 0.8711, + "step": 18273 + }, + { + "epoch": 0.98, + "learning_rate": 8.348049150912473e-07, + "loss": 0.8047, + "step": 18274 + }, + { + "epoch": 0.98, + "learning_rate": 8.297849328811058e-07, + "loss": 0.7695, + "step": 18275 + }, + { + "epoch": 0.98, + "learning_rate": 8.247800771681857e-07, + "loss": 0.8477, + "step": 18276 + }, + { + "epoch": 0.98, + "learning_rate": 8.197903481041436e-07, + "loss": 0.8203, + "step": 18277 + }, + { + "epoch": 0.98, + "learning_rate": 8.148157458401362e-07, + "loss": 0.7539, + "step": 18278 + }, + { + "epoch": 0.98, + "learning_rate": 8.098562705269319e-07, + "loss": 0.8281, + "step": 18279 + }, + { + "epoch": 0.98, + "learning_rate": 8.049119223148549e-07, + "loss": 0.8477, + "step": 18280 + }, + { + "epoch": 0.98, + "learning_rate": 7.999827013536743e-07, + "loss": 0.7305, + "step": 18281 + }, + { + "epoch": 0.98, + "learning_rate": 7.95068607792826e-07, + "loss": 0.8359, + "step": 18282 + }, + { + "epoch": 0.98, + "learning_rate": 7.90169641781191e-07, + "loss": 0.7852, + "step": 18283 + }, + { + "epoch": 0.98, + "learning_rate": 7.852858034672061e-07, + "loss": 0.8242, + "step": 18284 + }, + { + "epoch": 0.98, + "learning_rate": 7.804170929989196e-07, + "loss": 0.7578, + "step": 18285 + }, + { + "epoch": 0.98, + "learning_rate": 7.755635105238246e-07, + "loss": 0.7656, + "step": 18286 + }, + { + "epoch": 0.98, + "learning_rate": 7.707250561890256e-07, + "loss": 0.7383, + "step": 18287 + }, + { + "epoch": 0.98, + "learning_rate": 7.65901730141072e-07, + "loss": 0.7734, + "step": 18288 + }, + { + "epoch": 0.98, + "learning_rate": 7.610935325262358e-07, + "loss": 0.7773, + "step": 18289 + }, + { + "epoch": 0.98, + "learning_rate": 7.563004634901782e-07, + "loss": 0.7891, + "step": 18290 + }, + { + "epoch": 0.98, + "learning_rate": 7.515225231781164e-07, + "loss": 0.8867, + "step": 18291 + }, + { + "epoch": 0.98, + "learning_rate": 7.467597117348235e-07, + "loss": 0.7656, + "step": 18292 + }, + { + "epoch": 0.98, + "learning_rate": 7.42012029304684e-07, + "loss": 0.7852, + "step": 18293 + }, + { + "epoch": 0.98, + "learning_rate": 7.372794760315827e-07, + "loss": 0.6992, + "step": 18294 + }, + { + "epoch": 0.98, + "learning_rate": 7.325620520588494e-07, + "loss": 0.7578, + "step": 18295 + }, + { + "epoch": 0.98, + "learning_rate": 7.278597575295365e-07, + "loss": 0.8867, + "step": 18296 + }, + { + "epoch": 0.98, + "learning_rate": 7.231725925860855e-07, + "loss": 0.8594, + "step": 18297 + }, + { + "epoch": 0.98, + "learning_rate": 7.185005573704939e-07, + "loss": 0.8242, + "step": 18298 + }, + { + "epoch": 0.98, + "learning_rate": 7.138436520243707e-07, + "loss": 0.8164, + "step": 18299 + }, + { + "epoch": 0.98, + "learning_rate": 7.092018766888808e-07, + "loss": 0.7656, + "step": 18300 + }, + { + "epoch": 0.98, + "learning_rate": 7.045752315046339e-07, + "loss": 0.8477, + "step": 18301 + }, + { + "epoch": 0.98, + "learning_rate": 6.999637166118511e-07, + "loss": 0.8125, + "step": 18302 + }, + { + "epoch": 0.98, + "learning_rate": 6.95367332150254e-07, + "loss": 0.6797, + "step": 18303 + }, + { + "epoch": 0.98, + "learning_rate": 6.907860782591757e-07, + "loss": 0.9141, + "step": 18304 + }, + { + "epoch": 0.98, + "learning_rate": 6.86219955077394e-07, + "loss": 0.8984, + "step": 18305 + }, + { + "epoch": 0.98, + "learning_rate": 6.816689627433536e-07, + "loss": 0.8203, + "step": 18306 + }, + { + "epoch": 0.98, + "learning_rate": 6.771331013948335e-07, + "loss": 0.9062, + "step": 18307 + }, + { + "epoch": 0.98, + "learning_rate": 6.726123711693899e-07, + "loss": 0.8281, + "step": 18308 + }, + { + "epoch": 0.98, + "learning_rate": 6.681067722040246e-07, + "loss": 0.8281, + "step": 18309 + }, + { + "epoch": 0.98, + "learning_rate": 6.636163046351839e-07, + "loss": 0.7891, + "step": 18310 + }, + { + "epoch": 0.98, + "learning_rate": 6.591409685989813e-07, + "loss": 0.7031, + "step": 18311 + }, + { + "epoch": 0.98, + "learning_rate": 6.546807642310859e-07, + "loss": 0.7891, + "step": 18312 + }, + { + "epoch": 0.98, + "learning_rate": 6.502356916666119e-07, + "loss": 0.9375, + "step": 18313 + }, + { + "epoch": 0.98, + "learning_rate": 6.458057510402848e-07, + "loss": 0.7812, + "step": 18314 + }, + { + "epoch": 0.98, + "learning_rate": 6.413909424862751e-07, + "loss": 0.7109, + "step": 18315 + }, + { + "epoch": 0.98, + "learning_rate": 6.369912661384758e-07, + "loss": 0.8477, + "step": 18316 + }, + { + "epoch": 0.98, + "learning_rate": 6.326067221301135e-07, + "loss": 0.7539, + "step": 18317 + }, + { + "epoch": 0.98, + "learning_rate": 6.282373105940819e-07, + "loss": 0.793, + "step": 18318 + }, + { + "epoch": 0.98, + "learning_rate": 6.238830316628307e-07, + "loss": 0.8242, + "step": 18319 + }, + { + "epoch": 0.98, + "learning_rate": 6.195438854682544e-07, + "loss": 0.8359, + "step": 18320 + }, + { + "epoch": 0.98, + "learning_rate": 6.152198721419145e-07, + "loss": 0.7539, + "step": 18321 + }, + { + "epoch": 0.98, + "learning_rate": 6.109109918147615e-07, + "loss": 0.7891, + "step": 18322 + }, + { + "epoch": 0.98, + "learning_rate": 6.066172446174135e-07, + "loss": 0.7109, + "step": 18323 + }, + { + "epoch": 0.98, + "learning_rate": 6.023386306799883e-07, + "loss": 0.7617, + "step": 18324 + }, + { + "epoch": 0.98, + "learning_rate": 5.980751501321046e-07, + "loss": 0.8125, + "step": 18325 + }, + { + "epoch": 0.98, + "learning_rate": 5.938268031029925e-07, + "loss": 0.7852, + "step": 18326 + }, + { + "epoch": 0.99, + "learning_rate": 5.895935897213823e-07, + "loss": 0.7695, + "step": 18327 + }, + { + "epoch": 0.99, + "learning_rate": 5.8537551011556e-07, + "loss": 0.7773, + "step": 18328 + }, + { + "epoch": 0.99, + "learning_rate": 5.811725644133125e-07, + "loss": 0.7539, + "step": 18329 + }, + { + "epoch": 0.99, + "learning_rate": 5.76984752742038e-07, + "loss": 0.7617, + "step": 18330 + }, + { + "epoch": 0.99, + "learning_rate": 5.728120752286902e-07, + "loss": 0.8945, + "step": 18331 + }, + { + "epoch": 0.99, + "learning_rate": 5.686545319996128e-07, + "loss": 0.7461, + "step": 18332 + }, + { + "epoch": 0.99, + "learning_rate": 5.645121231808159e-07, + "loss": 0.8555, + "step": 18333 + }, + { + "epoch": 0.99, + "learning_rate": 5.603848488979213e-07, + "loss": 0.8203, + "step": 18334 + }, + { + "epoch": 0.99, + "learning_rate": 5.562727092758845e-07, + "loss": 0.7695, + "step": 18335 + }, + { + "epoch": 0.99, + "learning_rate": 5.521757044393838e-07, + "loss": 0.7891, + "step": 18336 + }, + { + "epoch": 0.99, + "learning_rate": 5.480938345125419e-07, + "loss": 0.7695, + "step": 18337 + }, + { + "epoch": 0.99, + "learning_rate": 5.440270996190378e-07, + "loss": 0.8789, + "step": 18338 + }, + { + "epoch": 0.99, + "learning_rate": 5.399754998821615e-07, + "loss": 0.7109, + "step": 18339 + }, + { + "epoch": 0.99, + "learning_rate": 5.359390354246485e-07, + "loss": 0.7617, + "step": 18340 + }, + { + "epoch": 0.99, + "learning_rate": 5.319177063687897e-07, + "loss": 0.8008, + "step": 18341 + }, + { + "epoch": 0.99, + "learning_rate": 5.279115128365431e-07, + "loss": 0.8672, + "step": 18342 + }, + { + "epoch": 0.99, + "learning_rate": 5.239204549492005e-07, + "loss": 0.7656, + "step": 18343 + }, + { + "epoch": 0.99, + "learning_rate": 5.199445328277764e-07, + "loss": 0.7734, + "step": 18344 + }, + { + "epoch": 0.99, + "learning_rate": 5.159837465927298e-07, + "loss": 0.8633, + "step": 18345 + }, + { + "epoch": 0.99, + "learning_rate": 5.120380963640759e-07, + "loss": 0.7344, + "step": 18346 + }, + { + "epoch": 0.99, + "learning_rate": 5.081075822613856e-07, + "loss": 0.7188, + "step": 18347 + }, + { + "epoch": 0.99, + "learning_rate": 5.041922044037861e-07, + "loss": 0.832, + "step": 18348 + }, + { + "epoch": 0.99, + "learning_rate": 5.002919629099046e-07, + "loss": 0.7969, + "step": 18349 + }, + { + "epoch": 0.99, + "learning_rate": 4.964068578979797e-07, + "loss": 0.6758, + "step": 18350 + }, + { + "epoch": 0.99, + "learning_rate": 4.925368894856397e-07, + "loss": 0.9062, + "step": 18351 + }, + { + "epoch": 0.99, + "learning_rate": 4.886820577902352e-07, + "loss": 0.8398, + "step": 18352 + }, + { + "epoch": 0.99, + "learning_rate": 4.84842362928617e-07, + "loss": 0.8008, + "step": 18353 + }, + { + "epoch": 0.99, + "learning_rate": 4.810178050170255e-07, + "loss": 0.7227, + "step": 18354 + }, + { + "epoch": 0.99, + "learning_rate": 4.772083841714237e-07, + "loss": 0.8672, + "step": 18355 + }, + { + "epoch": 0.99, + "learning_rate": 4.7341410050727453e-07, + "loss": 0.7852, + "step": 18356 + }, + { + "epoch": 0.99, + "learning_rate": 4.696349541394862e-07, + "loss": 0.7188, + "step": 18357 + }, + { + "epoch": 0.99, + "learning_rate": 4.658709451826337e-07, + "loss": 0.8477, + "step": 18358 + }, + { + "epoch": 0.99, + "learning_rate": 4.621220737507925e-07, + "loss": 0.7266, + "step": 18359 + }, + { + "epoch": 0.99, + "learning_rate": 4.583883399575384e-07, + "loss": 0.793, + "step": 18360 + }, + { + "epoch": 0.99, + "learning_rate": 4.5466974391600303e-07, + "loss": 0.7578, + "step": 18361 + }, + { + "epoch": 0.99, + "learning_rate": 4.509662857388741e-07, + "loss": 0.7461, + "step": 18362 + }, + { + "epoch": 0.99, + "learning_rate": 4.4727796553839515e-07, + "loss": 0.8008, + "step": 18363 + }, + { + "epoch": 0.99, + "learning_rate": 4.4360478342631016e-07, + "loss": 0.7344, + "step": 18364 + }, + { + "epoch": 0.99, + "learning_rate": 4.3994673951403e-07, + "loss": 0.8867, + "step": 18365 + }, + { + "epoch": 0.99, + "learning_rate": 4.3630383391224383e-07, + "loss": 0.5938, + "step": 18366 + }, + { + "epoch": 0.99, + "learning_rate": 4.326760667314189e-07, + "loss": 0.832, + "step": 18367 + }, + { + "epoch": 0.99, + "learning_rate": 4.2906343808152283e-07, + "loss": 0.7656, + "step": 18368 + }, + { + "epoch": 0.99, + "learning_rate": 4.2546594807202356e-07, + "loss": 0.8242, + "step": 18369 + }, + { + "epoch": 0.99, + "learning_rate": 4.218835968118895e-07, + "loss": 0.75, + "step": 18370 + }, + { + "epoch": 0.99, + "learning_rate": 4.1831638440970043e-07, + "loss": 0.7266, + "step": 18371 + }, + { + "epoch": 0.99, + "learning_rate": 4.1476431097353664e-07, + "loss": 0.8125, + "step": 18372 + }, + { + "epoch": 0.99, + "learning_rate": 4.1122737661108966e-07, + "loss": 0.7422, + "step": 18373 + }, + { + "epoch": 0.99, + "learning_rate": 4.077055814294961e-07, + "loss": 0.7812, + "step": 18374 + }, + { + "epoch": 0.99, + "learning_rate": 4.0419892553544833e-07, + "loss": 0.6992, + "step": 18375 + }, + { + "epoch": 0.99, + "learning_rate": 4.007074090353058e-07, + "loss": 0.7266, + "step": 18376 + }, + { + "epoch": 0.99, + "learning_rate": 3.9723103203481713e-07, + "loss": 0.8242, + "step": 18377 + }, + { + "epoch": 0.99, + "learning_rate": 3.9376979463928706e-07, + "loss": 0.7695, + "step": 18378 + }, + { + "epoch": 0.99, + "learning_rate": 3.903236969536872e-07, + "loss": 0.7969, + "step": 18379 + }, + { + "epoch": 0.99, + "learning_rate": 3.8689273908237843e-07, + "loss": 0.8164, + "step": 18380 + }, + { + "epoch": 0.99, + "learning_rate": 3.834769211293332e-07, + "loss": 0.7812, + "step": 18381 + }, + { + "epoch": 0.99, + "learning_rate": 3.800762431981353e-07, + "loss": 0.7969, + "step": 18382 + }, + { + "epoch": 0.99, + "learning_rate": 3.766907053917024e-07, + "loss": 0.7656, + "step": 18383 + }, + { + "epoch": 0.99, + "learning_rate": 3.7332030781278556e-07, + "loss": 0.8125, + "step": 18384 + }, + { + "epoch": 0.99, + "learning_rate": 3.6996505056341446e-07, + "loss": 0.8125, + "step": 18385 + }, + { + "epoch": 0.99, + "learning_rate": 3.666249337452854e-07, + "loss": 0.6562, + "step": 18386 + }, + { + "epoch": 0.99, + "learning_rate": 3.632999574596507e-07, + "loss": 0.7734, + "step": 18387 + }, + { + "epoch": 0.99, + "learning_rate": 3.599901218072077e-07, + "loss": 0.8477, + "step": 18388 + }, + { + "epoch": 0.99, + "learning_rate": 3.5669542688826496e-07, + "loss": 0.8516, + "step": 18389 + }, + { + "epoch": 0.99, + "learning_rate": 3.5341587280274255e-07, + "loss": 0.7188, + "step": 18390 + }, + { + "epoch": 0.99, + "learning_rate": 3.501514596498945e-07, + "loss": 0.8672, + "step": 18391 + }, + { + "epoch": 0.99, + "learning_rate": 3.469021875288081e-07, + "loss": 0.8906, + "step": 18392 + }, + { + "epoch": 0.99, + "learning_rate": 3.436680565377381e-07, + "loss": 0.8828, + "step": 18393 + }, + { + "epoch": 0.99, + "learning_rate": 3.4044906677488385e-07, + "loss": 0.7969, + "step": 18394 + }, + { + "epoch": 0.99, + "learning_rate": 3.372452183376673e-07, + "loss": 0.7891, + "step": 18395 + }, + { + "epoch": 0.99, + "learning_rate": 3.34056511323233e-07, + "loss": 0.8242, + "step": 18396 + }, + { + "epoch": 0.99, + "learning_rate": 3.3088294582822585e-07, + "loss": 0.7539, + "step": 18397 + }, + { + "epoch": 0.99, + "learning_rate": 3.277245219487357e-07, + "loss": 0.875, + "step": 18398 + }, + { + "epoch": 0.99, + "learning_rate": 3.245812397805192e-07, + "loss": 0.8516, + "step": 18399 + }, + { + "epoch": 0.99, + "learning_rate": 3.214530994188891e-07, + "loss": 0.7891, + "step": 18400 + }, + { + "epoch": 0.99, + "learning_rate": 3.183401009585474e-07, + "loss": 0.8516, + "step": 18401 + }, + { + "epoch": 0.99, + "learning_rate": 3.1524224449380745e-07, + "loss": 0.7812, + "step": 18402 + }, + { + "epoch": 0.99, + "learning_rate": 3.1215953011864973e-07, + "loss": 0.8164, + "step": 18403 + }, + { + "epoch": 0.99, + "learning_rate": 3.0909195792644396e-07, + "loss": 0.8008, + "step": 18404 + }, + { + "epoch": 0.99, + "learning_rate": 3.060395280101158e-07, + "loss": 0.7148, + "step": 18405 + }, + { + "epoch": 0.99, + "learning_rate": 3.0300224046220235e-07, + "loss": 0.793, + "step": 18406 + }, + { + "epoch": 0.99, + "learning_rate": 2.9998009537474113e-07, + "loss": 0.7344, + "step": 18407 + }, + { + "epoch": 0.99, + "learning_rate": 2.9697309283927e-07, + "loss": 0.8594, + "step": 18408 + }, + { + "epoch": 0.99, + "learning_rate": 2.939812329469382e-07, + "loss": 0.8359, + "step": 18409 + }, + { + "epoch": 0.99, + "learning_rate": 2.910045157884511e-07, + "loss": 0.7617, + "step": 18410 + }, + { + "epoch": 0.99, + "learning_rate": 2.880429414539587e-07, + "loss": 0.8711, + "step": 18411 + }, + { + "epoch": 0.99, + "learning_rate": 2.8509651003322256e-07, + "loss": 0.8281, + "step": 18412 + }, + { + "epoch": 0.99, + "learning_rate": 2.8216522161556014e-07, + "loss": 0.7539, + "step": 18413 + }, + { + "epoch": 0.99, + "learning_rate": 2.792490762897337e-07, + "loss": 0.7461, + "step": 18414 + }, + { + "epoch": 0.99, + "learning_rate": 2.7634807414417264e-07, + "loss": 0.8125, + "step": 18415 + }, + { + "epoch": 0.99, + "learning_rate": 2.7346221526675095e-07, + "loss": 0.6797, + "step": 18416 + }, + { + "epoch": 0.99, + "learning_rate": 2.7059149974489885e-07, + "loss": 0.7852, + "step": 18417 + }, + { + "epoch": 0.99, + "learning_rate": 2.677359276656577e-07, + "loss": 0.7852, + "step": 18418 + }, + { + "epoch": 0.99, + "learning_rate": 2.648954991155694e-07, + "loss": 0.7539, + "step": 18419 + }, + { + "epoch": 0.99, + "learning_rate": 2.6207021418067636e-07, + "loss": 0.7578, + "step": 18420 + }, + { + "epoch": 0.99, + "learning_rate": 2.5926007294657663e-07, + "loss": 0.8125, + "step": 18421 + }, + { + "epoch": 0.99, + "learning_rate": 2.5646507549847984e-07, + "loss": 0.6914, + "step": 18422 + }, + { + "epoch": 0.99, + "learning_rate": 2.536852219210406e-07, + "loss": 0.8281, + "step": 18423 + }, + { + "epoch": 0.99, + "learning_rate": 2.509205122985248e-07, + "loss": 0.7461, + "step": 18424 + }, + { + "epoch": 0.99, + "learning_rate": 2.4817094671464315e-07, + "loss": 0.8516, + "step": 18425 + }, + { + "epoch": 0.99, + "learning_rate": 2.45436525252829e-07, + "loss": 0.7617, + "step": 18426 + }, + { + "epoch": 0.99, + "learning_rate": 2.427172479958495e-07, + "loss": 0.8086, + "step": 18427 + }, + { + "epoch": 0.99, + "learning_rate": 2.4001311502619414e-07, + "loss": 0.8086, + "step": 18428 + }, + { + "epoch": 0.99, + "learning_rate": 2.3732412642574197e-07, + "loss": 0.8984, + "step": 18429 + }, + { + "epoch": 0.99, + "learning_rate": 2.3465028227592778e-07, + "loss": 0.6602, + "step": 18430 + }, + { + "epoch": 0.99, + "learning_rate": 2.3199158265790888e-07, + "loss": 0.8242, + "step": 18431 + }, + { + "epoch": 0.99, + "learning_rate": 2.2934802765217645e-07, + "loss": 0.8047, + "step": 18432 + }, + { + "epoch": 0.99, + "learning_rate": 2.2671961733883305e-07, + "loss": 0.7617, + "step": 18433 + }, + { + "epoch": 0.99, + "learning_rate": 2.2410635179759276e-07, + "loss": 0.9531, + "step": 18434 + }, + { + "epoch": 0.99, + "learning_rate": 2.215082311075589e-07, + "loss": 0.7148, + "step": 18435 + }, + { + "epoch": 0.99, + "learning_rate": 2.1892525534750184e-07, + "loss": 0.8438, + "step": 18436 + }, + { + "epoch": 0.99, + "learning_rate": 2.163574245956923e-07, + "loss": 0.8711, + "step": 18437 + }, + { + "epoch": 0.99, + "learning_rate": 2.138047389299569e-07, + "loss": 0.7891, + "step": 18438 + }, + { + "epoch": 0.99, + "learning_rate": 2.1126719842767817e-07, + "loss": 0.7188, + "step": 18439 + }, + { + "epoch": 0.99, + "learning_rate": 2.0874480316568357e-07, + "loss": 0.7305, + "step": 18440 + }, + { + "epoch": 0.99, + "learning_rate": 2.0623755322041194e-07, + "loss": 0.7773, + "step": 18441 + }, + { + "epoch": 0.99, + "learning_rate": 2.0374544866791355e-07, + "loss": 0.7578, + "step": 18442 + }, + { + "epoch": 0.99, + "learning_rate": 2.012684895836281e-07, + "loss": 0.9258, + "step": 18443 + }, + { + "epoch": 0.99, + "learning_rate": 1.9880667604266212e-07, + "loss": 0.7461, + "step": 18444 + }, + { + "epoch": 0.99, + "learning_rate": 1.9636000811962262e-07, + "loss": 0.8008, + "step": 18445 + }, + { + "epoch": 0.99, + "learning_rate": 1.93928485888617e-07, + "loss": 0.7305, + "step": 18446 + }, + { + "epoch": 0.99, + "learning_rate": 1.9151210942330855e-07, + "loss": 0.8086, + "step": 18447 + }, + { + "epoch": 0.99, + "learning_rate": 1.891108787970275e-07, + "loss": 0.8438, + "step": 18448 + }, + { + "epoch": 0.99, + "learning_rate": 1.8672479408243792e-07, + "loss": 0.8047, + "step": 18449 + }, + { + "epoch": 0.99, + "learning_rate": 1.8435385535187087e-07, + "loss": 0.8633, + "step": 18450 + }, + { + "epoch": 0.99, + "learning_rate": 1.819980626772133e-07, + "loss": 0.8164, + "step": 18451 + }, + { + "epoch": 0.99, + "learning_rate": 1.7965741612979703e-07, + "loss": 0.7305, + "step": 18452 + }, + { + "epoch": 0.99, + "learning_rate": 1.773319157805653e-07, + "loss": 0.8672, + "step": 18453 + }, + { + "epoch": 0.99, + "learning_rate": 1.7502156170001726e-07, + "loss": 0.7891, + "step": 18454 + }, + { + "epoch": 0.99, + "learning_rate": 1.7272635395815252e-07, + "loss": 0.875, + "step": 18455 + }, + { + "epoch": 0.99, + "learning_rate": 1.704462926245265e-07, + "loss": 0.8398, + "step": 18456 + }, + { + "epoch": 0.99, + "learning_rate": 1.681813777681951e-07, + "loss": 0.8281, + "step": 18457 + }, + { + "epoch": 0.99, + "learning_rate": 1.659316094578256e-07, + "loss": 0.7695, + "step": 18458 + }, + { + "epoch": 0.99, + "learning_rate": 1.636969877615857e-07, + "loss": 0.8789, + "step": 18459 + }, + { + "epoch": 0.99, + "learning_rate": 1.61477512747199e-07, + "loss": 0.9219, + "step": 18460 + }, + { + "epoch": 0.99, + "learning_rate": 1.5927318448188954e-07, + "loss": 0.7266, + "step": 18461 + }, + { + "epoch": 0.99, + "learning_rate": 1.570840030325482e-07, + "loss": 0.8164, + "step": 18462 + }, + { + "epoch": 0.99, + "learning_rate": 1.5490996846539984e-07, + "loss": 0.9062, + "step": 18463 + }, + { + "epoch": 0.99, + "learning_rate": 1.5275108084639168e-07, + "loss": 0.7891, + "step": 18464 + }, + { + "epoch": 0.99, + "learning_rate": 1.5060734024091582e-07, + "loss": 0.9141, + "step": 18465 + }, + { + "epoch": 0.99, + "learning_rate": 1.4847874671397587e-07, + "loss": 0.75, + "step": 18466 + }, + { + "epoch": 0.99, + "learning_rate": 1.4636530033002027e-07, + "loss": 0.7695, + "step": 18467 + }, + { + "epoch": 0.99, + "learning_rate": 1.4426700115310886e-07, + "loss": 0.7773, + "step": 18468 + }, + { + "epoch": 0.99, + "learning_rate": 1.4218384924691297e-07, + "loss": 0.7148, + "step": 18469 + }, + { + "epoch": 0.99, + "learning_rate": 1.4011584467438222e-07, + "loss": 0.7461, + "step": 18470 + }, + { + "epoch": 0.99, + "learning_rate": 1.380629874983552e-07, + "loss": 0.7969, + "step": 18471 + }, + { + "epoch": 0.99, + "learning_rate": 1.3602527778094897e-07, + "loss": 0.793, + "step": 18472 + }, + { + "epoch": 0.99, + "learning_rate": 1.3400271558394738e-07, + "loss": 0.7695, + "step": 18473 + }, + { + "epoch": 0.99, + "learning_rate": 1.3199530096863476e-07, + "loss": 0.8438, + "step": 18474 + }, + { + "epoch": 0.99, + "learning_rate": 1.3000303399579583e-07, + "loss": 0.707, + "step": 18475 + }, + { + "epoch": 0.99, + "learning_rate": 1.280259147258822e-07, + "loss": 0.7109, + "step": 18476 + }, + { + "epoch": 0.99, + "learning_rate": 1.2606394321873494e-07, + "loss": 0.8555, + "step": 18477 + }, + { + "epoch": 0.99, + "learning_rate": 1.2411711953386195e-07, + "loss": 0.7422, + "step": 18478 + }, + { + "epoch": 0.99, + "learning_rate": 1.2218544373021612e-07, + "loss": 0.7578, + "step": 18479 + }, + { + "epoch": 0.99, + "learning_rate": 1.202689158663617e-07, + "loss": 0.793, + "step": 18480 + }, + { + "epoch": 0.99, + "learning_rate": 1.1836753600041883e-07, + "loss": 0.8086, + "step": 18481 + }, + { + "epoch": 0.99, + "learning_rate": 1.1648130418989711e-07, + "loss": 0.7148, + "step": 18482 + }, + { + "epoch": 0.99, + "learning_rate": 1.1461022049202852e-07, + "loss": 0.8359, + "step": 18483 + }, + { + "epoch": 0.99, + "learning_rate": 1.1275428496348994e-07, + "loss": 0.8477, + "step": 18484 + }, + { + "epoch": 0.99, + "learning_rate": 1.1091349766056968e-07, + "loss": 0.7852, + "step": 18485 + }, + { + "epoch": 0.99, + "learning_rate": 1.0908785863894544e-07, + "loss": 0.8047, + "step": 18486 + }, + { + "epoch": 0.99, + "learning_rate": 1.0727736795407284e-07, + "loss": 0.7344, + "step": 18487 + }, + { + "epoch": 0.99, + "learning_rate": 1.0548202566068587e-07, + "loss": 0.9336, + "step": 18488 + }, + { + "epoch": 0.99, + "learning_rate": 1.037018318132965e-07, + "loss": 0.7578, + "step": 18489 + }, + { + "epoch": 0.99, + "learning_rate": 1.0193678646580606e-07, + "loss": 0.8125, + "step": 18490 + }, + { + "epoch": 0.99, + "learning_rate": 1.0018688967167177e-07, + "loss": 0.7305, + "step": 18491 + }, + { + "epoch": 0.99, + "learning_rate": 9.845214148396232e-08, + "loss": 0.8164, + "step": 18492 + }, + { + "epoch": 0.99, + "learning_rate": 9.673254195524673e-08, + "loss": 0.7695, + "step": 18493 + }, + { + "epoch": 0.99, + "learning_rate": 9.502809113764998e-08, + "loss": 0.8516, + "step": 18494 + }, + { + "epoch": 0.99, + "learning_rate": 9.333878908274197e-08, + "loss": 0.8438, + "step": 18495 + }, + { + "epoch": 0.99, + "learning_rate": 9.166463584175943e-08, + "loss": 0.7852, + "step": 18496 + }, + { + "epoch": 0.99, + "learning_rate": 9.000563146549513e-08, + "loss": 0.793, + "step": 18497 + }, + { + "epoch": 0.99, + "learning_rate": 8.836177600413109e-08, + "loss": 0.8398, + "step": 18498 + }, + { + "epoch": 0.99, + "learning_rate": 8.673306950751636e-08, + "loss": 0.8164, + "step": 18499 + }, + { + "epoch": 0.99, + "learning_rate": 8.511951202505586e-08, + "loss": 0.7227, + "step": 18500 + }, + { + "epoch": 0.99, + "learning_rate": 8.352110360554388e-08, + "loss": 0.6914, + "step": 18501 + }, + { + "epoch": 0.99, + "learning_rate": 8.193784429749718e-08, + "loss": 0.7461, + "step": 18502 + }, + { + "epoch": 0.99, + "learning_rate": 8.036973414887738e-08, + "loss": 0.7617, + "step": 18503 + }, + { + "epoch": 0.99, + "learning_rate": 7.881677320720204e-08, + "loss": 0.8281, + "step": 18504 + }, + { + "epoch": 0.99, + "learning_rate": 7.72789615194891e-08, + "loss": 0.8125, + "step": 18505 + }, + { + "epoch": 0.99, + "learning_rate": 7.575629913242343e-08, + "loss": 0.8438, + "step": 18506 + }, + { + "epoch": 0.99, + "learning_rate": 7.424878609207931e-08, + "loss": 0.7344, + "step": 18507 + }, + { + "epoch": 0.99, + "learning_rate": 7.275642244414238e-08, + "loss": 0.832, + "step": 18508 + }, + { + "epoch": 0.99, + "learning_rate": 7.127920823385426e-08, + "loss": 0.8828, + "step": 18509 + }, + { + "epoch": 0.99, + "learning_rate": 6.981714350595692e-08, + "loss": 0.7812, + "step": 18510 + }, + { + "epoch": 0.99, + "learning_rate": 6.837022830480378e-08, + "loss": 0.8359, + "step": 18511 + }, + { + "epoch": 0.99, + "learning_rate": 6.693846267419313e-08, + "loss": 0.7852, + "step": 18512 + }, + { + "epoch": 1.0, + "learning_rate": 6.552184665753469e-08, + "loss": 0.7188, + "step": 18513 + }, + { + "epoch": 1.0, + "learning_rate": 6.41203802977941e-08, + "loss": 0.8008, + "step": 18514 + }, + { + "epoch": 1.0, + "learning_rate": 6.273406363738188e-08, + "loss": 0.8047, + "step": 18515 + }, + { + "epoch": 1.0, + "learning_rate": 6.136289671831996e-08, + "loss": 0.832, + "step": 18516 + }, + { + "epoch": 1.0, + "learning_rate": 6.000687958213069e-08, + "loss": 0.8164, + "step": 18517 + }, + { + "epoch": 1.0, + "learning_rate": 5.866601227000335e-08, + "loss": 0.8438, + "step": 18518 + }, + { + "epoch": 1.0, + "learning_rate": 5.734029482246106e-08, + "loss": 0.7812, + "step": 18519 + }, + { + "epoch": 1.0, + "learning_rate": 5.602972727974942e-08, + "loss": 0.8633, + "step": 18520 + }, + { + "epoch": 1.0, + "learning_rate": 5.47343096815589e-08, + "loss": 0.7617, + "step": 18521 + }, + { + "epoch": 1.0, + "learning_rate": 5.3454042067080374e-08, + "loss": 0.7734, + "step": 18522 + }, + { + "epoch": 1.0, + "learning_rate": 5.218892447522716e-08, + "loss": 0.6914, + "step": 18523 + }, + { + "epoch": 1.0, + "learning_rate": 5.093895694430195e-08, + "loss": 0.8477, + "step": 18524 + }, + { + "epoch": 1.0, + "learning_rate": 4.970413951216335e-08, + "loss": 0.8633, + "step": 18525 + }, + { + "epoch": 1.0, + "learning_rate": 4.848447221617036e-08, + "loss": 0.8125, + "step": 18526 + }, + { + "epoch": 1.0, + "learning_rate": 4.7279955093404433e-08, + "loss": 0.8086, + "step": 18527 + }, + { + "epoch": 1.0, + "learning_rate": 4.609058818028089e-08, + "loss": 0.7656, + "step": 18528 + }, + { + "epoch": 1.0, + "learning_rate": 4.491637151288197e-08, + "loss": 0.8008, + "step": 18529 + }, + { + "epoch": 1.0, + "learning_rate": 4.375730512673482e-08, + "loss": 0.7773, + "step": 18530 + }, + { + "epoch": 1.0, + "learning_rate": 4.261338905703349e-08, + "loss": 0.8242, + "step": 18531 + }, + { + "epoch": 1.0, + "learning_rate": 4.148462333841696e-08, + "loss": 0.8438, + "step": 18532 + }, + { + "epoch": 1.0, + "learning_rate": 4.037100800508009e-08, + "loss": 0.7852, + "step": 18533 + }, + { + "epoch": 1.0, + "learning_rate": 3.927254309077366e-08, + "loss": 0.8125, + "step": 18534 + }, + { + "epoch": 1.0, + "learning_rate": 3.8189228628804363e-08, + "loss": 0.7969, + "step": 18535 + }, + { + "epoch": 1.0, + "learning_rate": 3.712106465192378e-08, + "loss": 0.6797, + "step": 18536 + }, + { + "epoch": 1.0, + "learning_rate": 3.6068051192605924e-08, + "loss": 0.707, + "step": 18537 + }, + { + "epoch": 1.0, + "learning_rate": 3.5030188282714204e-08, + "loss": 0.9023, + "step": 18538 + }, + { + "epoch": 1.0, + "learning_rate": 3.4007475953667934e-08, + "loss": 0.8398, + "step": 18539 + }, + { + "epoch": 1.0, + "learning_rate": 3.299991423649784e-08, + "loss": 0.8086, + "step": 18540 + }, + { + "epoch": 1.0, + "learning_rate": 3.200750316173506e-08, + "loss": 0.8164, + "step": 18541 + }, + { + "epoch": 1.0, + "learning_rate": 3.103024275941113e-08, + "loss": 0.7383, + "step": 18542 + }, + { + "epoch": 1.0, + "learning_rate": 3.006813305922451e-08, + "loss": 0.7578, + "step": 18543 + }, + { + "epoch": 1.0, + "learning_rate": 2.9121174090263046e-08, + "loss": 0.8867, + "step": 18544 + }, + { + "epoch": 1.0, + "learning_rate": 2.8189365881226002e-08, + "loss": 0.6758, + "step": 18545 + }, + { + "epoch": 1.0, + "learning_rate": 2.7272708460368556e-08, + "loss": 0.8086, + "step": 18546 + }, + { + "epoch": 1.0, + "learning_rate": 2.637120185544628e-08, + "loss": 0.8984, + "step": 18547 + }, + { + "epoch": 1.0, + "learning_rate": 2.5484846093826174e-08, + "loss": 0.8828, + "step": 18548 + }, + { + "epoch": 1.0, + "learning_rate": 2.4613641202320125e-08, + "loss": 0.7305, + "step": 18549 + }, + { + "epoch": 1.0, + "learning_rate": 2.375758720735144e-08, + "loss": 0.8633, + "step": 18550 + }, + { + "epoch": 1.0, + "learning_rate": 2.2916684134843825e-08, + "loss": 0.7812, + "step": 18551 + }, + { + "epoch": 1.0, + "learning_rate": 2.209093201033241e-08, + "loss": 0.6602, + "step": 18552 + }, + { + "epoch": 1.0, + "learning_rate": 2.1280330858741704e-08, + "loss": 0.8516, + "step": 18553 + }, + { + "epoch": 1.0, + "learning_rate": 2.0484880704718656e-08, + "loss": 0.8398, + "step": 18554 + }, + { + "epoch": 1.0, + "learning_rate": 1.9704581572355107e-08, + "loss": 0.7891, + "step": 18555 + }, + { + "epoch": 1.0, + "learning_rate": 1.8939433485243296e-08, + "loss": 0.8594, + "step": 18556 + }, + { + "epoch": 1.0, + "learning_rate": 1.8189436466642393e-08, + "loss": 0.793, + "step": 18557 + }, + { + "epoch": 1.0, + "learning_rate": 1.7454590539200953e-08, + "loss": 0.8672, + "step": 18558 + }, + { + "epoch": 1.0, + "learning_rate": 1.6734895725289967e-08, + "loss": 0.8164, + "step": 18559 + }, + { + "epoch": 1.0, + "learning_rate": 1.6030352046614293e-08, + "loss": 0.8008, + "step": 18560 + }, + { + "epoch": 1.0, + "learning_rate": 1.5340959524601238e-08, + "loss": 0.8164, + "step": 18561 + }, + { + "epoch": 1.0, + "learning_rate": 1.4666718180122995e-08, + "loss": 0.7539, + "step": 18562 + }, + { + "epoch": 1.0, + "learning_rate": 1.4007628033552156e-08, + "loss": 0.7891, + "step": 18563 + }, + { + "epoch": 1.0, + "learning_rate": 1.3363689104983756e-08, + "loss": 0.7578, + "step": 18564 + }, + { + "epoch": 1.0, + "learning_rate": 1.2734901413791188e-08, + "loss": 0.7617, + "step": 18565 + }, + { + "epoch": 1.0, + "learning_rate": 1.2121264979125801e-08, + "loss": 0.7734, + "step": 18566 + }, + { + "epoch": 1.0, + "learning_rate": 1.1522779819528317e-08, + "loss": 0.8555, + "step": 18567 + }, + { + "epoch": 1.0, + "learning_rate": 1.0939445953150883e-08, + "loss": 0.8203, + "step": 18568 + }, + { + "epoch": 1.0, + "learning_rate": 1.0371263397701558e-08, + "loss": 0.8672, + "step": 18569 + }, + { + "epoch": 1.0, + "learning_rate": 9.818232170388797e-09, + "loss": 0.8086, + "step": 18570 + }, + { + "epoch": 1.0, + "learning_rate": 9.280352287921457e-09, + "loss": 0.9258, + "step": 18571 + }, + { + "epoch": 1.0, + "learning_rate": 8.757623766619815e-09, + "loss": 0.8242, + "step": 18572 + }, + { + "epoch": 1.0, + "learning_rate": 8.250046622415575e-09, + "loss": 0.8047, + "step": 18573 + }, + { + "epoch": 1.0, + "learning_rate": 7.757620870518789e-09, + "loss": 0.8789, + "step": 18574 + }, + { + "epoch": 1.0, + "learning_rate": 7.280346526028492e-09, + "loss": 0.75, + "step": 18575 + }, + { + "epoch": 1.0, + "learning_rate": 6.81822360326656e-09, + "loss": 0.7266, + "step": 18576 + }, + { + "epoch": 1.0, + "learning_rate": 6.3712521163328265e-09, + "loss": 0.8164, + "step": 18577 + }, + { + "epoch": 1.0, + "learning_rate": 5.9394320787165004e-09, + "loss": 0.7812, + "step": 18578 + }, + { + "epoch": 1.0, + "learning_rate": 5.522763503518213e-09, + "loss": 0.8047, + "step": 18579 + }, + { + "epoch": 1.0, + "learning_rate": 5.121246403338997e-09, + "loss": 0.707, + "step": 18580 + }, + { + "epoch": 1.0, + "learning_rate": 4.734880790391305e-09, + "loss": 0.8945, + "step": 18581 + }, + { + "epoch": 1.0, + "learning_rate": 4.3636666763879895e-09, + "loss": 0.7344, + "step": 18582 + }, + { + "epoch": 1.0, + "learning_rate": 4.0076040725423035e-09, + "loss": 0.7734, + "step": 18583 + }, + { + "epoch": 1.0, + "learning_rate": 3.6666929896789213e-09, + "loss": 0.8164, + "step": 18584 + }, + { + "epoch": 1.0, + "learning_rate": 3.3409334380674062e-09, + "loss": 0.793, + "step": 18585 + }, + { + "epoch": 1.0, + "learning_rate": 3.0303254276442536e-09, + "loss": 0.7891, + "step": 18586 + }, + { + "epoch": 1.0, + "learning_rate": 2.734868967790849e-09, + "loss": 0.8242, + "step": 18587 + }, + { + "epoch": 1.0, + "learning_rate": 2.454564067499998e-09, + "loss": 0.8633, + "step": 18588 + }, + { + "epoch": 1.0, + "learning_rate": 2.189410735153885e-09, + "loss": 0.8633, + "step": 18589 + }, + { + "epoch": 1.0, + "learning_rate": 1.93940897896816e-09, + "loss": 0.8047, + "step": 18590 + }, + { + "epoch": 1.0, + "learning_rate": 1.7045588063258066e-09, + "loss": 0.7539, + "step": 18591 + }, + { + "epoch": 1.0, + "learning_rate": 1.4848602244987853e-09, + "loss": 0.7383, + "step": 18592 + }, + { + "epoch": 1.0, + "learning_rate": 1.2803132400374118e-09, + "loss": 0.8359, + "step": 18593 + }, + { + "epoch": 1.0, + "learning_rate": 1.0909178592144464e-09, + "loss": 0.8125, + "step": 18594 + }, + { + "epoch": 1.0, + "learning_rate": 9.166740876920265e-10, + "loss": 0.707, + "step": 18595 + }, + { + "epoch": 1.0, + "learning_rate": 7.575819308547338e-10, + "loss": 0.9727, + "step": 18596 + }, + { + "epoch": 1.0, + "learning_rate": 6.136413934210161e-10, + "loss": 0.7344, + "step": 18597 + }, + { + "epoch": 1.0, + "learning_rate": 4.848524798317655e-10, + "loss": 0.8438, + "step": 18598 + }, + { + "epoch": 1.0, + "learning_rate": 3.7121519391725143e-10, + "loss": 0.7266, + "step": 18599 + }, + { + "epoch": 1.0, + "learning_rate": 2.727295391746765e-10, + "loss": 0.7578, + "step": 18600 + }, + { + "epoch": 1.0, + "learning_rate": 1.8939551860164273e-10, + "loss": 0.8555, + "step": 18601 + }, + { + "epoch": 1.0, + "learning_rate": 1.2121313464064086e-10, + "loss": 0.7812, + "step": 18602 + }, + { + "epoch": 1.0, + "learning_rate": 6.818238945660582e-11, + "loss": 0.7266, + "step": 18603 + }, + { + "epoch": 1.0, + "learning_rate": 3.030328460384979e-11, + "loss": 0.8711, + "step": 18604 + }, + { + "epoch": 1.0, + "learning_rate": 7.57582119259581e-12, + "loss": 0.6484, + "step": 18605 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.6562, + "step": 18606 + }, + { + "epoch": 1.0, + "step": 18606, + "total_flos": 2.9318438414240973e+18, + "train_loss": 0.8734312856067935, + "train_runtime": 107801.7223, + "train_samples_per_second": 5.523, + "train_steps_per_second": 0.173 + } + ], + "max_steps": 18606, + "num_train_epochs": 1, + "total_flos": 2.9318438414240973e+18, + "trial_name": null, + "trial_params": null +}