diff --git a/.gitattributes b/.gitattributes
index d18ea26..5f233b8 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -45,3 +45,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+
+vocab.json filter=lfs diff=lfs merge=lfs -text
+training_args.bin filter=lfs diff=lfs merge=lfs -text
+model.safetensors filter=lfs diff=lfs merge=lfs -text
+merges.txt filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
\ No newline at end of file
diff --git a/README.md b/README.md
index 6886429..bbda0e7 100644
--- a/README.md
+++ b/README.md
@@ -1,48 +1,61 @@
---
-license: Apache License 2.0
-tags: []
-
-#model-type:
-##如 gpt、phi、llama、chatglm、baichuan 等
-#- gpt
-
-#domain:
-##如 nlp、cv、audio、multi-modal
-#- nlp
-
-#language:
-##语言代码列表 https://help.aliyun.com/document_detail/215387.html?spm=a2c4g.11186623.0.0.9f8d7467kni6Aa
-#- cn
-
-#metrics:
-##如 CIDEr、Blue、ROUGE 等
-#- CIDEr
-
-#tags:
-##各种自定义,包括 pretrained、fine-tuned、instruction-tuned、RL-tuned 等训练方法和其他
-#- pretrained
-
-#tools:
-##如 vllm、fastchat、llamacpp、AdaSeq 等
-#- vllm
+library_name: transformers
+license: apache-2.0
+base_model: Qwen/Qwen2.5-1.5B-Instruct
+tags:
+- llama-factory
+- full
+- generated_from_trainer
+model-index:
+- name: openthoughts3_100k_qwen25_1b_bsz1024_lr16e5_epochs5
+ results: []
---
-### 当前模型的贡献者未提供更加详细的模型介绍。模型文件和权重,可浏览“模型文件”页面获取。
-#### 您可以通过如下git clone命令,或者ModelScope SDK来下载模型
-SDK下载
-```bash
-#安装ModelScope
-pip install modelscope
-```
-```python
-#SDK模型下载
-from modelscope import snapshot_download
-model_dir = snapshot_download('mlfoundations-dev/openthoughts3_100k_qwen25_1b_bsz1024_lr16e5_epochs5')
-```
-Git下载
-```
-#Git模型下载
-git clone https://www.modelscope.cn/mlfoundations-dev/openthoughts3_100k_qwen25_1b_bsz1024_lr16e5_epochs5.git
-```
+
-
-如果您是本模型的贡献者,我们邀请您根据模型贡献文档,及时完善模型卡片内容。
\ No newline at end of file
+# openthoughts3_100k_qwen25_1b_bsz1024_lr16e5_epochs5
+
+This model is a fine-tuned version of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) on the mlfoundations-dev/openthoughts3_100k dataset.
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 0.00016
+- train_batch_size: 4
+- eval_batch_size: 8
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 32
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 1024
+- total_eval_batch_size: 256
+- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 5.0
+
+### Training results
+
+
+
+### Framework versions
+
+- Transformers 4.46.1
+- Pytorch 2.3.0
+- Datasets 3.1.0
+- Tokenizers 0.20.3
diff --git a/added_tokens.json b/added_tokens.json
new file mode 100644
index 0000000..482ced4
--- /dev/null
+++ b/added_tokens.json
@@ -0,0 +1,24 @@
+{
+ "": 151658,
+ "": 151657,
+ "<|box_end|>": 151649,
+ "<|box_start|>": 151648,
+ "<|endoftext|>": 151643,
+ "<|file_sep|>": 151664,
+ "<|fim_middle|>": 151660,
+ "<|fim_pad|>": 151662,
+ "<|fim_prefix|>": 151659,
+ "<|fim_suffix|>": 151661,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644,
+ "<|image_pad|>": 151655,
+ "<|object_ref_end|>": 151647,
+ "<|object_ref_start|>": 151646,
+ "<|quad_end|>": 151651,
+ "<|quad_start|>": 151650,
+ "<|repo_name|>": 151663,
+ "<|video_pad|>": 151656,
+ "<|vision_end|>": 151653,
+ "<|vision_pad|>": 151654,
+ "<|vision_start|>": 151652
+}
diff --git a/all_results.json b/all_results.json
new file mode 100644
index 0000000..5b356e6
--- /dev/null
+++ b/all_results.json
@@ -0,0 +1,8 @@
+{
+ "epoch": 4.961636828644501,
+ "total_flos": 7065760181780480.0,
+ "train_loss": 1.075610858509221,
+ "train_runtime": 69151.8205,
+ "train_samples_per_second": 7.23,
+ "train_steps_per_second": 0.007
+}
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..0e002a8
--- /dev/null
+++ b/config.json
@@ -0,0 +1,29 @@
+{
+ "_name_or_path": "Qwen/Qwen2.5-1.5B-Instruct",
+ "architectures": [
+ "Qwen2ForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "eos_token_id": 151645,
+ "hidden_act": "silu",
+ "hidden_size": 1536,
+ "initializer_range": 0.02,
+ "intermediate_size": 8960,
+ "max_position_embeddings": 32768,
+ "max_window_layers": 21,
+ "model_type": "qwen2",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 28,
+ "num_key_value_heads": 2,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": null,
+ "rope_theta": 1000000.0,
+ "sliding_window": null,
+ "tie_word_embeddings": true,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.46.1",
+ "use_cache": false,
+ "use_sliding_window": false,
+ "vocab_size": 151936
+}
diff --git a/configs.yaml b/configs.yaml
new file mode 100644
index 0000000..36afb5a
--- /dev/null
+++ b/configs.yaml
@@ -0,0 +1,39 @@
+assistant_tag: gpt
+bf16: 'True'
+content_tag: value
+cutoff_len: '16384'
+dataloader_num_workers: '4'
+dataloader_persistent_workers: 'True'
+dataloader_pin_memory: 'True'
+dataset: mlfoundations-dev/openthoughts3_100k
+dataset_dir: ONLINE
+ddp_timeout: '180000000'
+deepspeed: /opt/ml/code/zero3.json
+do_train: 'True'
+enable_liger_kernel: 'True'
+finetuning_type: full
+formatting: sharegpt
+global_batch_size: '1024'
+gradient_accumulation_steps: '8'
+hub_model_id: mlfoundations-dev/openthoughts3_100k_qwen25_1b_bsz1024_lr16e5_epochs5
+learning_rate: '0.00016'
+logging_steps: '1'
+lr_scheduler_type: cosine
+messages: conversations
+model_name_or_path: Qwen/Qwen2.5-1.5B-Instruct
+num_train_epochs: '5.0'
+output_dir: /opt/ml/model
+overwrite_cache: 'True'
+per_device_train_batch_size: '4'
+plot_loss: 'True'
+preprocessing_num_workers: '16'
+push_to_db: 'True'
+push_to_hub: 'True'
+report_to: wandb
+role_tag: from
+run_name: openthoughts3_100k_qwen25_1b_bsz1024_lr16e5_epochs5
+save_strategy: epoch
+stage: sft
+template: qwen25
+user_tag: human
+warmup_ratio: '0.1'
diff --git a/configuration.json b/configuration.json
new file mode 100644
index 0000000..bbeeda1
--- /dev/null
+++ b/configuration.json
@@ -0,0 +1 @@
+{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
\ No newline at end of file
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000..16e88f7
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,14 @@
+{
+ "bos_token_id": 151643,
+ "do_sample": true,
+ "eos_token_id": [
+ 151645,
+ 151643
+ ],
+ "pad_token_id": 151643,
+ "repetition_penalty": 1.1,
+ "temperature": 0.7,
+ "top_k": 20,
+ "top_p": 0.8,
+ "transformers_version": "4.46.1"
+}
diff --git a/merges.txt b/merges.txt
new file mode 100644
index 0000000..80c1a19
--- /dev/null
+++ b/merges.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5
+size 1671853
diff --git a/model.safetensors b/model.safetensors
new file mode 100644
index 0000000..6678e83
--- /dev/null
+++ b/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a037d183c5b8bc2409a94e8d84a02273b7be1333efa95685813e81985db394bb
+size 3087467144
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000..17305b3
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,31 @@
+{
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "eos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000..51ebb3b
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
+size 11421896
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000..b84f53a
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,208 @@
+{
+ "add_bos_token": false,
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "151643": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151644": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151645": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151646": {
+ "content": "<|object_ref_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151647": {
+ "content": "<|object_ref_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151648": {
+ "content": "<|box_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151649": {
+ "content": "<|box_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151650": {
+ "content": "<|quad_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151651": {
+ "content": "<|quad_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151652": {
+ "content": "<|vision_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151653": {
+ "content": "<|vision_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151654": {
+ "content": "<|vision_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151655": {
+ "content": "<|image_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151656": {
+ "content": "<|video_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151657": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151658": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151659": {
+ "content": "<|fim_prefix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151660": {
+ "content": "<|fim_middle|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151661": {
+ "content": "<|fim_suffix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151662": {
+ "content": "<|fim_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151663": {
+ "content": "<|repo_name|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151664": {
+ "content": "<|file_sep|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ }
+ },
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "bos_token": null,
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role 
!= \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|endoftext|>",
+ "errors": "replace",
+ "model_max_length": 131072,
+ "pad_token": "<|endoftext|>",
+ "padding_side": "right",
+ "split_special_tokens": false,
+ "tokenizer_class": "Qwen2Tokenizer",
+ "unk_token": null
+}
diff --git a/train_results.json b/train_results.json
new file mode 100644
index 0000000..5b356e6
--- /dev/null
+++ b/train_results.json
@@ -0,0 +1,8 @@
+{
+ "epoch": 4.961636828644501,
+ "total_flos": 7065760181780480.0,
+ "train_loss": 1.075610858509221,
+ "train_runtime": 69151.8205,
+ "train_samples_per_second": 7.23,
+ "train_steps_per_second": 0.007
+}
\ No newline at end of file
diff --git a/trainer_log.jsonl b/trainer_log.jsonl
new file mode 100644
index 0000000..478f33f
--- /dev/null
+++ b/trainer_log.jsonl
@@ -0,0 +1,486 @@
+{"current_steps": 1, "total_steps": 485, "loss": 1.4918, "lr": 3.2653061224489794e-06, "epoch": 0.010230179028132993, "percentage": 0.21, "elapsed_time": "0:02:28", "remaining_time": "20:00:30"}
+{"current_steps": 2, "total_steps": 485, "loss": 1.4922, "lr": 6.530612244897959e-06, "epoch": 0.020460358056265986, "percentage": 0.41, "elapsed_time": "0:04:52", "remaining_time": "19:37:22"}
+{"current_steps": 3, "total_steps": 485, "loss": 1.503, "lr": 9.795918367346939e-06, "epoch": 0.030690537084398978, "percentage": 0.62, "elapsed_time": "0:07:14", "remaining_time": "19:23:06"}
+{"current_steps": 4, "total_steps": 485, "loss": 1.4675, "lr": 1.3061224489795918e-05, "epoch": 0.04092071611253197, "percentage": 0.82, "elapsed_time": "0:09:37", "remaining_time": "19:17:22"}
+{"current_steps": 5, "total_steps": 485, "loss": 1.4533, "lr": 1.63265306122449e-05, "epoch": 0.05115089514066496, "percentage": 1.03, "elapsed_time": "0:12:00", "remaining_time": "19:13:16"}
+{"current_steps": 6, "total_steps": 485, "loss": 1.4122, "lr": 1.9591836734693877e-05, "epoch": 0.061381074168797956, "percentage": 1.24, "elapsed_time": "0:14:22", "remaining_time": "19:07:58"}
+{"current_steps": 7, "total_steps": 485, "loss": 1.4056, "lr": 2.2857142857142858e-05, "epoch": 0.07161125319693094, "percentage": 1.44, "elapsed_time": "0:16:45", "remaining_time": "19:03:57"}
+{"current_steps": 8, "total_steps": 485, "loss": 1.3833, "lr": 2.6122448979591835e-05, "epoch": 0.08184143222506395, "percentage": 1.65, "elapsed_time": "0:19:07", "remaining_time": "19:00:06"}
+{"current_steps": 9, "total_steps": 485, "loss": 1.3843, "lr": 2.938775510204082e-05, "epoch": 0.09207161125319693, "percentage": 1.86, "elapsed_time": "0:21:29", "remaining_time": "18:56:59"}
+{"current_steps": 10, "total_steps": 485, "loss": 1.3515, "lr": 3.26530612244898e-05, "epoch": 0.10230179028132992, "percentage": 2.06, "elapsed_time": "0:23:52", "remaining_time": "18:54:11"}
+{"current_steps": 11, "total_steps": 485, "loss": 1.3224, "lr": 3.591836734693878e-05, "epoch": 0.11253196930946291, "percentage": 2.27, "elapsed_time": "0:26:14", "remaining_time": "18:50:30"}
+{"current_steps": 12, "total_steps": 485, "loss": 1.322, "lr": 3.9183673469387755e-05, "epoch": 0.12276214833759591, "percentage": 2.47, "elapsed_time": "0:28:37", "remaining_time": "18:48:05"}
+{"current_steps": 13, "total_steps": 485, "loss": 1.2956, "lr": 4.244897959183674e-05, "epoch": 0.1329923273657289, "percentage": 2.68, "elapsed_time": "0:30:59", "remaining_time": "18:45:23"}
+{"current_steps": 14, "total_steps": 485, "loss": 1.3067, "lr": 4.5714285714285716e-05, "epoch": 0.1432225063938619, "percentage": 2.89, "elapsed_time": "0:33:21", "remaining_time": "18:42:25"}
+{"current_steps": 15, "total_steps": 485, "loss": 1.2961, "lr": 4.89795918367347e-05, "epoch": 0.1534526854219949, "percentage": 3.09, "elapsed_time": "0:35:43", "remaining_time": "18:39:30"}
+{"current_steps": 16, "total_steps": 485, "loss": 1.301, "lr": 5.224489795918367e-05, "epoch": 0.1636828644501279, "percentage": 3.3, "elapsed_time": "0:38:05", "remaining_time": "18:36:26"}
+{"current_steps": 17, "total_steps": 485, "loss": 1.2656, "lr": 5.551020408163266e-05, "epoch": 0.17391304347826086, "percentage": 3.51, "elapsed_time": "0:40:29", "remaining_time": "18:34:31"}
+{"current_steps": 18, "total_steps": 485, "loss": 1.246, "lr": 5.877551020408164e-05, "epoch": 0.18414322250639387, "percentage": 3.71, "elapsed_time": "0:42:51", "remaining_time": "18:31:46"}
+{"current_steps": 19, "total_steps": 485, "loss": 1.2565, "lr": 6.204081632653062e-05, "epoch": 0.19437340153452684, "percentage": 3.92, "elapsed_time": "0:45:13", "remaining_time": "18:29:02"}
+{"current_steps": 20, "total_steps": 485, "loss": 1.241, "lr": 6.53061224489796e-05, "epoch": 0.20460358056265984, "percentage": 4.12, "elapsed_time": "0:47:34", "remaining_time": "18:26:06"}
+{"current_steps": 21, "total_steps": 485, "loss": 1.232, "lr": 6.857142857142857e-05, "epoch": 0.21483375959079284, "percentage": 4.33, "elapsed_time": "0:49:57", "remaining_time": "18:23:43"}
+{"current_steps": 22, "total_steps": 485, "loss": 1.2567, "lr": 7.183673469387756e-05, "epoch": 0.22506393861892582, "percentage": 4.54, "elapsed_time": "0:52:19", "remaining_time": "18:21:04"}
+{"current_steps": 23, "total_steps": 485, "loss": 1.2448, "lr": 7.510204081632654e-05, "epoch": 0.23529411764705882, "percentage": 4.74, "elapsed_time": "0:54:41", "remaining_time": "18:18:34"}
+{"current_steps": 24, "total_steps": 485, "loss": 1.2357, "lr": 7.836734693877551e-05, "epoch": 0.24552429667519182, "percentage": 4.95, "elapsed_time": "0:57:03", "remaining_time": "18:15:52"}
+{"current_steps": 25, "total_steps": 485, "loss": 1.2164, "lr": 8.16326530612245e-05, "epoch": 0.2557544757033248, "percentage": 5.15, "elapsed_time": "0:59:25", "remaining_time": "18:13:18"}
+{"current_steps": 26, "total_steps": 485, "loss": 1.2115, "lr": 8.489795918367348e-05, "epoch": 0.2659846547314578, "percentage": 5.36, "elapsed_time": "1:01:46", "remaining_time": "18:10:37"}
+{"current_steps": 27, "total_steps": 485, "loss": 1.2004, "lr": 8.816326530612245e-05, "epoch": 0.27621483375959077, "percentage": 5.57, "elapsed_time": "1:04:08", "remaining_time": "18:08:01"}
+{"current_steps": 28, "total_steps": 485, "loss": 1.2235, "lr": 9.142857142857143e-05, "epoch": 0.2864450127877238, "percentage": 5.77, "elapsed_time": "1:06:30", "remaining_time": "18:05:30"}
+{"current_steps": 29, "total_steps": 485, "loss": 1.208, "lr": 9.469387755102041e-05, "epoch": 0.2966751918158568, "percentage": 5.98, "elapsed_time": "1:08:53", "remaining_time": "18:03:20"}
+{"current_steps": 30, "total_steps": 485, "loss": 1.2118, "lr": 9.79591836734694e-05, "epoch": 0.3069053708439898, "percentage": 6.19, "elapsed_time": "1:11:14", "remaining_time": "18:00:33"}
+{"current_steps": 31, "total_steps": 485, "loss": 1.2032, "lr": 0.00010122448979591839, "epoch": 0.3171355498721228, "percentage": 6.39, "elapsed_time": "1:13:36", "remaining_time": "17:58:03"}
+{"current_steps": 32, "total_steps": 485, "loss": 1.1912, "lr": 0.00010448979591836734, "epoch": 0.3273657289002558, "percentage": 6.6, "elapsed_time": "1:15:59", "remaining_time": "17:55:46"}
+{"current_steps": 33, "total_steps": 485, "loss": 1.2025, "lr": 0.00010775510204081634, "epoch": 0.3375959079283887, "percentage": 6.8, "elapsed_time": "1:18:21", "remaining_time": "17:53:15"}
+{"current_steps": 34, "total_steps": 485, "loss": 1.2118, "lr": 0.00011102040816326532, "epoch": 0.34782608695652173, "percentage": 7.01, "elapsed_time": "1:20:44", "remaining_time": "17:51:00"}
+{"current_steps": 35, "total_steps": 485, "loss": 1.1986, "lr": 0.0001142857142857143, "epoch": 0.35805626598465473, "percentage": 7.22, "elapsed_time": "1:23:07", "remaining_time": "17:48:43"}
+{"current_steps": 36, "total_steps": 485, "loss": 1.1939, "lr": 0.00011755102040816328, "epoch": 0.36828644501278773, "percentage": 7.42, "elapsed_time": "1:25:30", "remaining_time": "17:46:25"}
+{"current_steps": 37, "total_steps": 485, "loss": 1.1997, "lr": 0.00012081632653061224, "epoch": 0.37851662404092073, "percentage": 7.63, "elapsed_time": "1:27:51", "remaining_time": "17:43:51"}
+{"current_steps": 38, "total_steps": 485, "loss": 1.1791, "lr": 0.00012408163265306124, "epoch": 0.3887468030690537, "percentage": 7.84, "elapsed_time": "1:30:13", "remaining_time": "17:41:23"}
+{"current_steps": 39, "total_steps": 485, "loss": 1.1896, "lr": 0.0001273469387755102, "epoch": 0.3989769820971867, "percentage": 8.04, "elapsed_time": "1:32:35", "remaining_time": "17:38:52"}
+{"current_steps": 40, "total_steps": 485, "loss": 1.1876, "lr": 0.0001306122448979592, "epoch": 0.4092071611253197, "percentage": 8.25, "elapsed_time": "1:34:57", "remaining_time": "17:36:28"}
+{"current_steps": 41, "total_steps": 485, "loss": 1.1709, "lr": 0.00013387755102040817, "epoch": 0.4194373401534527, "percentage": 8.45, "elapsed_time": "1:37:20", "remaining_time": "17:34:04"}
+{"current_steps": 42, "total_steps": 485, "loss": 1.1795, "lr": 0.00013714285714285713, "epoch": 0.4296675191815857, "percentage": 8.66, "elapsed_time": "1:39:42", "remaining_time": "17:31:39"}
+{"current_steps": 43, "total_steps": 485, "loss": 1.1753, "lr": 0.00014040816326530613, "epoch": 0.4398976982097187, "percentage": 8.87, "elapsed_time": "1:42:04", "remaining_time": "17:29:13"}
+{"current_steps": 44, "total_steps": 485, "loss": 1.2028, "lr": 0.00014367346938775512, "epoch": 0.45012787723785164, "percentage": 9.07, "elapsed_time": "1:44:26", "remaining_time": "17:26:46"}
+{"current_steps": 45, "total_steps": 485, "loss": 1.1822, "lr": 0.0001469387755102041, "epoch": 0.46035805626598464, "percentage": 9.28, "elapsed_time": "1:46:47", "remaining_time": "17:24:14"}
+{"current_steps": 46, "total_steps": 485, "loss": 1.1836, "lr": 0.00015020408163265308, "epoch": 0.47058823529411764, "percentage": 9.48, "elapsed_time": "1:49:09", "remaining_time": "17:21:49"}
+{"current_steps": 47, "total_steps": 485, "loss": 1.1901, "lr": 0.00015346938775510205, "epoch": 0.48081841432225064, "percentage": 9.69, "elapsed_time": "1:51:31", "remaining_time": "17:19:21"}
+{"current_steps": 48, "total_steps": 485, "loss": 1.1562, "lr": 0.00015673469387755102, "epoch": 0.49104859335038364, "percentage": 9.9, "elapsed_time": "1:53:53", "remaining_time": "17:16:56"}
+{"current_steps": 49, "total_steps": 485, "loss": 1.1794, "lr": 0.00016, "epoch": 0.5012787723785166, "percentage": 10.1, "elapsed_time": "1:56:17", "remaining_time": "17:14:48"}
+{"current_steps": 50, "total_steps": 485, "loss": 1.1877, "lr": 0.00015999792324684382, "epoch": 0.5115089514066496, "percentage": 10.31, "elapsed_time": "1:58:40", "remaining_time": "17:12:24"}
+{"current_steps": 51, "total_steps": 485, "loss": 1.1585, "lr": 0.00015999169309519789, "epoch": 0.5217391304347826, "percentage": 10.52, "elapsed_time": "2:01:01", "remaining_time": "17:09:56"}
+{"current_steps": 52, "total_steps": 485, "loss": 1.1732, "lr": 0.0001599813098685243, "epoch": 0.5319693094629157, "percentage": 10.72, "elapsed_time": "2:03:23", "remaining_time": "17:07:27"}
+{"current_steps": 53, "total_steps": 485, "loss": 1.1617, "lr": 0.0001599667741059081, "epoch": 0.5421994884910486, "percentage": 10.93, "elapsed_time": "2:05:44", "remaining_time": "17:04:55"}
+{"current_steps": 54, "total_steps": 485, "loss": 1.1584, "lr": 0.00015994808656202904, "epoch": 0.5524296675191815, "percentage": 11.13, "elapsed_time": "2:08:06", "remaining_time": "17:02:31"}
+{"current_steps": 55, "total_steps": 485, "loss": 1.1478, "lr": 0.00015992524820712252, "epoch": 0.5626598465473146, "percentage": 11.34, "elapsed_time": "2:10:28", "remaining_time": "17:00:06"}
+{"current_steps": 56, "total_steps": 485, "loss": 1.1493, "lr": 0.00015989826022692918, "epoch": 0.5728900255754475, "percentage": 11.55, "elapsed_time": "2:12:51", "remaining_time": "16:57:45"}
+{"current_steps": 57, "total_steps": 485, "loss": 1.148, "lr": 0.00015986712402263334, "epoch": 0.5831202046035806, "percentage": 11.75, "elapsed_time": "2:15:13", "remaining_time": "16:55:22"}
+{"current_steps": 58, "total_steps": 485, "loss": 1.1532, "lr": 0.00015983184121079024, "epoch": 0.5933503836317136, "percentage": 11.96, "elapsed_time": "2:17:35", "remaining_time": "16:53:00"}
+{"current_steps": 59, "total_steps": 485, "loss": 1.1313, "lr": 0.00015979241362324223, "epoch": 0.6035805626598465, "percentage": 12.16, "elapsed_time": "2:19:57", "remaining_time": "16:50:36"}
+{"current_steps": 60, "total_steps": 485, "loss": 1.138, "lr": 0.0001597488433070234, "epoch": 0.6138107416879796, "percentage": 12.37, "elapsed_time": "2:22:21", "remaining_time": "16:48:19"}
+{"current_steps": 61, "total_steps": 485, "loss": 1.1546, "lr": 0.00015970113252425356, "epoch": 0.6240409207161125, "percentage": 12.58, "elapsed_time": "2:24:42", "remaining_time": "16:45:53"}
+{"current_steps": 62, "total_steps": 485, "loss": 1.163, "lr": 0.00015964928375202068, "epoch": 0.6342710997442456, "percentage": 12.78, "elapsed_time": "2:27:04", "remaining_time": "16:43:28"}
+{"current_steps": 63, "total_steps": 485, "loss": 1.1564, "lr": 0.00015959329968225232, "epoch": 0.6445012787723785, "percentage": 12.99, "elapsed_time": "2:29:26", "remaining_time": "16:41:03"}
+{"current_steps": 64, "total_steps": 485, "loss": 1.1684, "lr": 0.0001595331832215758, "epoch": 0.6547314578005116, "percentage": 13.2, "elapsed_time": "2:31:48", "remaining_time": "16:38:34"}
+{"current_steps": 65, "total_steps": 485, "loss": 1.1788, "lr": 0.00015946893749116734, "epoch": 0.6649616368286445, "percentage": 13.4, "elapsed_time": "2:34:10", "remaining_time": "16:36:12"}
+{"current_steps": 66, "total_steps": 485, "loss": 1.1537, "lr": 0.00015940056582659006, "epoch": 0.6751918158567775, "percentage": 13.61, "elapsed_time": "2:36:32", "remaining_time": "16:33:48"}
+{"current_steps": 67, "total_steps": 485, "loss": 1.1487, "lr": 0.0001593280717776207, "epoch": 0.6854219948849105, "percentage": 13.81, "elapsed_time": "2:38:55", "remaining_time": "16:31:27"}
+{"current_steps": 68, "total_steps": 485, "loss": 1.15, "lr": 0.0001592514591080654, "epoch": 0.6956521739130435, "percentage": 14.02, "elapsed_time": "2:41:17", "remaining_time": "16:29:02"}
+{"current_steps": 69, "total_steps": 485, "loss": 1.1487, "lr": 0.0001591707317955642, "epoch": 0.7058823529411765, "percentage": 14.23, "elapsed_time": "2:43:38", "remaining_time": "16:26:37"}
+{"current_steps": 70, "total_steps": 485, "loss": 1.1766, "lr": 0.00015908589403138468, "epoch": 0.7161125319693095, "percentage": 14.43, "elapsed_time": "2:46:02", "remaining_time": "16:24:20"}
+{"current_steps": 71, "total_steps": 485, "loss": 1.1464, "lr": 0.00015899695022020415, "epoch": 0.7263427109974424, "percentage": 14.64, "elapsed_time": "2:48:24", "remaining_time": "16:21:58"}
+{"current_steps": 72, "total_steps": 485, "loss": 1.1487, "lr": 0.00015890390497988116, "epoch": 0.7365728900255755, "percentage": 14.85, "elapsed_time": "2:50:47", "remaining_time": "16:19:39"}
+{"current_steps": 73, "total_steps": 485, "loss": 1.1447, "lr": 0.0001588067631412156, "epoch": 0.7468030690537084, "percentage": 15.05, "elapsed_time": "2:53:09", "remaining_time": "16:17:15"}
+{"current_steps": 74, "total_steps": 485, "loss": 1.1299, "lr": 0.000158705529747698, "epoch": 0.7570332480818415, "percentage": 15.26, "elapsed_time": "2:55:31", "remaining_time": "16:14:52"}
+{"current_steps": 75, "total_steps": 485, "loss": 1.1473, "lr": 0.0001586002100552476, "epoch": 0.7672634271099744, "percentage": 15.46, "elapsed_time": "2:57:53", "remaining_time": "16:12:28"}
+{"current_steps": 76, "total_steps": 485, "loss": 1.1368, "lr": 0.00015849080953193943, "epoch": 0.7774936061381074, "percentage": 15.67, "elapsed_time": "3:00:16", "remaining_time": "16:10:09"}
+{"current_steps": 77, "total_steps": 485, "loss": 1.123, "lr": 0.00015837733385772062, "epoch": 0.7877237851662404, "percentage": 15.88, "elapsed_time": "3:02:38", "remaining_time": "16:07:46"}
+{"current_steps": 78, "total_steps": 485, "loss": 1.1403, "lr": 0.00015825978892411522, "epoch": 0.7979539641943734, "percentage": 16.08, "elapsed_time": "3:05:00", "remaining_time": "16:05:24"}
+{"current_steps": 79, "total_steps": 485, "loss": 1.1229, "lr": 0.00015813818083391858, "epoch": 0.8081841432225064, "percentage": 16.29, "elapsed_time": "3:07:22", "remaining_time": "16:02:56"}
+{"current_steps": 80, "total_steps": 485, "loss": 1.129, "lr": 0.0001580125159008803, "epoch": 0.8184143222506394, "percentage": 16.49, "elapsed_time": "3:09:44", "remaining_time": "16:00:33"}
+{"current_steps": 81, "total_steps": 485, "loss": 1.1351, "lr": 0.00015788280064937655, "epoch": 0.8286445012787724, "percentage": 16.7, "elapsed_time": "3:12:06", "remaining_time": "15:58:11"}
+{"current_steps": 82, "total_steps": 485, "loss": 1.1334, "lr": 0.00015774904181407127, "epoch": 0.8388746803069054, "percentage": 16.91, "elapsed_time": "3:14:28", "remaining_time": "15:55:48"}
+{"current_steps": 83, "total_steps": 485, "loss": 1.1363, "lr": 0.00015761124633956652, "epoch": 0.8491048593350383, "percentage": 17.11, "elapsed_time": "3:16:51", "remaining_time": "15:53:25"}
+{"current_steps": 84, "total_steps": 485, "loss": 1.1142, "lr": 0.00015746942138004203, "epoch": 0.8593350383631714, "percentage": 17.32, "elapsed_time": "3:19:12", "remaining_time": "15:50:59"}
+{"current_steps": 85, "total_steps": 485, "loss": 1.13, "lr": 0.00015732357429888355, "epoch": 0.8695652173913043, "percentage": 17.53, "elapsed_time": "3:21:34", "remaining_time": "15:48:33"}
+{"current_steps": 86, "total_steps": 485, "loss": 1.1225, "lr": 0.00015717371266830076, "epoch": 0.8797953964194374, "percentage": 17.73, "elapsed_time": "3:23:56", "remaining_time": "15:46:11"}
+{"current_steps": 87, "total_steps": 485, "loss": 1.1237, "lr": 0.000157019844268934, "epoch": 0.8900255754475703, "percentage": 17.94, "elapsed_time": "3:26:19", "remaining_time": "15:43:51"}
+{"current_steps": 88, "total_steps": 485, "loss": 1.1223, "lr": 0.0001568619770894504, "epoch": 0.9002557544757033, "percentage": 18.14, "elapsed_time": "3:28:41", "remaining_time": "15:41:29"}
+{"current_steps": 89, "total_steps": 485, "loss": 1.1117, "lr": 0.000156700119326129, "epoch": 0.9104859335038363, "percentage": 18.35, "elapsed_time": "3:31:05", "remaining_time": "15:39:13"}
+{"current_steps": 90, "total_steps": 485, "loss": 1.1195, "lr": 0.00015653427938243532, "epoch": 0.9207161125319693, "percentage": 18.56, "elapsed_time": "3:33:27", "remaining_time": "15:36:52"}
+{"current_steps": 91, "total_steps": 485, "loss": 1.1112, "lr": 0.0001563644658685851, "epoch": 0.9309462915601023, "percentage": 18.76, "elapsed_time": "3:35:50", "remaining_time": "15:34:29"}
+{"current_steps": 92, "total_steps": 485, "loss": 1.1334, "lr": 0.00015619068760109703, "epoch": 0.9411764705882353, "percentage": 18.97, "elapsed_time": "3:38:12", "remaining_time": "15:32:07"}
+{"current_steps": 93, "total_steps": 485, "loss": 1.123, "lr": 0.00015601295360233528, "epoch": 0.9514066496163683, "percentage": 19.18, "elapsed_time": "3:40:34", "remaining_time": "15:29:42"}
+{"current_steps": 94, "total_steps": 485, "loss": 1.1245, "lr": 0.0001558312731000409, "epoch": 0.9616368286445013, "percentage": 19.38, "elapsed_time": "3:42:56", "remaining_time": "15:27:20"}
+{"current_steps": 95, "total_steps": 485, "loss": 1.1159, "lr": 0.00015564565552685282, "epoch": 0.9718670076726342, "percentage": 19.59, "elapsed_time": "3:45:18", "remaining_time": "15:24:58"}
+{"current_steps": 96, "total_steps": 485, "loss": 1.1086, "lr": 0.00015545611051981807, "epoch": 0.9820971867007673, "percentage": 19.79, "elapsed_time": "3:47:41", "remaining_time": "15:22:37"}
+{"current_steps": 97, "total_steps": 485, "loss": 1.1396, "lr": 0.00015526264791989144, "epoch": 0.9923273657289002, "percentage": 20.0, "elapsed_time": "3:50:03", "remaining_time": "15:20:15"}
+{"current_steps": 98, "total_steps": 485, "loss": 1.4022, "lr": 0.00015506527777142453, "epoch": 1.0025575447570332, "percentage": 20.21, "elapsed_time": "3:52:37", "remaining_time": "15:18:39"}
+{"current_steps": 99, "total_steps": 485, "loss": 1.0962, "lr": 0.00015486401032164434, "epoch": 1.0127877237851663, "percentage": 20.41, "elapsed_time": "3:55:00", "remaining_time": "15:16:16"}
+{"current_steps": 100, "total_steps": 485, "loss": 1.1252, "lr": 0.00015465885602012117, "epoch": 1.0230179028132993, "percentage": 20.62, "elapsed_time": "3:57:22", "remaining_time": "15:13:52"}
+{"current_steps": 101, "total_steps": 485, "loss": 1.1044, "lr": 0.00015444982551822604, "epoch": 1.0332480818414322, "percentage": 20.82, "elapsed_time": "3:59:44", "remaining_time": "15:11:29"}
+{"current_steps": 102, "total_steps": 485, "loss": 1.1138, "lr": 0.00015423692966857788, "epoch": 1.0434782608695652, "percentage": 21.03, "elapsed_time": "4:02:06", "remaining_time": "15:09:06"}
+{"current_steps": 103, "total_steps": 485, "loss": 1.0804, "lr": 0.00015402017952447983, "epoch": 1.0537084398976981, "percentage": 21.24, "elapsed_time": "4:04:29", "remaining_time": "15:06:44"}
+{"current_steps": 104, "total_steps": 485, "loss": 1.1212, "lr": 0.00015379958633934555, "epoch": 1.0639386189258313, "percentage": 21.44, "elapsed_time": "4:06:52", "remaining_time": "15:04:23"}
+{"current_steps": 105, "total_steps": 485, "loss": 1.1, "lr": 0.0001535751615661149, "epoch": 1.0741687979539642, "percentage": 21.65, "elapsed_time": "4:09:14", "remaining_time": "15:02:00"}
+{"current_steps": 106, "total_steps": 485, "loss": 1.096, "lr": 0.00015334691685665928, "epoch": 1.0843989769820972, "percentage": 21.86, "elapsed_time": "4:11:36", "remaining_time": "14:59:37"}
+{"current_steps": 107, "total_steps": 485, "loss": 1.0882, "lr": 0.00015311486406117668, "epoch": 1.0946291560102301, "percentage": 22.06, "elapsed_time": "4:13:58", "remaining_time": "14:57:14"}
+{"current_steps": 108, "total_steps": 485, "loss": 1.1214, "lr": 0.00015287901522757652, "epoch": 1.104859335038363, "percentage": 22.27, "elapsed_time": "4:16:21", "remaining_time": "14:54:52"}
+{"current_steps": 109, "total_steps": 485, "loss": 1.0963, "lr": 0.000152639382600854, "epoch": 1.1150895140664963, "percentage": 22.47, "elapsed_time": "4:18:43", "remaining_time": "14:52:29"}
+{"current_steps": 110, "total_steps": 485, "loss": 1.0855, "lr": 0.00015239597862245452, "epoch": 1.1253196930946292, "percentage": 22.68, "elapsed_time": "4:21:06", "remaining_time": "14:50:10"}
+{"current_steps": 111, "total_steps": 485, "loss": 1.094, "lr": 0.00015214881592962753, "epoch": 1.1355498721227621, "percentage": 22.89, "elapsed_time": "4:23:29", "remaining_time": "14:47:49"}
+{"current_steps": 112, "total_steps": 485, "loss": 1.0819, "lr": 0.00015189790735477062, "epoch": 1.145780051150895, "percentage": 23.09, "elapsed_time": "4:25:51", "remaining_time": "14:45:24"}
+{"current_steps": 113, "total_steps": 485, "loss": 1.099, "lr": 0.00015164326592476316, "epoch": 1.156010230179028, "percentage": 23.3, "elapsed_time": "4:28:13", "remaining_time": "14:43:00"}
+{"current_steps": 114, "total_steps": 485, "loss": 1.0887, "lr": 0.00015138490486028998, "epoch": 1.1662404092071612, "percentage": 23.51, "elapsed_time": "4:30:36", "remaining_time": "14:40:38"}
+{"current_steps": 115, "total_steps": 485, "loss": 1.0879, "lr": 0.000151122837575155, "epoch": 1.1764705882352942, "percentage": 23.71, "elapsed_time": "4:32:58", "remaining_time": "14:38:15"}
+{"current_steps": 116, "total_steps": 485, "loss": 1.0974, "lr": 0.00015085707767558475, "epoch": 1.186700767263427, "percentage": 23.92, "elapsed_time": "4:35:20", "remaining_time": "14:35:51"}
+{"current_steps": 117, "total_steps": 485, "loss": 1.1016, "lr": 0.00015058763895952194, "epoch": 1.19693094629156, "percentage": 24.12, "elapsed_time": "4:37:41", "remaining_time": "14:33:26"}
+{"current_steps": 118, "total_steps": 485, "loss": 1.0789, "lr": 0.00015031453541590925, "epoch": 1.207161125319693, "percentage": 24.33, "elapsed_time": "4:40:03", "remaining_time": "14:31:02"}
+{"current_steps": 119, "total_steps": 485, "loss": 1.0851, "lr": 0.00015003778122396277, "epoch": 1.2173913043478262, "percentage": 24.54, "elapsed_time": "4:42:25", "remaining_time": "14:28:39"}
+{"current_steps": 120, "total_steps": 485, "loss": 1.1047, "lr": 0.0001497573907524361, "epoch": 1.227621483375959, "percentage": 24.74, "elapsed_time": "4:44:48", "remaining_time": "14:26:18"}
+{"current_steps": 121, "total_steps": 485, "loss": 1.0943, "lr": 0.00014947337855887406, "epoch": 1.237851662404092, "percentage": 24.95, "elapsed_time": "4:47:11", "remaining_time": "14:23:56"}
+{"current_steps": 122, "total_steps": 485, "loss": 1.0896, "lr": 0.00014918575938885725, "epoch": 1.248081841432225, "percentage": 25.15, "elapsed_time": "4:49:33", "remaining_time": "14:21:33"}
+{"current_steps": 123, "total_steps": 485, "loss": 1.0984, "lr": 0.00014889454817523608, "epoch": 1.258312020460358, "percentage": 25.36, "elapsed_time": "4:51:56", "remaining_time": "14:19:11"}
+{"current_steps": 124, "total_steps": 485, "loss": 1.091, "lr": 0.00014859976003735572, "epoch": 1.2685421994884911, "percentage": 25.57, "elapsed_time": "4:54:19", "remaining_time": "14:16:52"}
+{"current_steps": 125, "total_steps": 485, "loss": 1.0749, "lr": 0.0001483014102802711, "epoch": 1.278772378516624, "percentage": 25.77, "elapsed_time": "4:56:41", "remaining_time": "14:14:29"}
+{"current_steps": 126, "total_steps": 485, "loss": 1.0901, "lr": 0.00014799951439395221, "epoch": 1.289002557544757, "percentage": 25.98, "elapsed_time": "4:59:05", "remaining_time": "14:12:09"}
+{"current_steps": 127, "total_steps": 485, "loss": 1.0848, "lr": 0.00014769408805247986, "epoch": 1.29923273657289, "percentage": 26.19, "elapsed_time": "5:01:27", "remaining_time": "14:09:46"}
+{"current_steps": 128, "total_steps": 485, "loss": 1.0897, "lr": 0.0001473851471132321, "epoch": 1.309462915601023, "percentage": 26.39, "elapsed_time": "5:03:49", "remaining_time": "14:07:23"}
+{"current_steps": 129, "total_steps": 485, "loss": 1.0695, "lr": 0.00014707270761606063, "epoch": 1.319693094629156, "percentage": 26.6, "elapsed_time": "5:06:12", "remaining_time": "14:05:02"}
+{"current_steps": 130, "total_steps": 485, "loss": 1.0895, "lr": 0.00014675678578245828, "epoch": 1.329923273657289, "percentage": 26.8, "elapsed_time": "5:08:34", "remaining_time": "14:02:39"}
+{"current_steps": 131, "total_steps": 485, "loss": 1.1003, "lr": 0.00014643739801471667, "epoch": 1.340153452685422, "percentage": 27.01, "elapsed_time": "5:10:57", "remaining_time": "14:00:17"}
+{"current_steps": 132, "total_steps": 485, "loss": 1.098, "lr": 0.00014611456089507464, "epoch": 1.350383631713555, "percentage": 27.22, "elapsed_time": "5:13:20", "remaining_time": "13:57:57"}
+{"current_steps": 133, "total_steps": 485, "loss": 1.0698, "lr": 0.00014578829118485742, "epoch": 1.3606138107416879, "percentage": 27.42, "elapsed_time": "5:15:42", "remaining_time": "13:55:34"}
+{"current_steps": 134, "total_steps": 485, "loss": 1.1071, "lr": 0.00014545860582360624, "epoch": 1.370843989769821, "percentage": 27.63, "elapsed_time": "5:18:06", "remaining_time": "13:53:14"}
+{"current_steps": 135, "total_steps": 485, "loss": 1.0869, "lr": 0.00014512552192819897, "epoch": 1.381074168797954, "percentage": 27.84, "elapsed_time": "5:20:28", "remaining_time": "13:50:51"}
+{"current_steps": 136, "total_steps": 485, "loss": 1.0954, "lr": 0.0001447890567919614, "epoch": 1.391304347826087, "percentage": 28.04, "elapsed_time": "5:22:50", "remaining_time": "13:48:29"}
+{"current_steps": 137, "total_steps": 485, "loss": 1.0784, "lr": 0.00014444922788376934, "epoch": 1.40153452685422, "percentage": 28.25, "elapsed_time": "5:25:13", "remaining_time": "13:46:06"}
+{"current_steps": 138, "total_steps": 485, "loss": 1.0888, "lr": 0.00014410605284714175, "epoch": 1.4117647058823528, "percentage": 28.45, "elapsed_time": "5:27:36", "remaining_time": "13:43:45"}
+{"current_steps": 139, "total_steps": 485, "loss": 1.0842, "lr": 0.0001437595494993246, "epoch": 1.421994884910486, "percentage": 28.66, "elapsed_time": "5:29:58", "remaining_time": "13:41:21"}
+{"current_steps": 140, "total_steps": 485, "loss": 1.0795, "lr": 0.000143409735830366, "epoch": 1.432225063938619, "percentage": 28.87, "elapsed_time": "5:32:20", "remaining_time": "13:38:58"}
+{"current_steps": 141, "total_steps": 485, "loss": 1.0907, "lr": 0.00014305663000218193, "epoch": 1.4424552429667519, "percentage": 29.07, "elapsed_time": "5:34:42", "remaining_time": "13:36:36"}
+{"current_steps": 142, "total_steps": 485, "loss": 1.0817, "lr": 0.00014270025034761352, "epoch": 1.452685421994885, "percentage": 29.28, "elapsed_time": "5:37:05", "remaining_time": "13:34:14"}
+{"current_steps": 143, "total_steps": 485, "loss": 1.0819, "lr": 0.000142340615369475, "epoch": 1.4629156010230178, "percentage": 29.48, "elapsed_time": "5:39:28", "remaining_time": "13:31:52"}
+{"current_steps": 144, "total_steps": 485, "loss": 1.0931, "lr": 0.00014197774373959327, "epoch": 1.473145780051151, "percentage": 29.69, "elapsed_time": "5:41:50", "remaining_time": "13:29:30"}
+{"current_steps": 145, "total_steps": 485, "loss": 1.0884, "lr": 0.00014161165429783844, "epoch": 1.4833759590792839, "percentage": 29.9, "elapsed_time": "5:44:12", "remaining_time": "13:27:06"}
+{"current_steps": 146, "total_steps": 485, "loss": 1.0924, "lr": 0.0001412423660511456, "epoch": 1.4936061381074168, "percentage": 30.1, "elapsed_time": "5:46:34", "remaining_time": "13:24:44"}
+{"current_steps": 147, "total_steps": 485, "loss": 1.0785, "lr": 0.00014086989817252803, "epoch": 1.50383631713555, "percentage": 30.31, "elapsed_time": "5:48:57", "remaining_time": "13:22:21"}
+{"current_steps": 148, "total_steps": 485, "loss": 1.0699, "lr": 0.00014049427000008185, "epoch": 1.5140664961636827, "percentage": 30.52, "elapsed_time": "5:51:19", "remaining_time": "13:19:57"}
+{"current_steps": 149, "total_steps": 485, "loss": 1.064, "lr": 0.00014011550103598176, "epoch": 1.5242966751918159, "percentage": 30.72, "elapsed_time": "5:53:41", "remaining_time": "13:17:35"}
+{"current_steps": 150, "total_steps": 485, "loss": 1.1002, "lr": 0.0001397336109454689, "epoch": 1.5345268542199488, "percentage": 30.93, "elapsed_time": "5:56:04", "remaining_time": "13:15:13"}
+{"current_steps": 151, "total_steps": 485, "loss": 1.0709, "lr": 0.0001393486195558295, "epoch": 1.5447570332480818, "percentage": 31.13, "elapsed_time": "5:58:26", "remaining_time": "13:12:51"}
+{"current_steps": 152, "total_steps": 485, "loss": 1.0717, "lr": 0.00013896054685536566, "epoch": 1.554987212276215, "percentage": 31.34, "elapsed_time": "6:00:49", "remaining_time": "13:10:28"}
+{"current_steps": 153, "total_steps": 485, "loss": 1.0714, "lr": 0.00013856941299235752, "epoch": 1.5652173913043477, "percentage": 31.55, "elapsed_time": "6:03:12", "remaining_time": "13:08:08"}
+{"current_steps": 154, "total_steps": 485, "loss": 1.0825, "lr": 0.00013817523827401715, "epoch": 1.5754475703324808, "percentage": 31.75, "elapsed_time": "6:05:34", "remaining_time": "13:05:45"}
+{"current_steps": 155, "total_steps": 485, "loss": 1.0583, "lr": 0.00013777804316543438, "epoch": 1.5856777493606138, "percentage": 31.96, "elapsed_time": "6:07:57", "remaining_time": "13:03:22"}
+{"current_steps": 156, "total_steps": 485, "loss": 1.0998, "lr": 0.00013737784828851405, "epoch": 1.5959079283887467, "percentage": 32.16, "elapsed_time": "6:10:20", "remaining_time": "13:01:01"}
+{"current_steps": 157, "total_steps": 485, "loss": 1.0814, "lr": 0.0001369746744209055, "epoch": 1.60613810741688, "percentage": 32.37, "elapsed_time": "6:12:42", "remaining_time": "12:58:39"}
+{"current_steps": 158, "total_steps": 485, "loss": 1.0672, "lr": 0.00013656854249492382, "epoch": 1.6163682864450126, "percentage": 32.58, "elapsed_time": "6:15:05", "remaining_time": "12:56:17"}
+{"current_steps": 159, "total_steps": 485, "loss": 1.077, "lr": 0.00013615947359646295, "epoch": 1.6265984654731458, "percentage": 32.78, "elapsed_time": "6:17:28", "remaining_time": "12:53:56"}
+{"current_steps": 160, "total_steps": 485, "loss": 1.0831, "lr": 0.00013574748896390105, "epoch": 1.6368286445012787, "percentage": 32.99, "elapsed_time": "6:19:50", "remaining_time": "12:51:33"}
+{"current_steps": 161, "total_steps": 485, "loss": 1.0808, "lr": 0.00013533260998699776, "epoch": 1.6470588235294117, "percentage": 33.2, "elapsed_time": "6:22:13", "remaining_time": "12:49:11"}
+{"current_steps": 162, "total_steps": 485, "loss": 1.0609, "lr": 0.00013491485820578373, "epoch": 1.6572890025575449, "percentage": 33.4, "elapsed_time": "6:24:35", "remaining_time": "12:46:48"}
+{"current_steps": 163, "total_steps": 485, "loss": 1.0822, "lr": 0.00013449425530944218, "epoch": 1.6675191815856778, "percentage": 33.61, "elapsed_time": "6:26:58", "remaining_time": "12:44:26"}
+{"current_steps": 164, "total_steps": 485, "loss": 1.0771, "lr": 0.00013407082313518292, "epoch": 1.6777493606138107, "percentage": 33.81, "elapsed_time": "6:29:20", "remaining_time": "12:42:04"}
+{"current_steps": 165, "total_steps": 485, "loss": 1.0853, "lr": 0.0001336445836671086, "epoch": 1.6879795396419437, "percentage": 34.02, "elapsed_time": "6:31:43", "remaining_time": "12:39:41"}
+{"current_steps": 166, "total_steps": 485, "loss": 1.0838, "lr": 0.0001332155590350732, "epoch": 1.6982097186700766, "percentage": 34.23, "elapsed_time": "6:34:05", "remaining_time": "12:37:19"}
+{"current_steps": 167, "total_steps": 485, "loss": 1.082, "lr": 0.0001327837715135332, "epoch": 1.7084398976982098, "percentage": 34.43, "elapsed_time": "6:36:27", "remaining_time": "12:34:56"}
+{"current_steps": 168, "total_steps": 485, "loss": 1.0802, "lr": 0.00013234924352039103, "epoch": 1.7186700767263428, "percentage": 34.64, "elapsed_time": "6:38:51", "remaining_time": "12:32:35"}
+{"current_steps": 169, "total_steps": 485, "loss": 1.0887, "lr": 0.00013191199761583124, "epoch": 1.7289002557544757, "percentage": 34.85, "elapsed_time": "6:41:14", "remaining_time": "12:30:14"}
+{"current_steps": 170, "total_steps": 485, "loss": 1.0718, "lr": 0.00013147205650114913, "epoch": 1.7391304347826086, "percentage": 35.05, "elapsed_time": "6:43:36", "remaining_time": "12:27:52"}
+{"current_steps": 171, "total_steps": 485, "loss": 1.0788, "lr": 0.0001310294430175722, "epoch": 1.7493606138107416, "percentage": 35.26, "elapsed_time": "6:45:59", "remaining_time": "12:25:29"}
+{"current_steps": 172, "total_steps": 485, "loss": 1.0879, "lr": 0.00013058418014507412, "epoch": 1.7595907928388748, "percentage": 35.46, "elapsed_time": "6:48:21", "remaining_time": "12:23:07"}
+{"current_steps": 173, "total_steps": 485, "loss": 1.0721, "lr": 0.00013013629100118183, "epoch": 1.7698209718670077, "percentage": 35.67, "elapsed_time": "6:50:43", "remaining_time": "12:20:44"}
+{"current_steps": 174, "total_steps": 485, "loss": 1.0737, "lr": 0.00012968579883977508, "epoch": 1.7800511508951407, "percentage": 35.88, "elapsed_time": "6:53:06", "remaining_time": "12:18:21"}
+{"current_steps": 175, "total_steps": 485, "loss": 1.0742, "lr": 0.00012923272704987943, "epoch": 1.7902813299232738, "percentage": 36.08, "elapsed_time": "6:55:28", "remaining_time": "12:15:58"}
+{"current_steps": 176, "total_steps": 485, "loss": 1.0721, "lr": 0.00012877709915445155, "epoch": 1.8005115089514065, "percentage": 36.29, "elapsed_time": "6:57:50", "remaining_time": "12:13:35"}
+{"current_steps": 177, "total_steps": 485, "loss": 1.0555, "lr": 0.00012831893880915822, "epoch": 1.8107416879795397, "percentage": 36.49, "elapsed_time": "7:00:12", "remaining_time": "12:11:12"}
+{"current_steps": 178, "total_steps": 485, "loss": 1.0804, "lr": 0.00012785826980114798, "epoch": 1.8209718670076727, "percentage": 36.7, "elapsed_time": "7:02:34", "remaining_time": "12:08:50"}
+{"current_steps": 179, "total_steps": 485, "loss": 1.063, "lr": 0.0001273951160478163, "epoch": 1.8312020460358056, "percentage": 36.91, "elapsed_time": "7:04:57", "remaining_time": "12:06:27"}
+{"current_steps": 180, "total_steps": 485, "loss": 1.0666, "lr": 0.00012692950159556358, "epoch": 1.8414322250639388, "percentage": 37.11, "elapsed_time": "7:07:19", "remaining_time": "12:04:04"}
+{"current_steps": 181, "total_steps": 485, "loss": 1.0703, "lr": 0.00012646145061854697, "epoch": 1.8516624040920715, "percentage": 37.32, "elapsed_time": "7:09:41", "remaining_time": "12:01:42"}
+{"current_steps": 182, "total_steps": 485, "loss": 1.0571, "lr": 0.00012599098741742504, "epoch": 1.8618925831202047, "percentage": 37.53, "elapsed_time": "7:12:05", "remaining_time": "11:59:20"}
+{"current_steps": 183, "total_steps": 485, "loss": 1.0706, "lr": 0.00012551813641809622, "epoch": 1.8721227621483376, "percentage": 37.73, "elapsed_time": "7:14:27", "remaining_time": "11:56:58"}
+{"current_steps": 184, "total_steps": 485, "loss": 1.0779, "lr": 0.0001250429221704306, "epoch": 1.8823529411764706, "percentage": 37.94, "elapsed_time": "7:16:50", "remaining_time": "11:54:36"}
+{"current_steps": 185, "total_steps": 485, "loss": 1.064, "lr": 0.00012456536934699552, "epoch": 1.8925831202046037, "percentage": 38.14, "elapsed_time": "7:19:12", "remaining_time": "11:52:13"}
+{"current_steps": 186, "total_steps": 485, "loss": 1.0585, "lr": 0.0001240855027417742, "epoch": 1.9028132992327365, "percentage": 38.35, "elapsed_time": "7:21:34", "remaining_time": "11:49:51"}
+{"current_steps": 187, "total_steps": 485, "loss": 1.0672, "lr": 0.00012360334726887887, "epoch": 1.9130434782608696, "percentage": 38.56, "elapsed_time": "7:23:57", "remaining_time": "11:47:28"}
+{"current_steps": 188, "total_steps": 485, "loss": 1.0713, "lr": 0.00012311892796125704, "epoch": 1.9232736572890026, "percentage": 38.76, "elapsed_time": "7:26:19", "remaining_time": "11:45:06"}
+{"current_steps": 189, "total_steps": 485, "loss": 1.0536, "lr": 0.0001226322699693918, "epoch": 1.9335038363171355, "percentage": 38.97, "elapsed_time": "7:28:42", "remaining_time": "11:42:44"}
+{"current_steps": 190, "total_steps": 485, "loss": 1.0807, "lr": 0.00012214339855999624, "epoch": 1.9437340153452687, "percentage": 39.18, "elapsed_time": "7:31:05", "remaining_time": "11:40:22"}
+{"current_steps": 191, "total_steps": 485, "loss": 1.0777, "lr": 0.00012165233911470136, "epoch": 1.9539641943734014, "percentage": 39.38, "elapsed_time": "7:33:28", "remaining_time": "11:38:01"}
+{"current_steps": 192, "total_steps": 485, "loss": 1.0525, "lr": 0.00012115911712873851, "epoch": 1.9641943734015346, "percentage": 39.59, "elapsed_time": "7:35:51", "remaining_time": "11:35:39"}
+{"current_steps": 193, "total_steps": 485, "loss": 1.0617, "lr": 0.00012066375820961558, "epoch": 1.9744245524296675, "percentage": 39.79, "elapsed_time": "7:38:14", "remaining_time": "11:33:17"}
+{"current_steps": 194, "total_steps": 485, "loss": 1.0682, "lr": 0.00012016628807578756, "epoch": 1.9846547314578005, "percentage": 40.0, "elapsed_time": "7:40:37", "remaining_time": "11:30:56"}
+{"current_steps": 195, "total_steps": 485, "loss": 1.1518, "lr": 0.00011966673255532119, "epoch": 1.9948849104859336, "percentage": 40.21, "elapsed_time": "7:43:01", "remaining_time": "11:28:35"}
+{"current_steps": 196, "total_steps": 485, "loss": 1.226, "lr": 0.00011916511758455407, "epoch": 2.0051150895140664, "percentage": 40.41, "elapsed_time": "7:45:33", "remaining_time": "11:26:27"}
+{"current_steps": 197, "total_steps": 485, "loss": 1.068, "lr": 0.00011866146920674807, "epoch": 2.0153452685421995, "percentage": 40.62, "elapsed_time": "7:47:56", "remaining_time": "11:24:05"}
+{"current_steps": 198, "total_steps": 485, "loss": 1.0502, "lr": 0.0001181558135707371, "epoch": 2.0255754475703327, "percentage": 40.82, "elapsed_time": "7:50:19", "remaining_time": "11:21:44"}
+{"current_steps": 199, "total_steps": 485, "loss": 1.0286, "lr": 0.00011764817692956966, "epoch": 2.0358056265984654, "percentage": 41.03, "elapsed_time": "7:52:42", "remaining_time": "11:19:22"}
+{"current_steps": 200, "total_steps": 485, "loss": 1.0747, "lr": 0.00011713858563914562, "epoch": 2.0460358056265986, "percentage": 41.24, "elapsed_time": "7:55:05", "remaining_time": "11:17:00"}
+{"current_steps": 201, "total_steps": 485, "loss": 1.045, "lr": 0.00011662706615684803, "epoch": 2.0562659846547313, "percentage": 41.44, "elapsed_time": "7:57:27", "remaining_time": "11:14:37"}
+{"current_steps": 202, "total_steps": 485, "loss": 1.0678, "lr": 0.00011611364504016935, "epoch": 2.0664961636828645, "percentage": 41.65, "elapsed_time": "7:59:50", "remaining_time": "11:12:15"}
+{"current_steps": 203, "total_steps": 485, "loss": 1.0458, "lr": 0.00011559834894533275, "epoch": 2.0767263427109977, "percentage": 41.86, "elapsed_time": "8:02:12", "remaining_time": "11:09:52"}
+{"current_steps": 204, "total_steps": 485, "loss": 1.0461, "lr": 0.00011508120462590794, "epoch": 2.0869565217391304, "percentage": 42.06, "elapsed_time": "8:04:35", "remaining_time": "11:07:29"}
+{"current_steps": 205, "total_steps": 485, "loss": 1.0407, "lr": 0.00011456223893142238, "epoch": 2.0971867007672635, "percentage": 42.27, "elapsed_time": "8:06:57", "remaining_time": "11:05:06"}
+{"current_steps": 206, "total_steps": 485, "loss": 1.0534, "lr": 0.0001140414788059672, "epoch": 2.1074168797953963, "percentage": 42.47, "elapsed_time": "8:09:20", "remaining_time": "11:02:45"}
+{"current_steps": 207, "total_steps": 485, "loss": 1.0577, "lr": 0.00011351895128679823, "epoch": 2.1176470588235294, "percentage": 42.68, "elapsed_time": "8:11:42", "remaining_time": "11:00:22"}
+{"current_steps": 208, "total_steps": 485, "loss": 1.0592, "lr": 0.00011299468350293232, "epoch": 2.1278772378516626, "percentage": 42.89, "elapsed_time": "8:14:05", "remaining_time": "10:57:59"}
+{"current_steps": 209, "total_steps": 485, "loss": 1.069, "lr": 0.00011246870267373885, "epoch": 2.1381074168797953, "percentage": 43.09, "elapsed_time": "8:16:28", "remaining_time": "10:55:38"}
+{"current_steps": 210, "total_steps": 485, "loss": 1.0454, "lr": 0.00011194103610752655, "epoch": 2.1483375959079285, "percentage": 43.3, "elapsed_time": "8:18:51", "remaining_time": "10:53:15"}
+{"current_steps": 211, "total_steps": 485, "loss": 1.0723, "lr": 0.00011141171120012552, "epoch": 2.1585677749360612, "percentage": 43.51, "elapsed_time": "8:21:13", "remaining_time": "10:50:52"}
+{"current_steps": 212, "total_steps": 485, "loss": 1.0428, "lr": 0.0001108807554334651, "epoch": 2.1687979539641944, "percentage": 43.71, "elapsed_time": "8:23:35", "remaining_time": "10:48:29"}
+{"current_steps": 213, "total_steps": 485, "loss": 1.061, "lr": 0.00011034819637414686, "epoch": 2.1790281329923276, "percentage": 43.92, "elapsed_time": "8:25:58", "remaining_time": "10:46:07"}
+{"current_steps": 214, "total_steps": 485, "loss": 1.0355, "lr": 0.00010981406167201354, "epoch": 2.1892583120204603, "percentage": 44.12, "elapsed_time": "8:28:20", "remaining_time": "10:43:44"}
+{"current_steps": 215, "total_steps": 485, "loss": 1.0777, "lr": 0.0001092783790587133, "epoch": 2.1994884910485935, "percentage": 44.33, "elapsed_time": "8:30:43", "remaining_time": "10:41:21"}
+{"current_steps": 216, "total_steps": 485, "loss": 1.0541, "lr": 0.00010874117634626011, "epoch": 2.209718670076726, "percentage": 44.54, "elapsed_time": "8:33:07", "remaining_time": "10:39:01"}
+{"current_steps": 217, "total_steps": 485, "loss": 1.0435, "lr": 0.00010820248142558965, "epoch": 2.2199488491048593, "percentage": 44.74, "elapsed_time": "8:35:30", "remaining_time": "10:36:39"}
+{"current_steps": 218, "total_steps": 485, "loss": 1.0513, "lr": 0.00010766232226511142, "epoch": 2.2301790281329925, "percentage": 44.95, "elapsed_time": "8:37:52", "remaining_time": "10:34:17"}
+{"current_steps": 219, "total_steps": 485, "loss": 1.0509, "lr": 0.00010712072690925638, "epoch": 2.2404092071611252, "percentage": 45.15, "elapsed_time": "8:40:15", "remaining_time": "10:31:54"}
+{"current_steps": 220, "total_steps": 485, "loss": 1.0325, "lr": 0.00010657772347702118, "epoch": 2.2506393861892584, "percentage": 45.36, "elapsed_time": "8:42:37", "remaining_time": "10:29:31"}
+{"current_steps": 221, "total_steps": 485, "loss": 1.0369, "lr": 0.00010603334016050808, "epoch": 2.260869565217391, "percentage": 45.57, "elapsed_time": "8:45:00", "remaining_time": "10:27:09"}
+{"current_steps": 222, "total_steps": 485, "loss": 1.0414, "lr": 0.00010548760522346138, "epoch": 2.2710997442455243, "percentage": 45.77, "elapsed_time": "8:47:22", "remaining_time": "10:24:46"}
+{"current_steps": 223, "total_steps": 485, "loss": 1.056, "lr": 0.00010494054699979992, "epoch": 2.2813299232736575, "percentage": 45.98, "elapsed_time": "8:49:45", "remaining_time": "10:22:24"}
+{"current_steps": 224, "total_steps": 485, "loss": 1.0573, "lr": 0.00010439219389214595, "epoch": 2.29156010230179, "percentage": 46.19, "elapsed_time": "8:52:07", "remaining_time": "10:20:01"}
+{"current_steps": 225, "total_steps": 485, "loss": 1.0412, "lr": 0.0001038425743703507, "epoch": 2.3017902813299234, "percentage": 46.39, "elapsed_time": "8:54:30", "remaining_time": "10:17:38"}
+{"current_steps": 226, "total_steps": 485, "loss": 1.0366, "lr": 0.00010329171697001608, "epoch": 2.312020460358056, "percentage": 46.6, "elapsed_time": "8:56:53", "remaining_time": "10:15:17"}
+{"current_steps": 227, "total_steps": 485, "loss": 1.0451, "lr": 0.0001027396502910132, "epoch": 2.3222506393861893, "percentage": 46.8, "elapsed_time": "8:59:15", "remaining_time": "10:12:54"}
+{"current_steps": 228, "total_steps": 485, "loss": 1.0428, "lr": 0.0001021864029959975, "epoch": 2.3324808184143224, "percentage": 47.01, "elapsed_time": "9:01:39", "remaining_time": "10:10:32"}
+{"current_steps": 229, "total_steps": 485, "loss": 1.0612, "lr": 0.00010163200380892063, "epoch": 2.342710997442455, "percentage": 47.22, "elapsed_time": "9:04:01", "remaining_time": "10:08:10"}
+{"current_steps": 230, "total_steps": 485, "loss": 1.0247, "lr": 0.00010107648151353916, "epoch": 2.3529411764705883, "percentage": 47.42, "elapsed_time": "9:06:25", "remaining_time": "10:05:48"}
+{"current_steps": 231, "total_steps": 485, "loss": 1.0363, "lr": 0.00010051986495192008, "epoch": 2.363171355498721, "percentage": 47.63, "elapsed_time": "9:08:47", "remaining_time": "10:03:25"}
+{"current_steps": 232, "total_steps": 485, "loss": 1.05, "lr": 9.99621830229434e-05, "epoch": 2.373401534526854, "percentage": 47.84, "elapsed_time": "9:11:09", "remaining_time": "10:01:02"}
+{"current_steps": 233, "total_steps": 485, "loss": 1.0537, "lr": 9.94034646808018e-05, "epoch": 2.3836317135549874, "percentage": 48.04, "elapsed_time": "9:13:31", "remaining_time": "9:58:40"}
+{"current_steps": 234, "total_steps": 485, "loss": 1.0273, "lr": 9.884373893349725e-05, "epoch": 2.39386189258312, "percentage": 48.25, "elapsed_time": "9:15:54", "remaining_time": "9:56:17"}
+{"current_steps": 235, "total_steps": 485, "loss": 1.053, "lr": 9.828303484133515e-05, "epoch": 2.4040920716112533, "percentage": 48.45, "elapsed_time": "9:18:17", "remaining_time": "9:53:55"}
+{"current_steps": 236, "total_steps": 485, "loss": 1.0364, "lr": 9.772138151541522e-05, "epoch": 2.414322250639386, "percentage": 48.66, "elapsed_time": "9:20:39", "remaining_time": "9:51:32"}
+{"current_steps": 237, "total_steps": 485, "loss": 1.0331, "lr": 9.715880811612044e-05, "epoch": 2.424552429667519, "percentage": 48.87, "elapsed_time": "9:23:02", "remaining_time": "9:49:10"}
+{"current_steps": 238, "total_steps": 485, "loss": 1.0323, "lr": 9.659534385160289e-05, "epoch": 2.4347826086956523, "percentage": 49.07, "elapsed_time": "9:25:25", "remaining_time": "9:46:48"}
+{"current_steps": 239, "total_steps": 485, "loss": 1.0491, "lr": 9.603101797626729e-05, "epoch": 2.445012787723785, "percentage": 49.28, "elapsed_time": "9:27:47", "remaining_time": "9:44:25"}
+{"current_steps": 240, "total_steps": 485, "loss": 1.0127, "lr": 9.546585978925221e-05, "epoch": 2.455242966751918, "percentage": 49.48, "elapsed_time": "9:30:10", "remaining_time": "9:42:02"}
+{"current_steps": 241, "total_steps": 485, "loss": 1.0637, "lr": 9.489989863290885e-05, "epoch": 2.4654731457800514, "percentage": 49.69, "elapsed_time": "9:32:32", "remaining_time": "9:39:40"}
+{"current_steps": 242, "total_steps": 485, "loss": 1.038, "lr": 9.433316389127768e-05, "epoch": 2.475703324808184, "percentage": 49.9, "elapsed_time": "9:34:55", "remaining_time": "9:37:17"}
+{"current_steps": 243, "total_steps": 485, "loss": 1.0441, "lr": 9.37656849885628e-05, "epoch": 2.4859335038363173, "percentage": 50.1, "elapsed_time": "9:37:17", "remaining_time": "9:34:54"}
+{"current_steps": 244, "total_steps": 485, "loss": 1.0317, "lr": 9.319749138760424e-05, "epoch": 2.49616368286445, "percentage": 50.31, "elapsed_time": "9:39:39", "remaining_time": "9:32:32"}
+{"current_steps": 245, "total_steps": 485, "loss": 1.0353, "lr": 9.262861258834833e-05, "epoch": 2.506393861892583, "percentage": 50.52, "elapsed_time": "9:42:02", "remaining_time": "9:30:09"}
+{"current_steps": 246, "total_steps": 485, "loss": 1.0211, "lr": 9.205907812631616e-05, "epoch": 2.516624040920716, "percentage": 50.72, "elapsed_time": "9:44:24", "remaining_time": "9:27:47"}
+{"current_steps": 247, "total_steps": 485, "loss": 1.0381, "lr": 9.148891757106999e-05, "epoch": 2.526854219948849, "percentage": 50.93, "elapsed_time": "9:46:48", "remaining_time": "9:25:25"}
+{"current_steps": 248, "total_steps": 485, "loss": 1.045, "lr": 9.091816052467817e-05, "epoch": 2.5370843989769822, "percentage": 51.13, "elapsed_time": "9:49:10", "remaining_time": "9:23:02"}
+{"current_steps": 249, "total_steps": 485, "loss": 1.0339, "lr": 9.034683662017812e-05, "epoch": 2.547314578005115, "percentage": 51.34, "elapsed_time": "9:51:33", "remaining_time": "9:20:39"}
+{"current_steps": 250, "total_steps": 485, "loss": 1.0297, "lr": 8.977497552003785e-05, "epoch": 2.557544757033248, "percentage": 51.55, "elapsed_time": "9:53:55", "remaining_time": "9:18:17"}
+{"current_steps": 251, "total_steps": 485, "loss": 1.0474, "lr": 8.920260691461602e-05, "epoch": 2.5677749360613813, "percentage": 51.75, "elapsed_time": "9:56:17", "remaining_time": "9:15:54"}
+{"current_steps": 252, "total_steps": 485, "loss": 1.0478, "lr": 8.862976052062034e-05, "epoch": 2.578005115089514, "percentage": 51.96, "elapsed_time": "9:58:40", "remaining_time": "9:13:32"}
+{"current_steps": 253, "total_steps": 485, "loss": 1.0384, "lr": 8.805646607956467e-05, "epoch": 2.588235294117647, "percentage": 52.16, "elapsed_time": "10:01:03", "remaining_time": "9:11:09"}
+{"current_steps": 254, "total_steps": 485, "loss": 1.0352, "lr": 8.748275335622506e-05, "epoch": 2.59846547314578, "percentage": 52.37, "elapsed_time": "10:03:25", "remaining_time": "9:08:46"}
+{"current_steps": 255, "total_steps": 485, "loss": 1.0251, "lr": 8.69086521370942e-05, "epoch": 2.608695652173913, "percentage": 52.58, "elapsed_time": "10:05:47", "remaining_time": "9:06:24"}
+{"current_steps": 256, "total_steps": 485, "loss": 1.0388, "lr": 8.633419222883508e-05, "epoch": 2.618925831202046, "percentage": 52.78, "elapsed_time": "10:08:09", "remaining_time": "9:04:01"}
+{"current_steps": 257, "total_steps": 485, "loss": 1.0415, "lr": 8.575940345673337e-05, "epoch": 2.629156010230179, "percentage": 52.99, "elapsed_time": "10:10:32", "remaining_time": "9:01:38"}
+{"current_steps": 258, "total_steps": 485, "loss": 1.0338, "lr": 8.518431566314901e-05, "epoch": 2.639386189258312, "percentage": 53.2, "elapsed_time": "10:12:54", "remaining_time": "8:59:16"}
+{"current_steps": 259, "total_steps": 485, "loss": 1.0455, "lr": 8.460895870596675e-05, "epoch": 2.649616368286445, "percentage": 53.4, "elapsed_time": "10:15:17", "remaining_time": "8:56:53"}
+{"current_steps": 260, "total_steps": 485, "loss": 1.0446, "lr": 8.4033362457046e-05, "epoch": 2.659846547314578, "percentage": 53.61, "elapsed_time": "10:17:39", "remaining_time": "8:54:30"}
+{"current_steps": 261, "total_steps": 485, "loss": 1.0282, "lr": 8.345755680066993e-05, "epoch": 2.670076726342711, "percentage": 53.81, "elapsed_time": "10:20:02", "remaining_time": "8:52:08"}
+{"current_steps": 262, "total_steps": 485, "loss": 1.0278, "lr": 8.288157163199389e-05, "epoch": 2.680306905370844, "percentage": 54.02, "elapsed_time": "10:22:24", "remaining_time": "8:49:45"}
+{"current_steps": 263, "total_steps": 485, "loss": 1.0317, "lr": 8.230543685549333e-05, "epoch": 2.690537084398977, "percentage": 54.23, "elapsed_time": "10:24:46", "remaining_time": "8:47:22"}
+{"current_steps": 264, "total_steps": 485, "loss": 1.0326, "lr": 8.17291823834111e-05, "epoch": 2.70076726342711, "percentage": 54.43, "elapsed_time": "10:27:09", "remaining_time": "8:45:00"}
+{"current_steps": 265, "total_steps": 485, "loss": 1.0375, "lr": 8.115283813420459e-05, "epoch": 2.710997442455243, "percentage": 54.64, "elapsed_time": "10:29:32", "remaining_time": "8:42:38"}
+{"current_steps": 266, "total_steps": 485, "loss": 1.0584, "lr": 8.057643403099221e-05, "epoch": 2.7212276214833757, "percentage": 54.85, "elapsed_time": "10:31:54", "remaining_time": "8:40:15"}
+{"current_steps": 267, "total_steps": 485, "loss": 1.0395, "lr": 8e-05, "epoch": 2.731457800511509, "percentage": 55.05, "elapsed_time": "10:34:17", "remaining_time": "8:37:53"}
+{"current_steps": 268, "total_steps": 485, "loss": 1.0369, "lr": 7.94235659690078e-05, "epoch": 2.741687979539642, "percentage": 55.26, "elapsed_time": "10:36:40", "remaining_time": "8:35:31"}
+{"current_steps": 269, "total_steps": 485, "loss": 1.0532, "lr": 7.884716186579545e-05, "epoch": 2.7519181585677748, "percentage": 55.46, "elapsed_time": "10:39:03", "remaining_time": "8:33:08"}
+{"current_steps": 270, "total_steps": 485, "loss": 1.0266, "lr": 7.827081761658892e-05, "epoch": 2.762148337595908, "percentage": 55.67, "elapsed_time": "10:41:24", "remaining_time": "8:30:45"}
+{"current_steps": 271, "total_steps": 485, "loss": 1.0344, "lr": 7.76945631445067e-05, "epoch": 2.772378516624041, "percentage": 55.88, "elapsed_time": "10:43:47", "remaining_time": "8:28:22"}
+{"current_steps": 272, "total_steps": 485, "loss": 1.0285, "lr": 7.711842836800614e-05, "epoch": 2.782608695652174, "percentage": 56.08, "elapsed_time": "10:46:09", "remaining_time": "8:26:00"}
+{"current_steps": 273, "total_steps": 485, "loss": 1.0272, "lr": 7.654244319933009e-05, "epoch": 2.792838874680307, "percentage": 56.29, "elapsed_time": "10:48:31", "remaining_time": "8:23:37"}
+{"current_steps": 274, "total_steps": 485, "loss": 1.0427, "lr": 7.596663754295404e-05, "epoch": 2.80306905370844, "percentage": 56.49, "elapsed_time": "10:50:54", "remaining_time": "8:21:14"}
+{"current_steps": 275, "total_steps": 485, "loss": 1.0474, "lr": 7.539104129403327e-05, "epoch": 2.813299232736573, "percentage": 56.7, "elapsed_time": "10:53:17", "remaining_time": "8:18:52"}
+{"current_steps": 276, "total_steps": 485, "loss": 1.0445, "lr": 7.4815684336851e-05, "epoch": 2.8235294117647056, "percentage": 56.91, "elapsed_time": "10:55:40", "remaining_time": "8:16:30"}
+{"current_steps": 277, "total_steps": 485, "loss": 1.04, "lr": 7.424059654326664e-05, "epoch": 2.833759590792839, "percentage": 57.11, "elapsed_time": "10:58:02", "remaining_time": "8:14:07"}
+{"current_steps": 278, "total_steps": 485, "loss": 1.0406, "lr": 7.366580777116495e-05, "epoch": 2.843989769820972, "percentage": 57.32, "elapsed_time": "11:00:25", "remaining_time": "8:11:45"}
+{"current_steps": 279, "total_steps": 485, "loss": 1.0321, "lr": 7.309134786290583e-05, "epoch": 2.8542199488491047, "percentage": 57.53, "elapsed_time": "11:02:47", "remaining_time": "8:09:22"}
+{"current_steps": 280, "total_steps": 485, "loss": 1.0371, "lr": 7.251724664377497e-05, "epoch": 2.864450127877238, "percentage": 57.73, "elapsed_time": "11:05:10", "remaining_time": "8:06:59"}
+{"current_steps": 281, "total_steps": 485, "loss": 1.039, "lr": 7.194353392043534e-05, "epoch": 2.874680306905371, "percentage": 57.94, "elapsed_time": "11:07:32", "remaining_time": "8:04:37"}
+{"current_steps": 282, "total_steps": 485, "loss": 1.0364, "lr": 7.13702394793797e-05, "epoch": 2.8849104859335037, "percentage": 58.14, "elapsed_time": "11:09:54", "remaining_time": "8:02:14"}
+{"current_steps": 283, "total_steps": 485, "loss": 1.0277, "lr": 7.079739308538399e-05, "epoch": 2.895140664961637, "percentage": 58.35, "elapsed_time": "11:12:17", "remaining_time": "7:59:51"}
+{"current_steps": 284, "total_steps": 485, "loss": 1.0275, "lr": 7.022502447996215e-05, "epoch": 2.90537084398977, "percentage": 58.56, "elapsed_time": "11:14:39", "remaining_time": "7:57:29"}
+{"current_steps": 285, "total_steps": 485, "loss": 1.0381, "lr": 6.965316337982191e-05, "epoch": 2.915601023017903, "percentage": 58.76, "elapsed_time": "11:17:02", "remaining_time": "7:55:06"}
+{"current_steps": 286, "total_steps": 485, "loss": 1.0342, "lr": 6.908183947532184e-05, "epoch": 2.9258312020460355, "percentage": 58.97, "elapsed_time": "11:19:24", "remaining_time": "7:52:44"}
+{"current_steps": 287, "total_steps": 485, "loss": 1.0377, "lr": 6.851108242893002e-05, "epoch": 2.9360613810741687, "percentage": 59.18, "elapsed_time": "11:21:47", "remaining_time": "7:50:21"}
+{"current_steps": 288, "total_steps": 485, "loss": 1.0428, "lr": 6.794092187368387e-05, "epoch": 2.946291560102302, "percentage": 59.38, "elapsed_time": "11:24:10", "remaining_time": "7:47:59"}
+{"current_steps": 289, "total_steps": 485, "loss": 1.0503, "lr": 6.737138741165168e-05, "epoch": 2.9565217391304346, "percentage": 59.59, "elapsed_time": "11:26:32", "remaining_time": "7:45:36"}
+{"current_steps": 290, "total_steps": 485, "loss": 1.035, "lr": 6.680250861239581e-05, "epoch": 2.9667519181585678, "percentage": 59.79, "elapsed_time": "11:28:55", "remaining_time": "7:43:14"}
+{"current_steps": 291, "total_steps": 485, "loss": 1.0313, "lr": 6.623431501143723e-05, "epoch": 2.976982097186701, "percentage": 60.0, "elapsed_time": "11:31:17", "remaining_time": "7:40:51"}
+{"current_steps": 292, "total_steps": 485, "loss": 1.0564, "lr": 6.566683610872231e-05, "epoch": 2.9872122762148337, "percentage": 60.21, "elapsed_time": "11:33:40", "remaining_time": "7:38:29"}
+{"current_steps": 293, "total_steps": 485, "loss": 1.2037, "lr": 6.510010136709118e-05, "epoch": 2.997442455242967, "percentage": 60.41, "elapsed_time": "11:36:02", "remaining_time": "7:36:06"}
+{"current_steps": 294, "total_steps": 485, "loss": 1.1394, "lr": 6.453414021074781e-05, "epoch": 3.0076726342710995, "percentage": 60.62, "elapsed_time": "11:38:34", "remaining_time": "7:33:50"}
+{"current_steps": 295, "total_steps": 485, "loss": 1.0223, "lr": 6.396898202373277e-05, "epoch": 3.0179028132992327, "percentage": 60.82, "elapsed_time": "11:40:56", "remaining_time": "7:31:27"}
+{"current_steps": 296, "total_steps": 485, "loss": 1.0336, "lr": 6.340465614839714e-05, "epoch": 3.028132992327366, "percentage": 61.03, "elapsed_time": "11:43:19", "remaining_time": "7:29:04"}
+{"current_steps": 297, "total_steps": 485, "loss": 1.0107, "lr": 6.284119188387957e-05, "epoch": 3.0383631713554986, "percentage": 61.24, "elapsed_time": "11:45:41", "remaining_time": "7:26:42"}
+{"current_steps": 298, "total_steps": 485, "loss": 1.0134, "lr": 6.227861848458481e-05, "epoch": 3.0485933503836318, "percentage": 61.44, "elapsed_time": "11:48:03", "remaining_time": "7:24:19"}
+{"current_steps": 299, "total_steps": 485, "loss": 1.0289, "lr": 6.171696515866488e-05, "epoch": 3.0588235294117645, "percentage": 61.65, "elapsed_time": "11:50:26", "remaining_time": "7:21:56"}
+{"current_steps": 300, "total_steps": 485, "loss": 1.0297, "lr": 6.115626106650273e-05, "epoch": 3.0690537084398977, "percentage": 61.86, "elapsed_time": "11:52:49", "remaining_time": "7:19:34"}
+{"current_steps": 301, "total_steps": 485, "loss": 1.0282, "lr": 6.059653531919823e-05, "epoch": 3.079283887468031, "percentage": 62.06, "elapsed_time": "11:55:11", "remaining_time": "7:17:11"}
+{"current_steps": 302, "total_steps": 485, "loss": 1.0531, "lr": 6.0037816977056625e-05, "epoch": 3.0895140664961636, "percentage": 62.27, "elapsed_time": "11:57:34", "remaining_time": "7:14:49"}
+{"current_steps": 303, "total_steps": 485, "loss": 1.0113, "lr": 5.9480135048079964e-05, "epoch": 3.0997442455242967, "percentage": 62.47, "elapsed_time": "11:59:56", "remaining_time": "7:12:26"}
+{"current_steps": 304, "total_steps": 485, "loss": 1.0394, "lr": 5.892351848646087e-05, "epoch": 3.10997442455243, "percentage": 62.68, "elapsed_time": "12:02:19", "remaining_time": "7:10:04"}
+{"current_steps": 305, "total_steps": 485, "loss": 1.0365, "lr": 5.836799619107937e-05, "epoch": 3.1202046035805626, "percentage": 62.89, "elapsed_time": "12:04:41", "remaining_time": "7:07:41"}
+{"current_steps": 306, "total_steps": 485, "loss": 1.0039, "lr": 5.781359700400254e-05, "epoch": 3.130434782608696, "percentage": 63.09, "elapsed_time": "12:07:04", "remaining_time": "7:05:18"}
+{"current_steps": 307, "total_steps": 485, "loss": 1.0243, "lr": 5.726034970898682e-05, "epoch": 3.1406649616368285, "percentage": 63.3, "elapsed_time": "12:09:26", "remaining_time": "7:02:56"}
+{"current_steps": 308, "total_steps": 485, "loss": 1.0314, "lr": 5.670828302998393e-05, "epoch": 3.1508951406649617, "percentage": 63.51, "elapsed_time": "12:11:49", "remaining_time": "7:00:33"}
+{"current_steps": 309, "total_steps": 485, "loss": 1.0485, "lr": 5.6157425629649314e-05, "epoch": 3.1611253196930944, "percentage": 63.71, "elapsed_time": "12:14:11", "remaining_time": "6:58:10"}
+{"current_steps": 310, "total_steps": 485, "loss": 1.018, "lr": 5.560780610785406e-05, "epoch": 3.1713554987212276, "percentage": 63.92, "elapsed_time": "12:16:34", "remaining_time": "6:55:48"}
+{"current_steps": 311, "total_steps": 485, "loss": 1.0061, "lr": 5.5059453000200125e-05, "epoch": 3.1815856777493607, "percentage": 64.12, "elapsed_time": "12:18:56", "remaining_time": "6:53:25"}
+{"current_steps": 312, "total_steps": 485, "loss": 1.0205, "lr": 5.451239477653864e-05, "epoch": 3.1918158567774935, "percentage": 64.33, "elapsed_time": "12:21:19", "remaining_time": "6:51:03"}
+{"current_steps": 313, "total_steps": 485, "loss": 1.0226, "lr": 5.3966659839491936e-05, "epoch": 3.2020460358056266, "percentage": 64.54, "elapsed_time": "12:23:41", "remaining_time": "6:48:40"}
+{"current_steps": 314, "total_steps": 485, "loss": 1.001, "lr": 5.342227652297887e-05, "epoch": 3.21227621483376, "percentage": 64.74, "elapsed_time": "12:26:03", "remaining_time": "6:46:17"}
+{"current_steps": 315, "total_steps": 485, "loss": 1.031, "lr": 5.287927309074365e-05, "epoch": 3.2225063938618925, "percentage": 64.95, "elapsed_time": "12:28:25", "remaining_time": "6:43:54"}
+{"current_steps": 316, "total_steps": 485, "loss": 1.015, "lr": 5.233767773488859e-05, "epoch": 3.2327365728900257, "percentage": 65.15, "elapsed_time": "12:30:48", "remaining_time": "6:41:32"}
+{"current_steps": 317, "total_steps": 485, "loss": 1.0053, "lr": 5.179751857441036e-05, "epoch": 3.2429667519181584, "percentage": 65.36, "elapsed_time": "12:33:10", "remaining_time": "6:39:09"}
+{"current_steps": 318, "total_steps": 485, "loss": 1.0211, "lr": 5.1258823653739914e-05, "epoch": 3.2531969309462916, "percentage": 65.57, "elapsed_time": "12:35:32", "remaining_time": "6:36:46"}
+{"current_steps": 319, "total_steps": 485, "loss": 1.0143, "lr": 5.0721620941286735e-05, "epoch": 3.2634271099744243, "percentage": 65.77, "elapsed_time": "12:37:54", "remaining_time": "6:34:23"}
+{"current_steps": 320, "total_steps": 485, "loss": 1.0375, "lr": 5.018593832798649e-05, "epoch": 3.2736572890025575, "percentage": 65.98, "elapsed_time": "12:40:16", "remaining_time": "6:32:01"}
+{"current_steps": 321, "total_steps": 485, "loss": 1.0253, "lr": 4.965180362585315e-05, "epoch": 3.2838874680306906, "percentage": 66.19, "elapsed_time": "12:42:39", "remaining_time": "6:29:38"}
+{"current_steps": 322, "total_steps": 485, "loss": 1.0209, "lr": 4.911924456653494e-05, "epoch": 3.2941176470588234, "percentage": 66.39, "elapsed_time": "12:45:01", "remaining_time": "6:27:15"}
+{"current_steps": 323, "total_steps": 485, "loss": 1.0112, "lr": 4.8588288799874514e-05, "epoch": 3.3043478260869565, "percentage": 66.6, "elapsed_time": "12:47:23", "remaining_time": "6:24:53"}
+{"current_steps": 324, "total_steps": 485, "loss": 1.0077, "lr": 4.805896389247348e-05, "epoch": 3.3145780051150897, "percentage": 66.8, "elapsed_time": "12:49:46", "remaining_time": "6:22:30"}
+{"current_steps": 325, "total_steps": 485, "loss": 1.0229, "lr": 4.753129732626116e-05, "epoch": 3.3248081841432224, "percentage": 67.01, "elapsed_time": "12:52:08", "remaining_time": "6:20:08"}
+{"current_steps": 326, "total_steps": 485, "loss": 1.0184, "lr": 4.70053164970677e-05, "epoch": 3.3350383631713556, "percentage": 67.22, "elapsed_time": "12:54:31", "remaining_time": "6:17:45"}
+{"current_steps": 327, "total_steps": 485, "loss": 1.0058, "lr": 4.6481048713201825e-05, "epoch": 3.3452685421994883, "percentage": 67.42, "elapsed_time": "12:56:53", "remaining_time": "6:15:22"}
+{"current_steps": 328, "total_steps": 485, "loss": 1.0278, "lr": 4.595852119403282e-05, "epoch": 3.3554987212276215, "percentage": 67.63, "elapsed_time": "12:59:16", "remaining_time": "6:13:00"}
+{"current_steps": 329, "total_steps": 485, "loss": 1.012, "lr": 4.543776106857765e-05, "epoch": 3.3657289002557547, "percentage": 67.84, "elapsed_time": "13:01:38", "remaining_time": "6:10:37"}
+{"current_steps": 330, "total_steps": 485, "loss": 1.0242, "lr": 4.491879537409211e-05, "epoch": 3.3759590792838874, "percentage": 68.04, "elapsed_time": "13:04:01", "remaining_time": "6:08:15"}
+{"current_steps": 331, "total_steps": 485, "loss": 1.0078, "lr": 4.4401651054667274e-05, "epoch": 3.3861892583120206, "percentage": 68.25, "elapsed_time": "13:06:24", "remaining_time": "6:05:52"}
+{"current_steps": 332, "total_steps": 485, "loss": 1.0141, "lr": 4.3886354959830625e-05, "epoch": 3.3964194373401533, "percentage": 68.45, "elapsed_time": "13:08:47", "remaining_time": "6:03:30"}
+{"current_steps": 333, "total_steps": 485, "loss": 1.031, "lr": 4.3372933843152e-05, "epoch": 3.4066496163682864, "percentage": 68.66, "elapsed_time": "13:11:10", "remaining_time": "6:01:08"}
+{"current_steps": 334, "total_steps": 485, "loss": 1.0261, "lr": 4.2861414360854387e-05, "epoch": 3.4168797953964196, "percentage": 68.87, "elapsed_time": "13:13:32", "remaining_time": "5:58:45"}
+{"current_steps": 335, "total_steps": 485, "loss": 1.0191, "lr": 4.2351823070430376e-05, "epoch": 3.4271099744245523, "percentage": 69.07, "elapsed_time": "13:15:55", "remaining_time": "5:56:22"}
+{"current_steps": 336, "total_steps": 485, "loss": 1.0267, "lr": 4.184418642926289e-05, "epoch": 3.4373401534526855, "percentage": 69.28, "elapsed_time": "13:18:17", "remaining_time": "5:54:00"}
+{"current_steps": 337, "total_steps": 485, "loss": 1.025, "lr": 4.133853079325196e-05, "epoch": 3.4475703324808182, "percentage": 69.48, "elapsed_time": "13:20:39", "remaining_time": "5:51:37"}
+{"current_steps": 338, "total_steps": 485, "loss": 1.0459, "lr": 4.083488241544595e-05, "epoch": 3.4578005115089514, "percentage": 69.69, "elapsed_time": "13:23:02", "remaining_time": "5:49:15"}
+{"current_steps": 339, "total_steps": 485, "loss": 1.0112, "lr": 4.033326744467882e-05, "epoch": 3.4680306905370846, "percentage": 69.9, "elapsed_time": "13:25:24", "remaining_time": "5:46:52"}
+{"current_steps": 340, "total_steps": 485, "loss": 1.0306, "lr": 3.983371192421246e-05, "epoch": 3.4782608695652173, "percentage": 70.1, "elapsed_time": "13:27:47", "remaining_time": "5:44:29"}
+{"current_steps": 341, "total_steps": 485, "loss": 1.0185, "lr": 3.933624179038446e-05, "epoch": 3.4884910485933505, "percentage": 70.31, "elapsed_time": "13:30:10", "remaining_time": "5:42:07"}
+{"current_steps": 342, "total_steps": 485, "loss": 1.0293, "lr": 3.884088287126151e-05, "epoch": 3.498721227621483, "percentage": 70.52, "elapsed_time": "13:32:33", "remaining_time": "5:39:45"}
+{"current_steps": 343, "total_steps": 485, "loss": 1.0458, "lr": 3.834766088529867e-05, "epoch": 3.5089514066496164, "percentage": 70.72, "elapsed_time": "13:34:55", "remaining_time": "5:37:22"}
+{"current_steps": 344, "total_steps": 485, "loss": 1.0056, "lr": 3.785660144000378e-05, "epoch": 3.5191815856777495, "percentage": 70.93, "elapsed_time": "13:37:18", "remaining_time": "5:34:59"}
+{"current_steps": 345, "total_steps": 485, "loss": 1.0297, "lr": 3.736773003060821e-05, "epoch": 3.5294117647058822, "percentage": 71.13, "elapsed_time": "13:39:40", "remaining_time": "5:32:37"}
+{"current_steps": 346, "total_steps": 485, "loss": 1.0416, "lr": 3.688107203874301e-05, "epoch": 3.5396419437340154, "percentage": 71.34, "elapsed_time": "13:42:02", "remaining_time": "5:30:14"}
+{"current_steps": 347, "total_steps": 485, "loss": 1.0204, "lr": 3.6396652731121136e-05, "epoch": 3.5498721227621486, "percentage": 71.55, "elapsed_time": "13:44:25", "remaining_time": "5:27:52"}
+{"current_steps": 348, "total_steps": 485, "loss": 1.0281, "lr": 3.5914497258225815e-05, "epoch": 3.5601023017902813, "percentage": 71.75, "elapsed_time": "13:46:48", "remaining_time": "5:25:29"}
+{"current_steps": 349, "total_steps": 485, "loss": 1.0271, "lr": 3.543463065300452e-05, "epoch": 3.5703324808184145, "percentage": 71.96, "elapsed_time": "13:49:10", "remaining_time": "5:23:07"}
+{"current_steps": 350, "total_steps": 485, "loss": 1.0253, "lr": 3.49570778295694e-05, "epoch": 3.580562659846547, "percentage": 72.16, "elapsed_time": "13:51:32", "remaining_time": "5:20:44"}
+{"current_steps": 351, "total_steps": 485, "loss": 1.0155, "lr": 3.448186358190383e-05, "epoch": 3.5907928388746804, "percentage": 72.37, "elapsed_time": "13:53:55", "remaining_time": "5:18:22"}
+{"current_steps": 352, "total_steps": 485, "loss": 1.0316, "lr": 3.400901258257501e-05, "epoch": 3.601023017902813, "percentage": 72.58, "elapsed_time": "13:56:18", "remaining_time": "5:15:59"}
+{"current_steps": 353, "total_steps": 485, "loss": 1.0147, "lr": 3.3538549381453046e-05, "epoch": 3.6112531969309463, "percentage": 72.78, "elapsed_time": "13:58:40", "remaining_time": "5:13:36"}
+{"current_steps": 354, "total_steps": 485, "loss": 1.0158, "lr": 3.307049840443644e-05, "epoch": 3.6214833759590794, "percentage": 72.99, "elapsed_time": "14:01:03", "remaining_time": "5:11:14"}
+{"current_steps": 355, "total_steps": 485, "loss": 1.0219, "lr": 3.2604883952183716e-05, "epoch": 3.631713554987212, "percentage": 73.2, "elapsed_time": "14:03:25", "remaining_time": "5:08:51"}
+{"current_steps": 356, "total_steps": 485, "loss": 1.0165, "lr": 3.214173019885202e-05, "epoch": 3.6419437340153453, "percentage": 73.4, "elapsed_time": "14:05:48", "remaining_time": "5:06:29"}
+{"current_steps": 357, "total_steps": 485, "loss": 1.0193, "lr": 3.1681061190841806e-05, "epoch": 3.6521739130434785, "percentage": 73.61, "elapsed_time": "14:08:10", "remaining_time": "5:04:06"}
+{"current_steps": 358, "total_steps": 485, "loss": 1.0309, "lr": 3.122290084554845e-05, "epoch": 3.662404092071611, "percentage": 73.81, "elapsed_time": "14:10:33", "remaining_time": "5:01:44"}
+{"current_steps": 359, "total_steps": 485, "loss": 1.0106, "lr": 3.076727295012059e-05, "epoch": 3.6726342710997444, "percentage": 74.02, "elapsed_time": "14:12:56", "remaining_time": "4:59:21"}
+{"current_steps": 360, "total_steps": 485, "loss": 1.0237, "lr": 3.031420116022493e-05, "epoch": 3.682864450127877, "percentage": 74.23, "elapsed_time": "14:15:20", "remaining_time": "4:56:59"}
+{"current_steps": 361, "total_steps": 485, "loss": 1.0071, "lr": 2.98637089988182e-05, "epoch": 3.6930946291560103, "percentage": 74.43, "elapsed_time": "14:17:42", "remaining_time": "4:54:36"}
+{"current_steps": 362, "total_steps": 485, "loss": 1.0213, "lr": 2.94158198549259e-05, "epoch": 3.703324808184143, "percentage": 74.64, "elapsed_time": "14:20:06", "remaining_time": "4:52:14"}
+{"current_steps": 363, "total_steps": 485, "loss": 1.0114, "lr": 2.8970556982427836e-05, "epoch": 3.713554987212276, "percentage": 74.85, "elapsed_time": "14:22:28", "remaining_time": "4:49:52"}
+{"current_steps": 364, "total_steps": 485, "loss": 1.0141, "lr": 2.852794349885087e-05, "epoch": 3.7237851662404093, "percentage": 75.05, "elapsed_time": "14:24:50", "remaining_time": "4:47:29"}
+{"current_steps": 365, "total_steps": 485, "loss": 1.0309, "lr": 2.8088002384168783e-05, "epoch": 3.734015345268542, "percentage": 75.26, "elapsed_time": "14:27:13", "remaining_time": "4:45:06"}
+{"current_steps": 366, "total_steps": 485, "loss": 1.0133, "lr": 2.765075647960898e-05, "epoch": 3.7442455242966752, "percentage": 75.46, "elapsed_time": "14:29:35", "remaining_time": "4:42:44"}
+{"current_steps": 367, "total_steps": 485, "loss": 1.0158, "lr": 2.7216228486466856e-05, "epoch": 3.7544757033248084, "percentage": 75.67, "elapsed_time": "14:31:57", "remaining_time": "4:40:21"}
+{"current_steps": 368, "total_steps": 485, "loss": 1.02, "lr": 2.678444096492683e-05, "epoch": 3.764705882352941, "percentage": 75.88, "elapsed_time": "14:34:20", "remaining_time": "4:37:59"}
+{"current_steps": 369, "total_steps": 485, "loss": 1.0185, "lr": 2.6355416332891404e-05, "epoch": 3.7749360613810743, "percentage": 76.08, "elapsed_time": "14:36:42", "remaining_time": "4:35:36"}
+{"current_steps": 370, "total_steps": 485, "loss": 1.0038, "lr": 2.592917686481708e-05, "epoch": 3.785166240409207, "percentage": 76.29, "elapsed_time": "14:39:05", "remaining_time": "4:33:13"}
+{"current_steps": 371, "total_steps": 485, "loss": 1.0376, "lr": 2.5505744690557846e-05, "epoch": 3.79539641943734, "percentage": 76.49, "elapsed_time": "14:41:27", "remaining_time": "4:30:51"}
+{"current_steps": 372, "total_steps": 485, "loss": 1.0358, "lr": 2.508514179421629e-05, "epoch": 3.805626598465473, "percentage": 76.7, "elapsed_time": "14:43:49", "remaining_time": "4:28:28"}
+{"current_steps": 373, "total_steps": 485, "loss": 1.0211, "lr": 2.4667390013002254e-05, "epoch": 3.815856777493606, "percentage": 76.91, "elapsed_time": "14:46:12", "remaining_time": "4:26:05"}
+{"current_steps": 374, "total_steps": 485, "loss": 1.0332, "lr": 2.425251103609898e-05, "epoch": 3.8260869565217392, "percentage": 77.11, "elapsed_time": "14:48:35", "remaining_time": "4:23:43"}
+{"current_steps": 375, "total_steps": 485, "loss": 1.0143, "lr": 2.3840526403537095e-05, "epoch": 3.836317135549872, "percentage": 77.32, "elapsed_time": "14:50:57", "remaining_time": "4:21:20"}
+{"current_steps": 376, "total_steps": 485, "loss": 1.0173, "lr": 2.3431457505076205e-05, "epoch": 3.846547314578005, "percentage": 77.53, "elapsed_time": "14:53:19", "remaining_time": "4:18:58"}
+{"current_steps": 377, "total_steps": 485, "loss": 1.0362, "lr": 2.3025325579094498e-05, "epoch": 3.8567774936061383, "percentage": 77.73, "elapsed_time": "14:55:42", "remaining_time": "4:16:35"}
+{"current_steps": 378, "total_steps": 485, "loss": 1.0124, "lr": 2.2622151711485962e-05, "epoch": 3.867007672634271, "percentage": 77.94, "elapsed_time": "14:58:04", "remaining_time": "4:14:12"}
+{"current_steps": 379, "total_steps": 485, "loss": 1.0139, "lr": 2.2221956834565647e-05, "epoch": 3.877237851662404, "percentage": 78.14, "elapsed_time": "15:00:26", "remaining_time": "4:11:50"}
+{"current_steps": 380, "total_steps": 485, "loss": 1.0523, "lr": 2.1824761725982874e-05, "epoch": 3.887468030690537, "percentage": 78.35, "elapsed_time": "15:02:49", "remaining_time": "4:09:27"}
+{"current_steps": 381, "total_steps": 485, "loss": 0.991, "lr": 2.1430587007642513e-05, "epoch": 3.89769820971867, "percentage": 78.56, "elapsed_time": "15:05:11", "remaining_time": "4:07:05"}
+{"current_steps": 382, "total_steps": 485, "loss": 1.0355, "lr": 2.1039453144634364e-05, "epoch": 3.907928388746803, "percentage": 78.76, "elapsed_time": "15:07:34", "remaining_time": "4:04:42"}
+{"current_steps": 383, "total_steps": 485, "loss": 1.015, "lr": 2.0651380444170527e-05, "epoch": 3.918158567774936, "percentage": 78.97, "elapsed_time": "15:09:56", "remaining_time": "4:02:20"}
+{"current_steps": 384, "total_steps": 485, "loss": 1.0229, "lr": 2.026638905453111e-05, "epoch": 3.928388746803069, "percentage": 79.18, "elapsed_time": "15:12:19", "remaining_time": "3:59:57"}
+{"current_steps": 385, "total_steps": 485, "loss": 1.0135, "lr": 1.9884498964018233e-05, "epoch": 3.938618925831202, "percentage": 79.38, "elapsed_time": "15:14:41", "remaining_time": "3:57:35"}
+{"current_steps": 386, "total_steps": 485, "loss": 1.0044, "lr": 1.9505729999918194e-05, "epoch": 3.948849104859335, "percentage": 79.59, "elapsed_time": "15:17:04", "remaining_time": "3:55:12"}
+{"current_steps": 387, "total_steps": 485, "loss": 1.0103, "lr": 1.913010182747196e-05, "epoch": 3.959079283887468, "percentage": 79.79, "elapsed_time": "15:19:26", "remaining_time": "3:52:49"}
+{"current_steps": 388, "total_steps": 485, "loss": 1.0491, "lr": 1.875763394885441e-05, "epoch": 3.969309462915601, "percentage": 80.0, "elapsed_time": "15:21:49", "remaining_time": "3:50:27"}
+{"current_steps": 389, "total_steps": 485, "loss": 1.0355, "lr": 1.8388345702161556e-05, "epoch": 3.979539641943734, "percentage": 80.21, "elapsed_time": "15:24:11", "remaining_time": "3:48:04"}
+{"current_steps": 390, "total_steps": 485, "loss": 1.021, "lr": 1.8022256260406756e-05, "epoch": 3.9897698209718673, "percentage": 80.41, "elapsed_time": "15:26:34", "remaining_time": "3:45:42"}
+{"current_steps": 391, "total_steps": 485, "loss": 1.288, "lr": 1.765938463052506e-05, "epoch": 4.0, "percentage": 80.62, "elapsed_time": "15:28:56", "remaining_time": "3:43:19"}
+{"current_steps": 392, "total_steps": 485, "loss": 1.001, "lr": 1.729974965238651e-05, "epoch": 4.010230179028133, "percentage": 80.82, "elapsed_time": "15:31:27", "remaining_time": "3:40:59"}
+{"current_steps": 393, "total_steps": 485, "loss": 1.015, "lr": 1.6943369997818066e-05, "epoch": 4.020460358056266, "percentage": 81.03, "elapsed_time": "15:33:51", "remaining_time": "3:38:36"}
+{"current_steps": 394, "total_steps": 485, "loss": 1.0076, "lr": 1.659026416963401e-05, "epoch": 4.030690537084399, "percentage": 81.24, "elapsed_time": "15:36:13", "remaining_time": "3:36:14"}
+{"current_steps": 395, "total_steps": 485, "loss": 1.0148, "lr": 1.6240450500675393e-05, "epoch": 4.040920716112532, "percentage": 81.44, "elapsed_time": "15:38:36", "remaining_time": "3:33:51"}
+{"current_steps": 396, "total_steps": 485, "loss": 1.0016, "lr": 1.5893947152858285e-05, "epoch": 4.051150895140665, "percentage": 81.65, "elapsed_time": "15:40:58", "remaining_time": "3:31:28"}
+{"current_steps": 397, "total_steps": 485, "loss": 1.0043, "lr": 1.55507721162307e-05, "epoch": 4.061381074168798, "percentage": 81.86, "elapsed_time": "15:43:20", "remaining_time": "3:29:06"}
+{"current_steps": 398, "total_steps": 485, "loss": 1.0288, "lr": 1.5210943208038634e-05, "epoch": 4.071611253196931, "percentage": 82.06, "elapsed_time": "15:45:44", "remaining_time": "3:26:43"}
+{"current_steps": 399, "total_steps": 485, "loss": 1.0302, "lr": 1.4874478071801055e-05, "epoch": 4.081841432225064, "percentage": 82.27, "elapsed_time": "15:48:06", "remaining_time": "3:24:21"}
+{"current_steps": 400, "total_steps": 485, "loss": 1.0021, "lr": 1.454139417639377e-05, "epoch": 4.092071611253197, "percentage": 82.47, "elapsed_time": "15:50:29", "remaining_time": "3:21:58"}
+{"current_steps": 401, "total_steps": 485, "loss": 1.008, "lr": 1.4211708815142599e-05, "epoch": 4.10230179028133, "percentage": 82.68, "elapsed_time": "15:52:51", "remaining_time": "3:19:36"}
+{"current_steps": 402, "total_steps": 485, "loss": 1.0082, "lr": 1.3885439104925387e-05, "epoch": 4.112531969309463, "percentage": 82.89, "elapsed_time": "15:55:13", "remaining_time": "3:17:13"}
+{"current_steps": 403, "total_steps": 485, "loss": 1.0103, "lr": 1.3562601985283358e-05, "epoch": 4.122762148337596, "percentage": 83.09, "elapsed_time": "15:57:35", "remaining_time": "3:14:50"}
+{"current_steps": 404, "total_steps": 485, "loss": 1.0186, "lr": 1.3243214217541751e-05, "epoch": 4.132992327365729, "percentage": 83.3, "elapsed_time": "15:59:57", "remaining_time": "3:12:28"}
+{"current_steps": 405, "total_steps": 485, "loss": 1.0103, "lr": 1.2927292383939407e-05, "epoch": 4.143222506393862, "percentage": 83.51, "elapsed_time": "16:02:20", "remaining_time": "3:10:05"}
+{"current_steps": 406, "total_steps": 485, "loss": 1.0172, "lr": 1.2614852886767932e-05, "epoch": 4.153452685421995, "percentage": 83.71, "elapsed_time": "16:04:42", "remaining_time": "3:07:42"}
+{"current_steps": 407, "total_steps": 485, "loss": 1.0172, "lr": 1.2305911947520159e-05, "epoch": 4.163682864450128, "percentage": 83.92, "elapsed_time": "16:07:04", "remaining_time": "3:05:20"}
+{"current_steps": 408, "total_steps": 485, "loss": 1.0254, "lr": 1.2000485606047837e-05, "epoch": 4.173913043478261, "percentage": 84.12, "elapsed_time": "16:09:27", "remaining_time": "3:02:57"}
+{"current_steps": 409, "total_steps": 485, "loss": 1.025, "lr": 1.1698589719728911e-05, "epoch": 4.1841432225063935, "percentage": 84.33, "elapsed_time": "16:11:49", "remaining_time": "3:00:35"}
+{"current_steps": 410, "total_steps": 485, "loss": 1.011, "lr": 1.1400239962644294e-05, "epoch": 4.194373401534527, "percentage": 84.54, "elapsed_time": "16:14:12", "remaining_time": "2:58:12"}
+{"current_steps": 411, "total_steps": 485, "loss": 1.0064, "lr": 1.1105451824763933e-05, "epoch": 4.20460358056266, "percentage": 84.74, "elapsed_time": "16:16:34", "remaining_time": "2:55:49"}
+{"current_steps": 412, "total_steps": 485, "loss": 1.0049, "lr": 1.0814240611142765e-05, "epoch": 4.2148337595907925, "percentage": 84.95, "elapsed_time": "16:18:56", "remaining_time": "2:53:27"}
+{"current_steps": 413, "total_steps": 485, "loss": 1.0039, "lr": 1.0526621441125946e-05, "epoch": 4.225063938618926, "percentage": 85.15, "elapsed_time": "16:21:18", "remaining_time": "2:51:04"}
+{"current_steps": 414, "total_steps": 485, "loss": 1.0204, "lr": 1.0242609247563924e-05, "epoch": 4.235294117647059, "percentage": 85.36, "elapsed_time": "16:23:41", "remaining_time": "2:48:42"}
+{"current_steps": 415, "total_steps": 485, "loss": 1.0178, "lr": 9.962218776037234e-06, "epoch": 4.245524296675192, "percentage": 85.57, "elapsed_time": "16:26:04", "remaining_time": "2:46:19"}
+{"current_steps": 416, "total_steps": 485, "loss": 1.0144, "lr": 9.68546458409077e-06, "epoch": 4.255754475703325, "percentage": 85.77, "elapsed_time": "16:28:26", "remaining_time": "2:43:56"}
+{"current_steps": 417, "total_steps": 485, "loss": 1.0008, "lr": 9.41236104047806e-06, "epoch": 4.265984654731458, "percentage": 85.98, "elapsed_time": "16:30:49", "remaining_time": "2:41:34"}
+{"current_steps": 418, "total_steps": 485, "loss": 1.0102, "lr": 9.14292232441528e-06, "epoch": 4.276214833759591, "percentage": 86.19, "elapsed_time": "16:33:12", "remaining_time": "2:39:11"}
+{"current_steps": 419, "total_steps": 485, "loss": 0.997, "lr": 8.877162424845012e-06, "epoch": 4.286445012787723, "percentage": 86.39, "elapsed_time": "16:35:34", "remaining_time": "2:36:49"}
+{"current_steps": 420, "total_steps": 485, "loss": 1.0204, "lr": 8.615095139710044e-06, "epoch": 4.296675191815857, "percentage": 86.6, "elapsed_time": "16:37:56", "remaining_time": "2:34:26"}
+{"current_steps": 421, "total_steps": 485, "loss": 1.0286, "lr": 8.356734075236858e-06, "epoch": 4.30690537084399, "percentage": 86.8, "elapsed_time": "16:40:19", "remaining_time": "2:32:04"}
+{"current_steps": 422, "total_steps": 485, "loss": 0.9999, "lr": 8.102092645229392e-06, "epoch": 4.3171355498721224, "percentage": 87.01, "elapsed_time": "16:42:44", "remaining_time": "2:29:41"}
+{"current_steps": 423, "total_steps": 485, "loss": 1.0157, "lr": 7.8511840703725e-06, "epoch": 4.327365728900256, "percentage": 87.22, "elapsed_time": "16:45:06", "remaining_time": "2:27:19"}
+{"current_steps": 424, "total_steps": 485, "loss": 1.0177, "lr": 7.604021377545518e-06, "epoch": 4.337595907928389, "percentage": 87.42, "elapsed_time": "16:47:29", "remaining_time": "2:24:56"}
+{"current_steps": 425, "total_steps": 485, "loss": 1.0053, "lr": 7.36061739914601e-06, "epoch": 4.3478260869565215, "percentage": 87.63, "elapsed_time": "16:49:51", "remaining_time": "2:22:34"}
+{"current_steps": 426, "total_steps": 485, "loss": 1.0116, "lr": 7.120984772423507e-06, "epoch": 4.358056265984655, "percentage": 87.84, "elapsed_time": "16:52:14", "remaining_time": "2:20:11"}
+{"current_steps": 427, "total_steps": 485, "loss": 1.0157, "lr": 6.88513593882334e-06, "epoch": 4.368286445012788, "percentage": 88.04, "elapsed_time": "16:54:36", "remaining_time": "2:17:48"}
+{"current_steps": 428, "total_steps": 485, "loss": 1.0321, "lr": 6.653083143340748e-06, "epoch": 4.378516624040921, "percentage": 88.25, "elapsed_time": "16:56:58", "remaining_time": "2:15:26"}
+{"current_steps": 429, "total_steps": 485, "loss": 1.0166, "lr": 6.4248384338851146e-06, "epoch": 4.388746803069053, "percentage": 88.45, "elapsed_time": "16:59:21", "remaining_time": "2:13:03"}
+{"current_steps": 430, "total_steps": 485, "loss": 1.0155, "lr": 6.2004136606544515e-06, "epoch": 4.398976982097187, "percentage": 88.66, "elapsed_time": "17:01:43", "remaining_time": "2:10:41"}
+{"current_steps": 431, "total_steps": 485, "loss": 1.0268, "lr": 5.979820475520202e-06, "epoch": 4.40920716112532, "percentage": 88.87, "elapsed_time": "17:04:05", "remaining_time": "2:08:18"}
+{"current_steps": 432, "total_steps": 485, "loss": 1.0094, "lr": 5.763070331422151e-06, "epoch": 4.419437340153452, "percentage": 89.07, "elapsed_time": "17:06:28", "remaining_time": "2:05:55"}
+{"current_steps": 433, "total_steps": 485, "loss": 1.0117, "lr": 5.550174481773969e-06, "epoch": 4.429667519181586, "percentage": 89.28, "elapsed_time": "17:08:50", "remaining_time": "2:03:33"}
+{"current_steps": 434, "total_steps": 485, "loss": 1.024, "lr": 5.341143979878851e-06, "epoch": 4.439897698209719, "percentage": 89.48, "elapsed_time": "17:11:13", "remaining_time": "2:01:10"}
+{"current_steps": 435, "total_steps": 485, "loss": 1.0068, "lr": 5.135989678355664e-06, "epoch": 4.450127877237851, "percentage": 89.69, "elapsed_time": "17:13:35", "remaining_time": "1:58:48"}
+{"current_steps": 436, "total_steps": 485, "loss": 1.0144, "lr": 4.934722228575481e-06, "epoch": 4.460358056265985, "percentage": 89.9, "elapsed_time": "17:15:57", "remaining_time": "1:56:25"}
+{"current_steps": 437, "total_steps": 485, "loss": 1.0149, "lr": 4.7373520801085705e-06, "epoch": 4.470588235294118, "percentage": 90.1, "elapsed_time": "17:18:20", "remaining_time": "1:54:03"}
+{"current_steps": 438, "total_steps": 485, "loss": 1.0209, "lr": 4.543889480181944e-06, "epoch": 4.4808184143222505, "percentage": 90.31, "elapsed_time": "17:20:43", "remaining_time": "1:51:40"}
+{"current_steps": 439, "total_steps": 485, "loss": 1.0229, "lr": 4.354344473147194e-06, "epoch": 4.491048593350383, "percentage": 90.52, "elapsed_time": "17:23:05", "remaining_time": "1:49:17"}
+{"current_steps": 440, "total_steps": 485, "loss": 1.0093, "lr": 4.1687268999591164e-06, "epoch": 4.501278772378517, "percentage": 90.72, "elapsed_time": "17:25:27", "remaining_time": "1:46:55"}
+{"current_steps": 441, "total_steps": 485, "loss": 1.0227, "lr": 3.98704639766474e-06, "epoch": 4.5115089514066495, "percentage": 90.93, "elapsed_time": "17:27:50", "remaining_time": "1:44:32"}
+{"current_steps": 442, "total_steps": 485, "loss": 1.0206, "lr": 3.809312398903e-06, "epoch": 4.521739130434782, "percentage": 91.13, "elapsed_time": "17:30:12", "remaining_time": "1:42:10"}
+{"current_steps": 443, "total_steps": 485, "loss": 1.0061, "lr": 3.6355341314149216e-06, "epoch": 4.531969309462916, "percentage": 91.34, "elapsed_time": "17:32:34", "remaining_time": "1:39:47"}
+{"current_steps": 444, "total_steps": 485, "loss": 1.0001, "lr": 3.465720617564676e-06, "epoch": 4.542199488491049, "percentage": 91.55, "elapsed_time": "17:34:57", "remaining_time": "1:37:25"}
+{"current_steps": 445, "total_steps": 485, "loss": 1.0179, "lr": 3.299880673871023e-06, "epoch": 4.552429667519181, "percentage": 91.75, "elapsed_time": "17:37:20", "remaining_time": "1:35:02"}
+{"current_steps": 446, "total_steps": 485, "loss": 1.0261, "lr": 3.138022910549632e-06, "epoch": 4.562659846547315, "percentage": 91.96, "elapsed_time": "17:39:42", "remaining_time": "1:32:39"}
+{"current_steps": 447, "total_steps": 485, "loss": 0.9983, "lr": 2.980155731066017e-06, "epoch": 4.572890025575448, "percentage": 92.16, "elapsed_time": "17:42:05", "remaining_time": "1:30:17"}
+{"current_steps": 448, "total_steps": 485, "loss": 1.0232, "lr": 2.8262873316992556e-06, "epoch": 4.58312020460358, "percentage": 92.37, "elapsed_time": "17:44:27", "remaining_time": "1:27:54"}
+{"current_steps": 449, "total_steps": 485, "loss": 1.0065, "lr": 2.676425701116463e-06, "epoch": 4.593350383631714, "percentage": 92.58, "elapsed_time": "17:46:49", "remaining_time": "1:25:32"}
+{"current_steps": 450, "total_steps": 485, "loss": 1.0117, "lr": 2.530578619957993e-06, "epoch": 4.603580562659847, "percentage": 92.78, "elapsed_time": "17:49:12", "remaining_time": "1:23:09"}
+{"current_steps": 451, "total_steps": 485, "loss": 0.9904, "lr": 2.3887536604334784e-06, "epoch": 4.6138107416879794, "percentage": 92.99, "elapsed_time": "17:51:34", "remaining_time": "1:20:47"}
+{"current_steps": 452, "total_steps": 485, "loss": 1.018, "lr": 2.2509581859287576e-06, "epoch": 4.624040920716112, "percentage": 93.2, "elapsed_time": "17:53:57", "remaining_time": "1:18:24"}
+{"current_steps": 453, "total_steps": 485, "loss": 1.0224, "lr": 2.117199350623462e-06, "epoch": 4.634271099744246, "percentage": 93.4, "elapsed_time": "17:56:19", "remaining_time": "1:16:01"}
+{"current_steps": 454, "total_steps": 485, "loss": 1.0256, "lr": 1.987484099119712e-06, "epoch": 4.6445012787723785, "percentage": 93.61, "elapsed_time": "17:58:42", "remaining_time": "1:13:39"}
+{"current_steps": 455, "total_steps": 485, "loss": 1.0126, "lr": 1.8618191660814356e-06, "epoch": 4.654731457800511, "percentage": 93.81, "elapsed_time": "18:01:04", "remaining_time": "1:11:16"}
+{"current_steps": 456, "total_steps": 485, "loss": 1.0064, "lr": 1.7402110758847834e-06, "epoch": 4.664961636828645, "percentage": 94.02, "elapsed_time": "18:03:26", "remaining_time": "1:08:54"}
+{"current_steps": 457, "total_steps": 485, "loss": 1.0015, "lr": 1.6226661422794033e-06, "epoch": 4.675191815856778, "percentage": 94.23, "elapsed_time": "18:05:48", "remaining_time": "1:06:31"}
+{"current_steps": 458, "total_steps": 485, "loss": 1.0195, "lr": 1.5091904680605862e-06, "epoch": 4.68542199488491, "percentage": 94.43, "elapsed_time": "18:08:12", "remaining_time": "1:04:09"}
+{"current_steps": 459, "total_steps": 485, "loss": 1.0313, "lr": 1.3997899447524277e-06, "epoch": 4.695652173913043, "percentage": 94.64, "elapsed_time": "18:10:34", "remaining_time": "1:01:46"}
+{"current_steps": 460, "total_steps": 485, "loss": 1.0074, "lr": 1.294470252302009e-06, "epoch": 4.705882352941177, "percentage": 94.85, "elapsed_time": "18:12:57", "remaining_time": "0:59:23"}
+{"current_steps": 461, "total_steps": 485, "loss": 1.0073, "lr": 1.193236858784408e-06, "epoch": 4.716112531969309, "percentage": 95.05, "elapsed_time": "18:15:19", "remaining_time": "0:57:01"}
+{"current_steps": 462, "total_steps": 485, "loss": 1.0217, "lr": 1.0960950201188524e-06, "epoch": 4.726342710997442, "percentage": 95.26, "elapsed_time": "18:17:41", "remaining_time": "0:54:38"}
+{"current_steps": 463, "total_steps": 485, "loss": 1.0167, "lr": 1.003049779795866e-06, "epoch": 4.736572890025576, "percentage": 95.46, "elapsed_time": "18:20:04", "remaining_time": "0:52:16"}
+{"current_steps": 464, "total_steps": 485, "loss": 1.0207, "lr": 9.141059686153419e-07, "epoch": 4.746803069053708, "percentage": 95.67, "elapsed_time": "18:22:26", "remaining_time": "0:49:53"}
+{"current_steps": 465, "total_steps": 485, "loss": 1.0169, "lr": 8.292682044358114e-07, "epoch": 4.757033248081841, "percentage": 95.88, "elapsed_time": "18:24:49", "remaining_time": "0:47:31"}
+{"current_steps": 466, "total_steps": 485, "loss": 1.0276, "lr": 7.485408919346171e-07, "epoch": 4.767263427109975, "percentage": 96.08, "elapsed_time": "18:27:11", "remaining_time": "0:45:08"}
+{"current_steps": 467, "total_steps": 485, "loss": 1.0108, "lr": 6.719282223793056e-07, "epoch": 4.7774936061381075, "percentage": 96.29, "elapsed_time": "18:29:34", "remaining_time": "0:42:46"}
+{"current_steps": 468, "total_steps": 485, "loss": 1.0213, "lr": 5.994341734099429e-07, "epoch": 4.78772378516624, "percentage": 96.49, "elapsed_time": "18:31:56", "remaining_time": "0:40:23"}
+{"current_steps": 469, "total_steps": 485, "loss": 0.9962, "lr": 5.310625088326671e-07, "epoch": 4.797953964194374, "percentage": 96.7, "elapsed_time": "18:34:18", "remaining_time": "0:38:00"}
+{"current_steps": 470, "total_steps": 485, "loss": 1.0079, "lr": 4.6681677842421724e-07, "epoch": 4.8081841432225065, "percentage": 96.91, "elapsed_time": "18:36:40", "remaining_time": "0:35:38"}
+{"current_steps": 471, "total_steps": 485, "loss": 1.0025, "lr": 4.067003177476991e-07, "epoch": 4.818414322250639, "percentage": 97.11, "elapsed_time": "18:39:03", "remaining_time": "0:33:15"}
+{"current_steps": 472, "total_steps": 485, "loss": 1.0173, "lr": 3.507162479793369e-07, "epoch": 4.828644501278772, "percentage": 97.32, "elapsed_time": "18:41:25", "remaining_time": "0:30:53"}
+{"current_steps": 473, "total_steps": 485, "loss": 1.0001, "lr": 2.9886747574646936e-07, "epoch": 4.838874680306906, "percentage": 97.53, "elapsed_time": "18:43:47", "remaining_time": "0:28:30"}
+{"current_steps": 474, "total_steps": 485, "loss": 1.0062, "lr": 2.511566929766396e-07, "epoch": 4.849104859335038, "percentage": 97.73, "elapsed_time": "18:46:10", "remaining_time": "0:26:08"}
+{"current_steps": 475, "total_steps": 485, "loss": 1.0195, "lr": 2.075863767577957e-07, "epoch": 4.859335038363171, "percentage": 97.94, "elapsed_time": "18:48:32", "remaining_time": "0:23:45"}
+{"current_steps": 476, "total_steps": 485, "loss": 1.0159, "lr": 1.681587892097536e-07, "epoch": 4.869565217391305, "percentage": 98.14, "elapsed_time": "18:50:54", "remaining_time": "0:21:22"}
+{"current_steps": 477, "total_steps": 485, "loss": 1.0233, "lr": 1.3287597736667323e-07, "epoch": 4.879795396419437, "percentage": 98.35, "elapsed_time": "18:53:16", "remaining_time": "0:19:00"}
+{"current_steps": 478, "total_steps": 485, "loss": 1.0188, "lr": 1.0173977307082361e-07, "epoch": 4.89002557544757, "percentage": 98.56, "elapsed_time": "18:55:38", "remaining_time": "0:16:37"}
+{"current_steps": 479, "total_steps": 485, "loss": 1.0235, "lr": 7.475179287748547e-08, "epoch": 4.900255754475703, "percentage": 98.76, "elapsed_time": "18:58:01", "remaining_time": "0:14:15"}
+{"current_steps": 480, "total_steps": 485, "loss": 1.0018, "lr": 5.191343797096515e-08, "epoch": 4.910485933503836, "percentage": 98.97, "elapsed_time": "19:00:23", "remaining_time": "0:11:52"}
+{"current_steps": 481, "total_steps": 485, "loss": 1.009, "lr": 3.322589409190613e-08, "epoch": 4.920716112531969, "percentage": 99.18, "elapsed_time": "19:02:45", "remaining_time": "0:09:30"}
+{"current_steps": 482, "total_steps": 485, "loss": 1.0273, "lr": 1.8690131475711527e-08, "epoch": 4.930946291560103, "percentage": 99.38, "elapsed_time": "19:05:08", "remaining_time": "0:07:07"}
+{"current_steps": 483, "total_steps": 485, "loss": 1.0322, "lr": 8.306904802148907e-09, "epoch": 4.9411764705882355, "percentage": 99.59, "elapsed_time": "19:07:30", "remaining_time": "0:04:45"}
+{"current_steps": 484, "total_steps": 485, "loss": 1.0035, "lr": 2.07675315618161e-09, "epoch": 4.951406649616368, "percentage": 99.79, "elapsed_time": "19:09:52", "remaining_time": "0:02:22"}
+{"current_steps": 485, "total_steps": 485, "loss": 1.0124, "lr": 0.0, "epoch": 4.961636828644501, "percentage": 100.0, "elapsed_time": "19:12:15", "remaining_time": "0:00:00"}
+{"current_steps": 485, "total_steps": 485, "epoch": 4.961636828644501, "percentage": 100.0, "elapsed_time": "19:12:30", "remaining_time": "0:00:00"}
diff --git a/trainer_state.json b/trainer_state.json
new file mode 100644
index 0000000..d451855
--- /dev/null
+++ b/trainer_state.json
@@ -0,0 +1,3437 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 4.961636828644501,
+ "eval_steps": 500,
+ "global_step": 485,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.010230179028132993,
+ "grad_norm": 2.9682868161196363,
+ "learning_rate": 3.2653061224489794e-06,
+ "loss": 1.4918,
+ "step": 1
+ },
+ {
+ "epoch": 0.020460358056265986,
+ "grad_norm": 2.9796653677109557,
+ "learning_rate": 6.530612244897959e-06,
+ "loss": 1.4922,
+ "step": 2
+ },
+ {
+ "epoch": 0.030690537084398978,
+ "grad_norm": 2.8660911829024167,
+ "learning_rate": 9.795918367346939e-06,
+ "loss": 1.503,
+ "step": 3
+ },
+ {
+ "epoch": 0.04092071611253197,
+ "grad_norm": 1.9805780511052813,
+ "learning_rate": 1.3061224489795918e-05,
+ "loss": 1.4675,
+ "step": 4
+ },
+ {
+ "epoch": 0.05115089514066496,
+ "grad_norm": 1.998834758427541,
+ "learning_rate": 1.63265306122449e-05,
+ "loss": 1.4533,
+ "step": 5
+ },
+ {
+ "epoch": 0.061381074168797956,
+ "grad_norm": 2.050049149995988,
+ "learning_rate": 1.9591836734693877e-05,
+ "loss": 1.4122,
+ "step": 6
+ },
+ {
+ "epoch": 0.07161125319693094,
+ "grad_norm": 2.399845329221154,
+ "learning_rate": 2.2857142857142858e-05,
+ "loss": 1.4056,
+ "step": 7
+ },
+ {
+ "epoch": 0.08184143222506395,
+ "grad_norm": 1.7232075850783724,
+ "learning_rate": 2.6122448979591835e-05,
+ "loss": 1.3833,
+ "step": 8
+ },
+ {
+ "epoch": 0.09207161125319693,
+ "grad_norm": 1.410904151553637,
+ "learning_rate": 2.938775510204082e-05,
+ "loss": 1.3843,
+ "step": 9
+ },
+ {
+ "epoch": 0.10230179028132992,
+ "grad_norm": 1.1900105733063466,
+ "learning_rate": 3.26530612244898e-05,
+ "loss": 1.3515,
+ "step": 10
+ },
+ {
+ "epoch": 0.11253196930946291,
+ "grad_norm": 1.2160713923639859,
+ "learning_rate": 3.591836734693878e-05,
+ "loss": 1.3224,
+ "step": 11
+ },
+ {
+ "epoch": 0.12276214833759591,
+ "grad_norm": 1.0389820905559795,
+ "learning_rate": 3.9183673469387755e-05,
+ "loss": 1.322,
+ "step": 12
+ },
+ {
+ "epoch": 0.1329923273657289,
+ "grad_norm": 0.9606384846443378,
+ "learning_rate": 4.244897959183674e-05,
+ "loss": 1.2956,
+ "step": 13
+ },
+ {
+ "epoch": 0.1432225063938619,
+ "grad_norm": 1.032658088356086,
+ "learning_rate": 4.5714285714285716e-05,
+ "loss": 1.3067,
+ "step": 14
+ },
+ {
+ "epoch": 0.1534526854219949,
+ "grad_norm": 1.006086286942022,
+ "learning_rate": 4.89795918367347e-05,
+ "loss": 1.2961,
+ "step": 15
+ },
+ {
+ "epoch": 0.1636828644501279,
+ "grad_norm": 1.1671471070443895,
+ "learning_rate": 5.224489795918367e-05,
+ "loss": 1.301,
+ "step": 16
+ },
+ {
+ "epoch": 0.17391304347826086,
+ "grad_norm": 0.8672174854035504,
+ "learning_rate": 5.551020408163266e-05,
+ "loss": 1.2656,
+ "step": 17
+ },
+ {
+ "epoch": 0.18414322250639387,
+ "grad_norm": 0.7154082384610629,
+ "learning_rate": 5.877551020408164e-05,
+ "loss": 1.246,
+ "step": 18
+ },
+ {
+ "epoch": 0.19437340153452684,
+ "grad_norm": 0.694176256293125,
+ "learning_rate": 6.204081632653062e-05,
+ "loss": 1.2565,
+ "step": 19
+ },
+ {
+ "epoch": 0.20460358056265984,
+ "grad_norm": 0.7538553851540746,
+ "learning_rate": 6.53061224489796e-05,
+ "loss": 1.241,
+ "step": 20
+ },
+ {
+ "epoch": 0.21483375959079284,
+ "grad_norm": 0.7434156335821637,
+ "learning_rate": 6.857142857142857e-05,
+ "loss": 1.232,
+ "step": 21
+ },
+ {
+ "epoch": 0.22506393861892582,
+ "grad_norm": 1.2022520877750795,
+ "learning_rate": 7.183673469387756e-05,
+ "loss": 1.2567,
+ "step": 22
+ },
+ {
+ "epoch": 0.23529411764705882,
+ "grad_norm": 1.150528684217217,
+ "learning_rate": 7.510204081632654e-05,
+ "loss": 1.2448,
+ "step": 23
+ },
+ {
+ "epoch": 0.24552429667519182,
+ "grad_norm": 0.8365697619839101,
+ "learning_rate": 7.836734693877551e-05,
+ "loss": 1.2357,
+ "step": 24
+ },
+ {
+ "epoch": 0.2557544757033248,
+ "grad_norm": 1.1469314213901645,
+ "learning_rate": 8.16326530612245e-05,
+ "loss": 1.2164,
+ "step": 25
+ },
+ {
+ "epoch": 0.2659846547314578,
+ "grad_norm": 0.9677681397432715,
+ "learning_rate": 8.489795918367348e-05,
+ "loss": 1.2115,
+ "step": 26
+ },
+ {
+ "epoch": 0.27621483375959077,
+ "grad_norm": 1.106826210998024,
+ "learning_rate": 8.816326530612245e-05,
+ "loss": 1.2004,
+ "step": 27
+ },
+ {
+ "epoch": 0.2864450127877238,
+ "grad_norm": 1.1744803496009477,
+ "learning_rate": 9.142857142857143e-05,
+ "loss": 1.2235,
+ "step": 28
+ },
+ {
+ "epoch": 0.2966751918158568,
+ "grad_norm": 1.3610877277166293,
+ "learning_rate": 9.469387755102041e-05,
+ "loss": 1.208,
+ "step": 29
+ },
+ {
+ "epoch": 0.3069053708439898,
+ "grad_norm": 0.7447757731907216,
+ "learning_rate": 9.79591836734694e-05,
+ "loss": 1.2118,
+ "step": 30
+ },
+ {
+ "epoch": 0.3171355498721228,
+ "grad_norm": 1.3296162158652725,
+ "learning_rate": 0.00010122448979591839,
+ "loss": 1.2032,
+ "step": 31
+ },
+ {
+ "epoch": 0.3273657289002558,
+ "grad_norm": 1.144264777414957,
+ "learning_rate": 0.00010448979591836734,
+ "loss": 1.1912,
+ "step": 32
+ },
+ {
+ "epoch": 0.3375959079283887,
+ "grad_norm": 1.0342650793984924,
+ "learning_rate": 0.00010775510204081634,
+ "loss": 1.2025,
+ "step": 33
+ },
+ {
+ "epoch": 0.34782608695652173,
+ "grad_norm": 1.9336445124108894,
+ "learning_rate": 0.00011102040816326532,
+ "loss": 1.2118,
+ "step": 34
+ },
+ {
+ "epoch": 0.35805626598465473,
+ "grad_norm": 0.9195357493046576,
+ "learning_rate": 0.0001142857142857143,
+ "loss": 1.1986,
+ "step": 35
+ },
+ {
+ "epoch": 0.36828644501278773,
+ "grad_norm": 1.588300333871633,
+ "learning_rate": 0.00011755102040816328,
+ "loss": 1.1939,
+ "step": 36
+ },
+ {
+ "epoch": 0.37851662404092073,
+ "grad_norm": 1.4710948512542918,
+ "learning_rate": 0.00012081632653061224,
+ "loss": 1.1997,
+ "step": 37
+ },
+ {
+ "epoch": 0.3887468030690537,
+ "grad_norm": 1.2164757524877552,
+ "learning_rate": 0.00012408163265306124,
+ "loss": 1.1791,
+ "step": 38
+ },
+ {
+ "epoch": 0.3989769820971867,
+ "grad_norm": 1.4375602427525511,
+ "learning_rate": 0.0001273469387755102,
+ "loss": 1.1896,
+ "step": 39
+ },
+ {
+ "epoch": 0.4092071611253197,
+ "grad_norm": 1.5736548612793597,
+ "learning_rate": 0.0001306122448979592,
+ "loss": 1.1876,
+ "step": 40
+ },
+ {
+ "epoch": 0.4194373401534527,
+ "grad_norm": 1.0735537900578096,
+ "learning_rate": 0.00013387755102040817,
+ "loss": 1.1709,
+ "step": 41
+ },
+ {
+ "epoch": 0.4296675191815857,
+ "grad_norm": 1.8510870420688348,
+ "learning_rate": 0.00013714285714285713,
+ "loss": 1.1795,
+ "step": 42
+ },
+ {
+ "epoch": 0.4398976982097187,
+ "grad_norm": 1.1080619334489261,
+ "learning_rate": 0.00014040816326530613,
+ "loss": 1.1753,
+ "step": 43
+ },
+ {
+ "epoch": 0.45012787723785164,
+ "grad_norm": 1.9521299298489614,
+ "learning_rate": 0.00014367346938775512,
+ "loss": 1.2028,
+ "step": 44
+ },
+ {
+ "epoch": 0.46035805626598464,
+ "grad_norm": 1.41657060512908,
+ "learning_rate": 0.0001469387755102041,
+ "loss": 1.1822,
+ "step": 45
+ },
+ {
+ "epoch": 0.47058823529411764,
+ "grad_norm": 1.1977389520948274,
+ "learning_rate": 0.00015020408163265308,
+ "loss": 1.1836,
+ "step": 46
+ },
+ {
+ "epoch": 0.48081841432225064,
+ "grad_norm": 1.5288668435257222,
+ "learning_rate": 0.00015346938775510205,
+ "loss": 1.1901,
+ "step": 47
+ },
+ {
+ "epoch": 0.49104859335038364,
+ "grad_norm": 0.9552067161901755,
+ "learning_rate": 0.00015673469387755102,
+ "loss": 1.1562,
+ "step": 48
+ },
+ {
+ "epoch": 0.5012787723785166,
+ "grad_norm": 1.6547871414680237,
+ "learning_rate": 0.00016,
+ "loss": 1.1794,
+ "step": 49
+ },
+ {
+ "epoch": 0.5115089514066496,
+ "grad_norm": 1.8368364441109388,
+ "learning_rate": 0.00015999792324684382,
+ "loss": 1.1877,
+ "step": 50
+ },
+ {
+ "epoch": 0.5217391304347826,
+ "grad_norm": 1.1718896076543888,
+ "learning_rate": 0.00015999169309519789,
+ "loss": 1.1585,
+ "step": 51
+ },
+ {
+ "epoch": 0.5319693094629157,
+ "grad_norm": 2.015243385621693,
+ "learning_rate": 0.0001599813098685243,
+ "loss": 1.1732,
+ "step": 52
+ },
+ {
+ "epoch": 0.5421994884910486,
+ "grad_norm": 1.638588399609178,
+ "learning_rate": 0.0001599667741059081,
+ "loss": 1.1617,
+ "step": 53
+ },
+ {
+ "epoch": 0.5524296675191815,
+ "grad_norm": 1.1352479883236335,
+ "learning_rate": 0.00015994808656202904,
+ "loss": 1.1584,
+ "step": 54
+ },
+ {
+ "epoch": 0.5626598465473146,
+ "grad_norm": 1.2074962340041349,
+ "learning_rate": 0.00015992524820712252,
+ "loss": 1.1478,
+ "step": 55
+ },
+ {
+ "epoch": 0.5728900255754475,
+ "grad_norm": 1.4410034215063858,
+ "learning_rate": 0.00015989826022692918,
+ "loss": 1.1493,
+ "step": 56
+ },
+ {
+ "epoch": 0.5831202046035806,
+ "grad_norm": 0.8679551934822172,
+ "learning_rate": 0.00015986712402263334,
+ "loss": 1.148,
+ "step": 57
+ },
+ {
+ "epoch": 0.5933503836317136,
+ "grad_norm": 1.3034239933412972,
+ "learning_rate": 0.00015983184121079024,
+ "loss": 1.1532,
+ "step": 58
+ },
+ {
+ "epoch": 0.6035805626598465,
+ "grad_norm": 0.9780209051309727,
+ "learning_rate": 0.00015979241362324223,
+ "loss": 1.1313,
+ "step": 59
+ },
+ {
+ "epoch": 0.6138107416879796,
+ "grad_norm": 1.057690857573897,
+ "learning_rate": 0.0001597488433070234,
+ "loss": 1.138,
+ "step": 60
+ },
+ {
+ "epoch": 0.6240409207161125,
+ "grad_norm": 1.456389761257225,
+ "learning_rate": 0.00015970113252425356,
+ "loss": 1.1546,
+ "step": 61
+ },
+ {
+ "epoch": 0.6342710997442456,
+ "grad_norm": 2.3692148862526485,
+ "learning_rate": 0.00015964928375202068,
+ "loss": 1.163,
+ "step": 62
+ },
+ {
+ "epoch": 0.6445012787723785,
+ "grad_norm": 1.047447123295706,
+ "learning_rate": 0.00015959329968225232,
+ "loss": 1.1564,
+ "step": 63
+ },
+ {
+ "epoch": 0.6547314578005116,
+ "grad_norm": 3.5383718747156534,
+ "learning_rate": 0.0001595331832215758,
+ "loss": 1.1684,
+ "step": 64
+ },
+ {
+ "epoch": 0.6649616368286445,
+ "grad_norm": 2.968800505278846,
+ "learning_rate": 0.00015946893749116734,
+ "loss": 1.1788,
+ "step": 65
+ },
+ {
+ "epoch": 0.6751918158567775,
+ "grad_norm": 1.9185023523212743,
+ "learning_rate": 0.00015940056582659006,
+ "loss": 1.1537,
+ "step": 66
+ },
+ {
+ "epoch": 0.6854219948849105,
+ "grad_norm": 1.404723257524876,
+ "learning_rate": 0.0001593280717776207,
+ "loss": 1.1487,
+ "step": 67
+ },
+ {
+ "epoch": 0.6956521739130435,
+ "grad_norm": 1.9443271708932357,
+ "learning_rate": 0.0001592514591080654,
+ "loss": 1.15,
+ "step": 68
+ },
+ {
+ "epoch": 0.7058823529411765,
+ "grad_norm": 1.0325757666109745,
+ "learning_rate": 0.0001591707317955642,
+ "loss": 1.1487,
+ "step": 69
+ },
+ {
+ "epoch": 0.7161125319693095,
+ "grad_norm": 2.1150324509757006,
+ "learning_rate": 0.00015908589403138468,
+ "loss": 1.1766,
+ "step": 70
+ },
+ {
+ "epoch": 0.7263427109974424,
+ "grad_norm": 1.7663965705197828,
+ "learning_rate": 0.00015899695022020415,
+ "loss": 1.1464,
+ "step": 71
+ },
+ {
+ "epoch": 0.7365728900255755,
+ "grad_norm": 1.4582930405137742,
+ "learning_rate": 0.00015890390497988116,
+ "loss": 1.1487,
+ "step": 72
+ },
+ {
+ "epoch": 0.7468030690537084,
+ "grad_norm": 1.394985742880279,
+ "learning_rate": 0.0001588067631412156,
+ "loss": 1.1447,
+ "step": 73
+ },
+ {
+ "epoch": 0.7570332480818415,
+ "grad_norm": 1.1897882316318342,
+ "learning_rate": 0.000158705529747698,
+ "loss": 1.1299,
+ "step": 74
+ },
+ {
+ "epoch": 0.7672634271099744,
+ "grad_norm": 1.1626082561212874,
+ "learning_rate": 0.0001586002100552476,
+ "loss": 1.1473,
+ "step": 75
+ },
+ {
+ "epoch": 0.7774936061381074,
+ "grad_norm": 1.0736945268719569,
+ "learning_rate": 0.00015849080953193943,
+ "loss": 1.1368,
+ "step": 76
+ },
+ {
+ "epoch": 0.7877237851662404,
+ "grad_norm": 1.5654343283068946,
+ "learning_rate": 0.00015837733385772062,
+ "loss": 1.123,
+ "step": 77
+ },
+ {
+ "epoch": 0.7979539641943734,
+ "grad_norm": 0.8523221287975669,
+ "learning_rate": 0.00015825978892411522,
+ "loss": 1.1403,
+ "step": 78
+ },
+ {
+ "epoch": 0.8081841432225064,
+ "grad_norm": 1.035617914076512,
+ "learning_rate": 0.00015813818083391858,
+ "loss": 1.1229,
+ "step": 79
+ },
+ {
+ "epoch": 0.8184143222506394,
+ "grad_norm": 1.4757207346915424,
+ "learning_rate": 0.0001580125159008803,
+ "loss": 1.129,
+ "step": 80
+ },
+ {
+ "epoch": 0.8286445012787724,
+ "grad_norm": 1.0711086873355355,
+ "learning_rate": 0.00015788280064937655,
+ "loss": 1.1351,
+ "step": 81
+ },
+ {
+ "epoch": 0.8388746803069054,
+ "grad_norm": 1.2110898160193528,
+ "learning_rate": 0.00015774904181407127,
+ "loss": 1.1334,
+ "step": 82
+ },
+ {
+ "epoch": 0.8491048593350383,
+ "grad_norm": 1.2411189602993498,
+ "learning_rate": 0.00015761124633956652,
+ "loss": 1.1363,
+ "step": 83
+ },
+ {
+ "epoch": 0.8593350383631714,
+ "grad_norm": 1.1341606871236802,
+ "learning_rate": 0.00015746942138004203,
+ "loss": 1.1142,
+ "step": 84
+ },
+ {
+ "epoch": 0.8695652173913043,
+ "grad_norm": 0.9942900226210764,
+ "learning_rate": 0.00015732357429888355,
+ "loss": 1.13,
+ "step": 85
+ },
+ {
+ "epoch": 0.8797953964194374,
+ "grad_norm": 1.1602205285909497,
+ "learning_rate": 0.00015717371266830076,
+ "loss": 1.1225,
+ "step": 86
+ },
+ {
+ "epoch": 0.8900255754475703,
+ "grad_norm": 1.2773792554197163,
+ "learning_rate": 0.000157019844268934,
+ "loss": 1.1237,
+ "step": 87
+ },
+ {
+ "epoch": 0.9002557544757033,
+ "grad_norm": 1.0698753174294156,
+ "learning_rate": 0.0001568619770894504,
+ "loss": 1.1223,
+ "step": 88
+ },
+ {
+ "epoch": 0.9104859335038363,
+ "grad_norm": 1.1279028063380314,
+ "learning_rate": 0.000156700119326129,
+ "loss": 1.1117,
+ "step": 89
+ },
+ {
+ "epoch": 0.9207161125319693,
+ "grad_norm": 1.047346608848377,
+ "learning_rate": 0.00015653427938243532,
+ "loss": 1.1195,
+ "step": 90
+ },
+ {
+ "epoch": 0.9309462915601023,
+ "grad_norm": 1.1063879184523018,
+ "learning_rate": 0.0001563644658685851,
+ "loss": 1.1112,
+ "step": 91
+ },
+ {
+ "epoch": 0.9411764705882353,
+ "grad_norm": 1.403473791127022,
+ "learning_rate": 0.00015619068760109703,
+ "loss": 1.1334,
+ "step": 92
+ },
+ {
+ "epoch": 0.9514066496163683,
+ "grad_norm": 1.1738079096105647,
+ "learning_rate": 0.00015601295360233528,
+ "loss": 1.123,
+ "step": 93
+ },
+ {
+ "epoch": 0.9616368286445013,
+ "grad_norm": 1.2622400384084083,
+ "learning_rate": 0.0001558312731000409,
+ "loss": 1.1245,
+ "step": 94
+ },
+ {
+ "epoch": 0.9718670076726342,
+ "grad_norm": 1.5103980615396817,
+ "learning_rate": 0.00015564565552685282,
+ "loss": 1.1159,
+ "step": 95
+ },
+ {
+ "epoch": 0.9820971867007673,
+ "grad_norm": 0.9924832092739461,
+ "learning_rate": 0.00015545611051981807,
+ "loss": 1.1086,
+ "step": 96
+ },
+ {
+ "epoch": 0.9923273657289002,
+ "grad_norm": 1.2306800508291864,
+ "learning_rate": 0.00015526264791989144,
+ "loss": 1.1396,
+ "step": 97
+ },
+ {
+ "epoch": 1.0025575447570332,
+ "grad_norm": 1.7155461125801181,
+ "learning_rate": 0.00015506527777142453,
+ "loss": 1.4022,
+ "step": 98
+ },
+ {
+ "epoch": 1.0127877237851663,
+ "grad_norm": 1.0329131057981322,
+ "learning_rate": 0.00015486401032164434,
+ "loss": 1.0962,
+ "step": 99
+ },
+ {
+ "epoch": 1.0230179028132993,
+ "grad_norm": 1.5658900134919402,
+ "learning_rate": 0.00015465885602012117,
+ "loss": 1.1252,
+ "step": 100
+ },
+ {
+ "epoch": 1.0332480818414322,
+ "grad_norm": 0.8015741030349912,
+ "learning_rate": 0.00015444982551822604,
+ "loss": 1.1044,
+ "step": 101
+ },
+ {
+ "epoch": 1.0434782608695652,
+ "grad_norm": 1.3138293387151978,
+ "learning_rate": 0.00015423692966857788,
+ "loss": 1.1138,
+ "step": 102
+ },
+ {
+ "epoch": 1.0537084398976981,
+ "grad_norm": 0.8910062603103389,
+ "learning_rate": 0.00015402017952447983,
+ "loss": 1.0804,
+ "step": 103
+ },
+ {
+ "epoch": 1.0639386189258313,
+ "grad_norm": 1.4375718907843533,
+ "learning_rate": 0.00015379958633934555,
+ "loss": 1.1212,
+ "step": 104
+ },
+ {
+ "epoch": 1.0741687979539642,
+ "grad_norm": 1.2721636337229971,
+ "learning_rate": 0.0001535751615661149,
+ "loss": 1.1,
+ "step": 105
+ },
+ {
+ "epoch": 1.0843989769820972,
+ "grad_norm": 1.0105810629121363,
+ "learning_rate": 0.00015334691685665928,
+ "loss": 1.096,
+ "step": 106
+ },
+ {
+ "epoch": 1.0946291560102301,
+ "grad_norm": 1.0900734221547557,
+ "learning_rate": 0.00015311486406117668,
+ "loss": 1.0882,
+ "step": 107
+ },
+ {
+ "epoch": 1.104859335038363,
+ "grad_norm": 1.1336612336243286,
+ "learning_rate": 0.00015287901522757652,
+ "loss": 1.1214,
+ "step": 108
+ },
+ {
+ "epoch": 1.1150895140664963,
+ "grad_norm": 1.1569430105162481,
+ "learning_rate": 0.000152639382600854,
+ "loss": 1.0963,
+ "step": 109
+ },
+ {
+ "epoch": 1.1253196930946292,
+ "grad_norm": 1.1569271586356937,
+ "learning_rate": 0.00015239597862245452,
+ "loss": 1.0855,
+ "step": 110
+ },
+ {
+ "epoch": 1.1355498721227621,
+ "grad_norm": 1.137534239049028,
+ "learning_rate": 0.00015214881592962753,
+ "loss": 1.094,
+ "step": 111
+ },
+ {
+ "epoch": 1.145780051150895,
+ "grad_norm": 1.2673351118799951,
+ "learning_rate": 0.00015189790735477062,
+ "loss": 1.0819,
+ "step": 112
+ },
+ {
+ "epoch": 1.156010230179028,
+ "grad_norm": 0.8230868750468249,
+ "learning_rate": 0.00015164326592476316,
+ "loss": 1.099,
+ "step": 113
+ },
+ {
+ "epoch": 1.1662404092071612,
+ "grad_norm": 1.0550234081618097,
+ "learning_rate": 0.00015138490486028998,
+ "loss": 1.0887,
+ "step": 114
+ },
+ {
+ "epoch": 1.1764705882352942,
+ "grad_norm": 1.2808691339484497,
+ "learning_rate": 0.000151122837575155,
+ "loss": 1.0879,
+ "step": 115
+ },
+ {
+ "epoch": 1.186700767263427,
+ "grad_norm": 1.0207057907200907,
+ "learning_rate": 0.00015085707767558475,
+ "loss": 1.0974,
+ "step": 116
+ },
+ {
+ "epoch": 1.19693094629156,
+ "grad_norm": 1.7224394340286069,
+ "learning_rate": 0.00015058763895952194,
+ "loss": 1.1016,
+ "step": 117
+ },
+ {
+ "epoch": 1.207161125319693,
+ "grad_norm": 0.5474668831444041,
+ "learning_rate": 0.00015031453541590925,
+ "loss": 1.0789,
+ "step": 118
+ },
+ {
+ "epoch": 1.2173913043478262,
+ "grad_norm": 1.2675844883943275,
+ "learning_rate": 0.00015003778122396277,
+ "loss": 1.0851,
+ "step": 119
+ },
+ {
+ "epoch": 1.227621483375959,
+ "grad_norm": 1.2798593237994211,
+ "learning_rate": 0.0001497573907524361,
+ "loss": 1.1047,
+ "step": 120
+ },
+ {
+ "epoch": 1.237851662404092,
+ "grad_norm": 1.1502336899283057,
+ "learning_rate": 0.00014947337855887406,
+ "loss": 1.0943,
+ "step": 121
+ },
+ {
+ "epoch": 1.248081841432225,
+ "grad_norm": 1.2423026547868323,
+ "learning_rate": 0.00014918575938885725,
+ "loss": 1.0896,
+ "step": 122
+ },
+ {
+ "epoch": 1.258312020460358,
+ "grad_norm": 0.8127468643676744,
+ "learning_rate": 0.00014889454817523608,
+ "loss": 1.0984,
+ "step": 123
+ },
+ {
+ "epoch": 1.2685421994884911,
+ "grad_norm": 1.1236998604735062,
+ "learning_rate": 0.00014859976003735572,
+ "loss": 1.091,
+ "step": 124
+ },
+ {
+ "epoch": 1.278772378516624,
+ "grad_norm": 1.0625241691652207,
+ "learning_rate": 0.0001483014102802711,
+ "loss": 1.0749,
+ "step": 125
+ },
+ {
+ "epoch": 1.289002557544757,
+ "grad_norm": 1.2427086503303664,
+ "learning_rate": 0.00014799951439395221,
+ "loss": 1.0901,
+ "step": 126
+ },
+ {
+ "epoch": 1.29923273657289,
+ "grad_norm": 0.9886108496701173,
+ "learning_rate": 0.00014769408805247986,
+ "loss": 1.0848,
+ "step": 127
+ },
+ {
+ "epoch": 1.309462915601023,
+ "grad_norm": 1.017219160331291,
+ "learning_rate": 0.0001473851471132321,
+ "loss": 1.0897,
+ "step": 128
+ },
+ {
+ "epoch": 1.319693094629156,
+ "grad_norm": 1.4542703012009928,
+ "learning_rate": 0.00014707270761606063,
+ "loss": 1.0695,
+ "step": 129
+ },
+ {
+ "epoch": 1.329923273657289,
+ "grad_norm": 1.0782706459189146,
+ "learning_rate": 0.00014675678578245828,
+ "loss": 1.0895,
+ "step": 130
+ },
+ {
+ "epoch": 1.340153452685422,
+ "grad_norm": 1.0968316967604133,
+ "learning_rate": 0.00014643739801471667,
+ "loss": 1.1003,
+ "step": 131
+ },
+ {
+ "epoch": 1.350383631713555,
+ "grad_norm": 0.9003561398004416,
+ "learning_rate": 0.00014611456089507464,
+ "loss": 1.098,
+ "step": 132
+ },
+ {
+ "epoch": 1.3606138107416879,
+ "grad_norm": 1.351034619458769,
+ "learning_rate": 0.00014578829118485742,
+ "loss": 1.0698,
+ "step": 133
+ },
+ {
+ "epoch": 1.370843989769821,
+ "grad_norm": 0.8658183503538551,
+ "learning_rate": 0.00014545860582360624,
+ "loss": 1.1071,
+ "step": 134
+ },
+ {
+ "epoch": 1.381074168797954,
+ "grad_norm": 1.0177123952310823,
+ "learning_rate": 0.00014512552192819897,
+ "loss": 1.0869,
+ "step": 135
+ },
+ {
+ "epoch": 1.391304347826087,
+ "grad_norm": 1.4195927565971158,
+ "learning_rate": 0.0001447890567919614,
+ "loss": 1.0954,
+ "step": 136
+ },
+ {
+ "epoch": 1.40153452685422,
+ "grad_norm": 0.9931491307968467,
+ "learning_rate": 0.00014444922788376934,
+ "loss": 1.0784,
+ "step": 137
+ },
+ {
+ "epoch": 1.4117647058823528,
+ "grad_norm": 1.270639141634098,
+ "learning_rate": 0.00014410605284714175,
+ "loss": 1.0888,
+ "step": 138
+ },
+ {
+ "epoch": 1.421994884910486,
+ "grad_norm": 0.8408471380854269,
+ "learning_rate": 0.0001437595494993246,
+ "loss": 1.0842,
+ "step": 139
+ },
+ {
+ "epoch": 1.432225063938619,
+ "grad_norm": 1.380164251857079,
+ "learning_rate": 0.000143409735830366,
+ "loss": 1.0795,
+ "step": 140
+ },
+ {
+ "epoch": 1.4424552429667519,
+ "grad_norm": 0.9896679635139878,
+ "learning_rate": 0.00014305663000218193,
+ "loss": 1.0907,
+ "step": 141
+ },
+ {
+ "epoch": 1.452685421994885,
+ "grad_norm": 1.412072301199199,
+ "learning_rate": 0.00014270025034761352,
+ "loss": 1.0817,
+ "step": 142
+ },
+ {
+ "epoch": 1.4629156010230178,
+ "grad_norm": 0.770940428011719,
+ "learning_rate": 0.000142340615369475,
+ "loss": 1.0819,
+ "step": 143
+ },
+ {
+ "epoch": 1.473145780051151,
+ "grad_norm": 0.7983918736084521,
+ "learning_rate": 0.00014197774373959327,
+ "loss": 1.0931,
+ "step": 144
+ },
+ {
+ "epoch": 1.4833759590792839,
+ "grad_norm": 1.3538690920598053,
+ "learning_rate": 0.00014161165429783844,
+ "loss": 1.0884,
+ "step": 145
+ },
+ {
+ "epoch": 1.4936061381074168,
+ "grad_norm": 0.8169757443568035,
+ "learning_rate": 0.0001412423660511456,
+ "loss": 1.0924,
+ "step": 146
+ },
+ {
+ "epoch": 1.50383631713555,
+ "grad_norm": 0.9256987946838048,
+ "learning_rate": 0.00014086989817252803,
+ "loss": 1.0785,
+ "step": 147
+ },
+ {
+ "epoch": 1.5140664961636827,
+ "grad_norm": 1.1631633494660514,
+ "learning_rate": 0.00014049427000008185,
+ "loss": 1.0699,
+ "step": 148
+ },
+ {
+ "epoch": 1.5242966751918159,
+ "grad_norm": 1.212333243880141,
+ "learning_rate": 0.00014011550103598176,
+ "loss": 1.064,
+ "step": 149
+ },
+ {
+ "epoch": 1.5345268542199488,
+ "grad_norm": 0.9967182911949714,
+ "learning_rate": 0.0001397336109454689,
+ "loss": 1.1002,
+ "step": 150
+ },
+ {
+ "epoch": 1.5447570332480818,
+ "grad_norm": 1.1502313981018193,
+ "learning_rate": 0.0001393486195558295,
+ "loss": 1.0709,
+ "step": 151
+ },
+ {
+ "epoch": 1.554987212276215,
+ "grad_norm": 1.311911957474106,
+ "learning_rate": 0.00013896054685536566,
+ "loss": 1.0717,
+ "step": 152
+ },
+ {
+ "epoch": 1.5652173913043477,
+ "grad_norm": 0.8141917822120857,
+ "learning_rate": 0.00013856941299235752,
+ "loss": 1.0714,
+ "step": 153
+ },
+ {
+ "epoch": 1.5754475703324808,
+ "grad_norm": 0.5826303398204319,
+ "learning_rate": 0.00013817523827401715,
+ "loss": 1.0825,
+ "step": 154
+ },
+ {
+ "epoch": 1.5856777493606138,
+ "grad_norm": 1.1035174246946824,
+ "learning_rate": 0.00013777804316543438,
+ "loss": 1.0583,
+ "step": 155
+ },
+ {
+ "epoch": 1.5959079283887467,
+ "grad_norm": 1.483439389847773,
+ "learning_rate": 0.00013737784828851405,
+ "loss": 1.0998,
+ "step": 156
+ },
+ {
+ "epoch": 1.60613810741688,
+ "grad_norm": 0.5590063439764236,
+ "learning_rate": 0.0001369746744209055,
+ "loss": 1.0814,
+ "step": 157
+ },
+ {
+ "epoch": 1.6163682864450126,
+ "grad_norm": 1.427302602685382,
+ "learning_rate": 0.00013656854249492382,
+ "loss": 1.0672,
+ "step": 158
+ },
+ {
+ "epoch": 1.6265984654731458,
+ "grad_norm": 0.9907320262762309,
+ "learning_rate": 0.00013615947359646295,
+ "loss": 1.077,
+ "step": 159
+ },
+ {
+ "epoch": 1.6368286445012787,
+ "grad_norm": 1.3253726577143705,
+ "learning_rate": 0.00013574748896390105,
+ "loss": 1.0831,
+ "step": 160
+ },
+ {
+ "epoch": 1.6470588235294117,
+ "grad_norm": 0.6170129994977172,
+ "learning_rate": 0.00013533260998699776,
+ "loss": 1.0808,
+ "step": 161
+ },
+ {
+ "epoch": 1.6572890025575449,
+ "grad_norm": 0.8974715249718852,
+ "learning_rate": 0.00013491485820578373,
+ "loss": 1.0609,
+ "step": 162
+ },
+ {
+ "epoch": 1.6675191815856778,
+ "grad_norm": 0.8218782327367921,
+ "learning_rate": 0.00013449425530944218,
+ "loss": 1.0822,
+ "step": 163
+ },
+ {
+ "epoch": 1.6777493606138107,
+ "grad_norm": 0.6783603661700464,
+ "learning_rate": 0.00013407082313518292,
+ "loss": 1.0771,
+ "step": 164
+ },
+ {
+ "epoch": 1.6879795396419437,
+ "grad_norm": 0.6976792612146404,
+ "learning_rate": 0.0001336445836671086,
+ "loss": 1.0853,
+ "step": 165
+ },
+ {
+ "epoch": 1.6982097186700766,
+ "grad_norm": 0.823428019130252,
+ "learning_rate": 0.0001332155590350732,
+ "loss": 1.0838,
+ "step": 166
+ },
+ {
+ "epoch": 1.7084398976982098,
+ "grad_norm": 1.0953340521038164,
+ "learning_rate": 0.0001327837715135332,
+ "loss": 1.082,
+ "step": 167
+ },
+ {
+ "epoch": 1.7186700767263428,
+ "grad_norm": 0.9474195557921864,
+ "learning_rate": 0.00013234924352039103,
+ "loss": 1.0802,
+ "step": 168
+ },
+ {
+ "epoch": 1.7289002557544757,
+ "grad_norm": 0.9444470757389084,
+ "learning_rate": 0.00013191199761583124,
+ "loss": 1.0887,
+ "step": 169
+ },
+ {
+ "epoch": 1.7391304347826086,
+ "grad_norm": 1.101212086185109,
+ "learning_rate": 0.00013147205650114913,
+ "loss": 1.0718,
+ "step": 170
+ },
+ {
+ "epoch": 1.7493606138107416,
+ "grad_norm": 1.0553531873256305,
+ "learning_rate": 0.0001310294430175722,
+ "loss": 1.0788,
+ "step": 171
+ },
+ {
+ "epoch": 1.7595907928388748,
+ "grad_norm": 1.05582162764911,
+ "learning_rate": 0.00013058418014507412,
+ "loss": 1.0879,
+ "step": 172
+ },
+ {
+ "epoch": 1.7698209718670077,
+ "grad_norm": 0.9944536264830629,
+ "learning_rate": 0.00013013629100118183,
+ "loss": 1.0721,
+ "step": 173
+ },
+ {
+ "epoch": 1.7800511508951407,
+ "grad_norm": 1.1307659477843484,
+ "learning_rate": 0.00012968579883977508,
+ "loss": 1.0737,
+ "step": 174
+ },
+ {
+ "epoch": 1.7902813299232738,
+ "grad_norm": 1.2420812191214468,
+ "learning_rate": 0.00012923272704987943,
+ "loss": 1.0742,
+ "step": 175
+ },
+ {
+ "epoch": 1.8005115089514065,
+ "grad_norm": 1.0472853919924061,
+ "learning_rate": 0.00012877709915445155,
+ "loss": 1.0721,
+ "step": 176
+ },
+ {
+ "epoch": 1.8107416879795397,
+ "grad_norm": 0.6947884508219546,
+ "learning_rate": 0.00012831893880915822,
+ "loss": 1.0555,
+ "step": 177
+ },
+ {
+ "epoch": 1.8209718670076727,
+ "grad_norm": 0.7757404896202875,
+ "learning_rate": 0.00012785826980114798,
+ "loss": 1.0804,
+ "step": 178
+ },
+ {
+ "epoch": 1.8312020460358056,
+ "grad_norm": 1.253778618594067,
+ "learning_rate": 0.0001273951160478163,
+ "loss": 1.063,
+ "step": 179
+ },
+ {
+ "epoch": 1.8414322250639388,
+ "grad_norm": 0.5979958514242996,
+ "learning_rate": 0.00012692950159556358,
+ "loss": 1.0666,
+ "step": 180
+ },
+ {
+ "epoch": 1.8516624040920715,
+ "grad_norm": 0.8662747748641294,
+ "learning_rate": 0.00012646145061854697,
+ "loss": 1.0703,
+ "step": 181
+ },
+ {
+ "epoch": 1.8618925831202047,
+ "grad_norm": 0.8287006788806506,
+ "learning_rate": 0.00012599098741742504,
+ "loss": 1.0571,
+ "step": 182
+ },
+ {
+ "epoch": 1.8721227621483376,
+ "grad_norm": 1.0265713289334373,
+ "learning_rate": 0.00012551813641809622,
+ "loss": 1.0706,
+ "step": 183
+ },
+ {
+ "epoch": 1.8823529411764706,
+ "grad_norm": 1.2306509484226458,
+ "learning_rate": 0.0001250429221704306,
+ "loss": 1.0779,
+ "step": 184
+ },
+ {
+ "epoch": 1.8925831202046037,
+ "grad_norm": 0.8951251773522786,
+ "learning_rate": 0.00012456536934699552,
+ "loss": 1.064,
+ "step": 185
+ },
+ {
+ "epoch": 1.9028132992327365,
+ "grad_norm": 1.1181307738147266,
+ "learning_rate": 0.0001240855027417742,
+ "loss": 1.0585,
+ "step": 186
+ },
+ {
+ "epoch": 1.9130434782608696,
+ "grad_norm": 0.977542614018667,
+ "learning_rate": 0.00012360334726887887,
+ "loss": 1.0672,
+ "step": 187
+ },
+ {
+ "epoch": 1.9232736572890026,
+ "grad_norm": 1.1637804133002638,
+ "learning_rate": 0.00012311892796125704,
+ "loss": 1.0713,
+ "step": 188
+ },
+ {
+ "epoch": 1.9335038363171355,
+ "grad_norm": 0.445419436951453,
+ "learning_rate": 0.0001226322699693918,
+ "loss": 1.0536,
+ "step": 189
+ },
+ {
+ "epoch": 1.9437340153452687,
+ "grad_norm": 0.9034412973205753,
+ "learning_rate": 0.00012214339855999624,
+ "loss": 1.0807,
+ "step": 190
+ },
+ {
+ "epoch": 1.9539641943734014,
+ "grad_norm": 0.5236086539347802,
+ "learning_rate": 0.00012165233911470136,
+ "loss": 1.0777,
+ "step": 191
+ },
+ {
+ "epoch": 1.9641943734015346,
+ "grad_norm": 0.6745807390622581,
+ "learning_rate": 0.00012115911712873851,
+ "loss": 1.0525,
+ "step": 192
+ },
+ {
+ "epoch": 1.9744245524296675,
+ "grad_norm": 0.5551428173478703,
+ "learning_rate": 0.00012066375820961558,
+ "loss": 1.0617,
+ "step": 193
+ },
+ {
+ "epoch": 1.9846547314578005,
+ "grad_norm": 0.5930456521809883,
+ "learning_rate": 0.00012016628807578756,
+ "loss": 1.0682,
+ "step": 194
+ },
+ {
+ "epoch": 1.9948849104859336,
+ "grad_norm": 0.5333039982670191,
+ "learning_rate": 0.00011966673255532119,
+ "loss": 1.1518,
+ "step": 195
+ },
+ {
+ "epoch": 2.0051150895140664,
+ "grad_norm": 0.7368227183464856,
+ "learning_rate": 0.00011916511758455407,
+ "loss": 1.226,
+ "step": 196
+ },
+ {
+ "epoch": 2.0153452685421995,
+ "grad_norm": 1.0538129513540742,
+ "learning_rate": 0.00011866146920674807,
+ "loss": 1.068,
+ "step": 197
+ },
+ {
+ "epoch": 2.0255754475703327,
+ "grad_norm": 1.2094094160759923,
+ "learning_rate": 0.0001181558135707371,
+ "loss": 1.0502,
+ "step": 198
+ },
+ {
+ "epoch": 2.0358056265984654,
+ "grad_norm": 0.915224534709575,
+ "learning_rate": 0.00011764817692956966,
+ "loss": 1.0286,
+ "step": 199
+ },
+ {
+ "epoch": 2.0460358056265986,
+ "grad_norm": 1.0711022599910567,
+ "learning_rate": 0.00011713858563914562,
+ "loss": 1.0747,
+ "step": 200
+ },
+ {
+ "epoch": 2.0562659846547313,
+ "grad_norm": 1.0787442473268498,
+ "learning_rate": 0.00011662706615684803,
+ "loss": 1.045,
+ "step": 201
+ },
+ {
+ "epoch": 2.0664961636828645,
+ "grad_norm": 0.8778392977870085,
+ "learning_rate": 0.00011611364504016935,
+ "loss": 1.0678,
+ "step": 202
+ },
+ {
+ "epoch": 2.0767263427109977,
+ "grad_norm": 0.9910713045345557,
+ "learning_rate": 0.00011559834894533275,
+ "loss": 1.0458,
+ "step": 203
+ },
+ {
+ "epoch": 2.0869565217391304,
+ "grad_norm": 1.1592891619781418,
+ "learning_rate": 0.00011508120462590794,
+ "loss": 1.0461,
+ "step": 204
+ },
+ {
+ "epoch": 2.0971867007672635,
+ "grad_norm": 0.6931145505443533,
+ "learning_rate": 0.00011456223893142238,
+ "loss": 1.0407,
+ "step": 205
+ },
+ {
+ "epoch": 2.1074168797953963,
+ "grad_norm": 0.8009294768801537,
+ "learning_rate": 0.0001140414788059672,
+ "loss": 1.0534,
+ "step": 206
+ },
+ {
+ "epoch": 2.1176470588235294,
+ "grad_norm": 0.6527102431957467,
+ "learning_rate": 0.00011351895128679823,
+ "loss": 1.0577,
+ "step": 207
+ },
+ {
+ "epoch": 2.1278772378516626,
+ "grad_norm": 0.5808810881566437,
+ "learning_rate": 0.00011299468350293232,
+ "loss": 1.0592,
+ "step": 208
+ },
+ {
+ "epoch": 2.1381074168797953,
+ "grad_norm": 0.8896635325022703,
+ "learning_rate": 0.00011246870267373885,
+ "loss": 1.069,
+ "step": 209
+ },
+ {
+ "epoch": 2.1483375959079285,
+ "grad_norm": 1.0873242378114067,
+ "learning_rate": 0.00011194103610752655,
+ "loss": 1.0454,
+ "step": 210
+ },
+ {
+ "epoch": 2.1585677749360612,
+ "grad_norm": 0.9489754319671625,
+ "learning_rate": 0.00011141171120012552,
+ "loss": 1.0723,
+ "step": 211
+ },
+ {
+ "epoch": 2.1687979539641944,
+ "grad_norm": 0.8030522232247485,
+ "learning_rate": 0.0001108807554334651,
+ "loss": 1.0428,
+ "step": 212
+ },
+ {
+ "epoch": 2.1790281329923276,
+ "grad_norm": 0.5529249514757153,
+ "learning_rate": 0.00011034819637414686,
+ "loss": 1.061,
+ "step": 213
+ },
+ {
+ "epoch": 2.1892583120204603,
+ "grad_norm": 0.4631583007632592,
+ "learning_rate": 0.00010981406167201354,
+ "loss": 1.0355,
+ "step": 214
+ },
+ {
+ "epoch": 2.1994884910485935,
+ "grad_norm": 0.4099197978603843,
+ "learning_rate": 0.0001092783790587133,
+ "loss": 1.0777,
+ "step": 215
+ },
+ {
+ "epoch": 2.209718670076726,
+ "grad_norm": 0.4121757237952397,
+ "learning_rate": 0.00010874117634626011,
+ "loss": 1.0541,
+ "step": 216
+ },
+ {
+ "epoch": 2.2199488491048593,
+ "grad_norm": 0.4349846280052258,
+ "learning_rate": 0.00010820248142558965,
+ "loss": 1.0435,
+ "step": 217
+ },
+ {
+ "epoch": 2.2301790281329925,
+ "grad_norm": 0.3960108429870153,
+ "learning_rate": 0.00010766232226511142,
+ "loss": 1.0513,
+ "step": 218
+ },
+ {
+ "epoch": 2.2404092071611252,
+ "grad_norm": 0.4522590868845693,
+ "learning_rate": 0.00010712072690925638,
+ "loss": 1.0509,
+ "step": 219
+ },
+ {
+ "epoch": 2.2506393861892584,
+ "grad_norm": 0.4862701100635931,
+ "learning_rate": 0.00010657772347702118,
+ "loss": 1.0325,
+ "step": 220
+ },
+ {
+ "epoch": 2.260869565217391,
+ "grad_norm": 0.44763620371165586,
+ "learning_rate": 0.00010603334016050808,
+ "loss": 1.0369,
+ "step": 221
+ },
+ {
+ "epoch": 2.2710997442455243,
+ "grad_norm": 0.5580650650662916,
+ "learning_rate": 0.00010548760522346138,
+ "loss": 1.0414,
+ "step": 222
+ },
+ {
+ "epoch": 2.2813299232736575,
+ "grad_norm": 0.6352266377825914,
+ "learning_rate": 0.00010494054699979992,
+ "loss": 1.056,
+ "step": 223
+ },
+ {
+ "epoch": 2.29156010230179,
+ "grad_norm": 0.617790095766774,
+ "learning_rate": 0.00010439219389214595,
+ "loss": 1.0573,
+ "step": 224
+ },
+ {
+ "epoch": 2.3017902813299234,
+ "grad_norm": 0.6417970276744828,
+ "learning_rate": 0.0001038425743703507,
+ "loss": 1.0412,
+ "step": 225
+ },
+ {
+ "epoch": 2.312020460358056,
+ "grad_norm": 0.7082597904375986,
+ "learning_rate": 0.00010329171697001608,
+ "loss": 1.0366,
+ "step": 226
+ },
+ {
+ "epoch": 2.3222506393861893,
+ "grad_norm": 0.8496266841221832,
+ "learning_rate": 0.0001027396502910132,
+ "loss": 1.0451,
+ "step": 227
+ },
+ {
+ "epoch": 2.3324808184143224,
+ "grad_norm": 1.0308330001421908,
+ "learning_rate": 0.0001021864029959975,
+ "loss": 1.0428,
+ "step": 228
+ },
+ {
+ "epoch": 2.342710997442455,
+ "grad_norm": 1.026355847644825,
+ "learning_rate": 0.00010163200380892063,
+ "loss": 1.0612,
+ "step": 229
+ },
+ {
+ "epoch": 2.3529411764705883,
+ "grad_norm": 0.7560985076254375,
+ "learning_rate": 0.00010107648151353916,
+ "loss": 1.0247,
+ "step": 230
+ },
+ {
+ "epoch": 2.363171355498721,
+ "grad_norm": 0.41561156978885305,
+ "learning_rate": 0.00010051986495192008,
+ "loss": 1.0363,
+ "step": 231
+ },
+ {
+ "epoch": 2.373401534526854,
+ "grad_norm": 0.38675030483907014,
+ "learning_rate": 9.99621830229434e-05,
+ "loss": 1.05,
+ "step": 232
+ },
+ {
+ "epoch": 2.3836317135549874,
+ "grad_norm": 0.4804093390079753,
+ "learning_rate": 9.94034646808018e-05,
+ "loss": 1.0537,
+ "step": 233
+ },
+ {
+ "epoch": 2.39386189258312,
+ "grad_norm": 0.5877095225822028,
+ "learning_rate": 9.884373893349725e-05,
+ "loss": 1.0273,
+ "step": 234
+ },
+ {
+ "epoch": 2.4040920716112533,
+ "grad_norm": 0.6010921886045744,
+ "learning_rate": 9.828303484133515e-05,
+ "loss": 1.053,
+ "step": 235
+ },
+ {
+ "epoch": 2.414322250639386,
+ "grad_norm": 0.4918851450838751,
+ "learning_rate": 9.772138151541522e-05,
+ "loss": 1.0364,
+ "step": 236
+ },
+ {
+ "epoch": 2.424552429667519,
+ "grad_norm": 0.3407705594769831,
+ "learning_rate": 9.715880811612044e-05,
+ "loss": 1.0331,
+ "step": 237
+ },
+ {
+ "epoch": 2.4347826086956523,
+ "grad_norm": 0.2468632428242675,
+ "learning_rate": 9.659534385160289e-05,
+ "loss": 1.0323,
+ "step": 238
+ },
+ {
+ "epoch": 2.445012787723785,
+ "grad_norm": 0.26175126594915626,
+ "learning_rate": 9.603101797626729e-05,
+ "loss": 1.0491,
+ "step": 239
+ },
+ {
+ "epoch": 2.455242966751918,
+ "grad_norm": 0.3250566758468044,
+ "learning_rate": 9.546585978925221e-05,
+ "loss": 1.0127,
+ "step": 240
+ },
+ {
+ "epoch": 2.4654731457800514,
+ "grad_norm": 0.3733853550848411,
+ "learning_rate": 9.489989863290885e-05,
+ "loss": 1.0637,
+ "step": 241
+ },
+ {
+ "epoch": 2.475703324808184,
+ "grad_norm": 0.45748627752418863,
+ "learning_rate": 9.433316389127768e-05,
+ "loss": 1.038,
+ "step": 242
+ },
+ {
+ "epoch": 2.4859335038363173,
+ "grad_norm": 0.432153145698656,
+ "learning_rate": 9.37656849885628e-05,
+ "loss": 1.0441,
+ "step": 243
+ },
+ {
+ "epoch": 2.49616368286445,
+ "grad_norm": 0.3361885905419595,
+ "learning_rate": 9.319749138760424e-05,
+ "loss": 1.0317,
+ "step": 244
+ },
+ {
+ "epoch": 2.506393861892583,
+ "grad_norm": 0.26392084893370804,
+ "learning_rate": 9.262861258834833e-05,
+ "loss": 1.0353,
+ "step": 245
+ },
+ {
+ "epoch": 2.516624040920716,
+ "grad_norm": 0.25278206548366783,
+ "learning_rate": 9.205907812631616e-05,
+ "loss": 1.0211,
+ "step": 246
+ },
+ {
+ "epoch": 2.526854219948849,
+ "grad_norm": 0.2853155639467533,
+ "learning_rate": 9.148891757106999e-05,
+ "loss": 1.0381,
+ "step": 247
+ },
+ {
+ "epoch": 2.5370843989769822,
+ "grad_norm": 0.28133618292890095,
+ "learning_rate": 9.091816052467817e-05,
+ "loss": 1.045,
+ "step": 248
+ },
+ {
+ "epoch": 2.547314578005115,
+ "grad_norm": 0.26415138394214893,
+ "learning_rate": 9.034683662017812e-05,
+ "loss": 1.0339,
+ "step": 249
+ },
+ {
+ "epoch": 2.557544757033248,
+ "grad_norm": 0.23100116731609785,
+ "learning_rate": 8.977497552003785e-05,
+ "loss": 1.0297,
+ "step": 250
+ },
+ {
+ "epoch": 2.5677749360613813,
+ "grad_norm": 0.25137211273391913,
+ "learning_rate": 8.920260691461602e-05,
+ "loss": 1.0474,
+ "step": 251
+ },
+ {
+ "epoch": 2.578005115089514,
+ "grad_norm": 0.25925530222353527,
+ "learning_rate": 8.862976052062034e-05,
+ "loss": 1.0478,
+ "step": 252
+ },
+ {
+ "epoch": 2.588235294117647,
+ "grad_norm": 0.21181713461857493,
+ "learning_rate": 8.805646607956467e-05,
+ "loss": 1.0384,
+ "step": 253
+ },
+ {
+ "epoch": 2.59846547314578,
+ "grad_norm": 0.20383781215036986,
+ "learning_rate": 8.748275335622506e-05,
+ "loss": 1.0352,
+ "step": 254
+ },
+ {
+ "epoch": 2.608695652173913,
+ "grad_norm": 0.22216426938201625,
+ "learning_rate": 8.69086521370942e-05,
+ "loss": 1.0251,
+ "step": 255
+ },
+ {
+ "epoch": 2.618925831202046,
+ "grad_norm": 0.2339550911616179,
+ "learning_rate": 8.633419222883508e-05,
+ "loss": 1.0388,
+ "step": 256
+ },
+ {
+ "epoch": 2.629156010230179,
+ "grad_norm": 0.29608330426201757,
+ "learning_rate": 8.575940345673337e-05,
+ "loss": 1.0415,
+ "step": 257
+ },
+ {
+ "epoch": 2.639386189258312,
+ "grad_norm": 0.282754017428277,
+ "learning_rate": 8.518431566314901e-05,
+ "loss": 1.0338,
+ "step": 258
+ },
+ {
+ "epoch": 2.649616368286445,
+ "grad_norm": 0.21590808755680316,
+ "learning_rate": 8.460895870596675e-05,
+ "loss": 1.0455,
+ "step": 259
+ },
+ {
+ "epoch": 2.659846547314578,
+ "grad_norm": 0.22038275314532527,
+ "learning_rate": 8.4033362457046e-05,
+ "loss": 1.0446,
+ "step": 260
+ },
+ {
+ "epoch": 2.670076726342711,
+ "grad_norm": 0.19903936327716265,
+ "learning_rate": 8.345755680066993e-05,
+ "loss": 1.0282,
+ "step": 261
+ },
+ {
+ "epoch": 2.680306905370844,
+ "grad_norm": 0.17558239320808622,
+ "learning_rate": 8.288157163199389e-05,
+ "loss": 1.0278,
+ "step": 262
+ },
+ {
+ "epoch": 2.690537084398977,
+ "grad_norm": 0.20248735135033116,
+ "learning_rate": 8.230543685549333e-05,
+ "loss": 1.0317,
+ "step": 263
+ },
+ {
+ "epoch": 2.70076726342711,
+ "grad_norm": 0.23742588094542533,
+ "learning_rate": 8.17291823834111e-05,
+ "loss": 1.0326,
+ "step": 264
+ },
+ {
+ "epoch": 2.710997442455243,
+ "grad_norm": 0.3032160882786812,
+ "learning_rate": 8.115283813420459e-05,
+ "loss": 1.0375,
+ "step": 265
+ },
+ {
+ "epoch": 2.7212276214833757,
+ "grad_norm": 0.2962448679587415,
+ "learning_rate": 8.057643403099221e-05,
+ "loss": 1.0584,
+ "step": 266
+ },
+ {
+ "epoch": 2.731457800511509,
+ "grad_norm": 0.30722625928358643,
+ "learning_rate": 8e-05,
+ "loss": 1.0395,
+ "step": 267
+ },
+ {
+ "epoch": 2.741687979539642,
+ "grad_norm": 0.28307960709841534,
+ "learning_rate": 7.94235659690078e-05,
+ "loss": 1.0369,
+ "step": 268
+ },
+ {
+ "epoch": 2.7519181585677748,
+ "grad_norm": 0.21321155330317557,
+ "learning_rate": 7.884716186579545e-05,
+ "loss": 1.0532,
+ "step": 269
+ },
+ {
+ "epoch": 2.762148337595908,
+ "grad_norm": 0.2443283053996415,
+ "learning_rate": 7.827081761658892e-05,
+ "loss": 1.0266,
+ "step": 270
+ },
+ {
+ "epoch": 2.772378516624041,
+ "grad_norm": 0.2904460725842854,
+ "learning_rate": 7.76945631445067e-05,
+ "loss": 1.0344,
+ "step": 271
+ },
+ {
+ "epoch": 2.782608695652174,
+ "grad_norm": 0.25431043958852784,
+ "learning_rate": 7.711842836800614e-05,
+ "loss": 1.0285,
+ "step": 272
+ },
+ {
+ "epoch": 2.792838874680307,
+ "grad_norm": 0.25586265632907745,
+ "learning_rate": 7.654244319933009e-05,
+ "loss": 1.0272,
+ "step": 273
+ },
+ {
+ "epoch": 2.80306905370844,
+ "grad_norm": 0.23485094953105404,
+ "learning_rate": 7.596663754295404e-05,
+ "loss": 1.0427,
+ "step": 274
+ },
+ {
+ "epoch": 2.813299232736573,
+ "grad_norm": 0.17957023788842033,
+ "learning_rate": 7.539104129403327e-05,
+ "loss": 1.0474,
+ "step": 275
+ },
+ {
+ "epoch": 2.8235294117647056,
+ "grad_norm": 0.160508230528404,
+ "learning_rate": 7.4815684336851e-05,
+ "loss": 1.0445,
+ "step": 276
+ },
+ {
+ "epoch": 2.833759590792839,
+ "grad_norm": 0.21356478024874126,
+ "learning_rate": 7.424059654326664e-05,
+ "loss": 1.04,
+ "step": 277
+ },
+ {
+ "epoch": 2.843989769820972,
+ "grad_norm": 0.21888445256566497,
+ "learning_rate": 7.366580777116495e-05,
+ "loss": 1.0406,
+ "step": 278
+ },
+ {
+ "epoch": 2.8542199488491047,
+ "grad_norm": 0.2041365287775187,
+ "learning_rate": 7.309134786290583e-05,
+ "loss": 1.0321,
+ "step": 279
+ },
+ {
+ "epoch": 2.864450127877238,
+ "grad_norm": 0.19151321727068246,
+ "learning_rate": 7.251724664377497e-05,
+ "loss": 1.0371,
+ "step": 280
+ },
+ {
+ "epoch": 2.874680306905371,
+ "grad_norm": 0.18344693741146628,
+ "learning_rate": 7.194353392043534e-05,
+ "loss": 1.039,
+ "step": 281
+ },
+ {
+ "epoch": 2.8849104859335037,
+ "grad_norm": 0.19330950475902522,
+ "learning_rate": 7.13702394793797e-05,
+ "loss": 1.0364,
+ "step": 282
+ },
+ {
+ "epoch": 2.895140664961637,
+ "grad_norm": 0.1990648393315987,
+ "learning_rate": 7.079739308538399e-05,
+ "loss": 1.0277,
+ "step": 283
+ },
+ {
+ "epoch": 2.90537084398977,
+ "grad_norm": 0.2262885121158685,
+ "learning_rate": 7.022502447996215e-05,
+ "loss": 1.0275,
+ "step": 284
+ },
+ {
+ "epoch": 2.915601023017903,
+ "grad_norm": 0.20233291351840074,
+ "learning_rate": 6.965316337982191e-05,
+ "loss": 1.0381,
+ "step": 285
+ },
+ {
+ "epoch": 2.9258312020460355,
+ "grad_norm": 0.19513363820566396,
+ "learning_rate": 6.908183947532184e-05,
+ "loss": 1.0342,
+ "step": 286
+ },
+ {
+ "epoch": 2.9360613810741687,
+ "grad_norm": 0.20380750046128057,
+ "learning_rate": 6.851108242893002e-05,
+ "loss": 1.0377,
+ "step": 287
+ },
+ {
+ "epoch": 2.946291560102302,
+ "grad_norm": 0.1866318852041668,
+ "learning_rate": 6.794092187368387e-05,
+ "loss": 1.0428,
+ "step": 288
+ },
+ {
+ "epoch": 2.9565217391304346,
+ "grad_norm": 0.15744510473495013,
+ "learning_rate": 6.737138741165168e-05,
+ "loss": 1.0503,
+ "step": 289
+ },
+ {
+ "epoch": 2.9667519181585678,
+ "grad_norm": 0.1652822650571465,
+ "learning_rate": 6.680250861239581e-05,
+ "loss": 1.035,
+ "step": 290
+ },
+ {
+ "epoch": 2.976982097186701,
+ "grad_norm": 0.15546583646318948,
+ "learning_rate": 6.623431501143723e-05,
+ "loss": 1.0313,
+ "step": 291
+ },
+ {
+ "epoch": 2.9872122762148337,
+ "grad_norm": 0.29006532173297445,
+ "learning_rate": 6.566683610872231e-05,
+ "loss": 1.0564,
+ "step": 292
+ },
+ {
+ "epoch": 2.997442455242967,
+ "grad_norm": 0.2268887255063016,
+ "learning_rate": 6.510010136709118e-05,
+ "loss": 1.2037,
+ "step": 293
+ },
+ {
+ "epoch": 3.0076726342710995,
+ "grad_norm": 0.2539730951218038,
+ "learning_rate": 6.453414021074781e-05,
+ "loss": 1.1394,
+ "step": 294
+ },
+ {
+ "epoch": 3.0179028132992327,
+ "grad_norm": 0.2231294668730024,
+ "learning_rate": 6.396898202373277e-05,
+ "loss": 1.0223,
+ "step": 295
+ },
+ {
+ "epoch": 3.028132992327366,
+ "grad_norm": 0.20401904180469313,
+ "learning_rate": 6.340465614839714e-05,
+ "loss": 1.0336,
+ "step": 296
+ },
+ {
+ "epoch": 3.0383631713554986,
+ "grad_norm": 0.18979057876237626,
+ "learning_rate": 6.284119188387957e-05,
+ "loss": 1.0107,
+ "step": 297
+ },
+ {
+ "epoch": 3.0485933503836318,
+ "grad_norm": 0.16567071328991922,
+ "learning_rate": 6.227861848458481e-05,
+ "loss": 1.0134,
+ "step": 298
+ },
+ {
+ "epoch": 3.0588235294117645,
+ "grad_norm": 0.2788005001606031,
+ "learning_rate": 6.171696515866488e-05,
+ "loss": 1.0289,
+ "step": 299
+ },
+ {
+ "epoch": 3.0690537084398977,
+ "grad_norm": 0.13520429300745568,
+ "learning_rate": 6.115626106650273e-05,
+ "loss": 1.0297,
+ "step": 300
+ },
+ {
+ "epoch": 3.079283887468031,
+ "grad_norm": 0.21854013343576806,
+ "learning_rate": 6.059653531919823e-05,
+ "loss": 1.0282,
+ "step": 301
+ },
+ {
+ "epoch": 3.0895140664961636,
+ "grad_norm": 0.1818832819994029,
+ "learning_rate": 6.0037816977056625e-05,
+ "loss": 1.0531,
+ "step": 302
+ },
+ {
+ "epoch": 3.0997442455242967,
+ "grad_norm": 0.18589522528315505,
+ "learning_rate": 5.9480135048079964e-05,
+ "loss": 1.0113,
+ "step": 303
+ },
+ {
+ "epoch": 3.10997442455243,
+ "grad_norm": 0.18063564176637392,
+ "learning_rate": 5.892351848646087e-05,
+ "loss": 1.0394,
+ "step": 304
+ },
+ {
+ "epoch": 3.1202046035805626,
+ "grad_norm": 0.147586744880159,
+ "learning_rate": 5.836799619107937e-05,
+ "loss": 1.0365,
+ "step": 305
+ },
+ {
+ "epoch": 3.130434782608696,
+ "grad_norm": 0.17675912154183307,
+ "learning_rate": 5.781359700400254e-05,
+ "loss": 1.0039,
+ "step": 306
+ },
+ {
+ "epoch": 3.1406649616368285,
+ "grad_norm": 0.16586699917434417,
+ "learning_rate": 5.726034970898682e-05,
+ "loss": 1.0243,
+ "step": 307
+ },
+ {
+ "epoch": 3.1508951406649617,
+ "grad_norm": 0.17820259338383218,
+ "learning_rate": 5.670828302998393e-05,
+ "loss": 1.0314,
+ "step": 308
+ },
+ {
+ "epoch": 3.1611253196930944,
+ "grad_norm": 0.15458082339803622,
+ "learning_rate": 5.6157425629649314e-05,
+ "loss": 1.0485,
+ "step": 309
+ },
+ {
+ "epoch": 3.1713554987212276,
+ "grad_norm": 0.1643571380269719,
+ "learning_rate": 5.560780610785406e-05,
+ "loss": 1.018,
+ "step": 310
+ },
+ {
+ "epoch": 3.1815856777493607,
+ "grad_norm": 0.156856618068632,
+ "learning_rate": 5.5059453000200125e-05,
+ "loss": 1.0061,
+ "step": 311
+ },
+ {
+ "epoch": 3.1918158567774935,
+ "grad_norm": 0.14086641088136265,
+ "learning_rate": 5.451239477653864e-05,
+ "loss": 1.0205,
+ "step": 312
+ },
+ {
+ "epoch": 3.2020460358056266,
+ "grad_norm": 0.14820087407296167,
+ "learning_rate": 5.3966659839491936e-05,
+ "loss": 1.0226,
+ "step": 313
+ },
+ {
+ "epoch": 3.21227621483376,
+ "grad_norm": 0.13884477979100748,
+ "learning_rate": 5.342227652297887e-05,
+ "loss": 1.001,
+ "step": 314
+ },
+ {
+ "epoch": 3.2225063938618925,
+ "grad_norm": 0.14309328916359365,
+ "learning_rate": 5.287927309074365e-05,
+ "loss": 1.031,
+ "step": 315
+ },
+ {
+ "epoch": 3.2327365728900257,
+ "grad_norm": 0.15252565103321644,
+ "learning_rate": 5.233767773488859e-05,
+ "loss": 1.015,
+ "step": 316
+ },
+ {
+ "epoch": 3.2429667519181584,
+ "grad_norm": 0.13230625482589622,
+ "learning_rate": 5.179751857441036e-05,
+ "loss": 1.0053,
+ "step": 317
+ },
+ {
+ "epoch": 3.2531969309462916,
+ "grad_norm": 0.1556826070734683,
+ "learning_rate": 5.1258823653739914e-05,
+ "loss": 1.0211,
+ "step": 318
+ },
+ {
+ "epoch": 3.2634271099744243,
+ "grad_norm": 0.1371375107565863,
+ "learning_rate": 5.0721620941286735e-05,
+ "loss": 1.0143,
+ "step": 319
+ },
+ {
+ "epoch": 3.2736572890025575,
+ "grad_norm": 0.13573969073646522,
+ "learning_rate": 5.018593832798649e-05,
+ "loss": 1.0375,
+ "step": 320
+ },
+ {
+ "epoch": 3.2838874680306906,
+ "grad_norm": 0.11657280744271552,
+ "learning_rate": 4.965180362585315e-05,
+ "loss": 1.0253,
+ "step": 321
+ },
+ {
+ "epoch": 3.2941176470588234,
+ "grad_norm": 0.11616693511840327,
+ "learning_rate": 4.911924456653494e-05,
+ "loss": 1.0209,
+ "step": 322
+ },
+ {
+ "epoch": 3.3043478260869565,
+ "grad_norm": 0.12512977851281545,
+ "learning_rate": 4.8588288799874514e-05,
+ "loss": 1.0112,
+ "step": 323
+ },
+ {
+ "epoch": 3.3145780051150897,
+ "grad_norm": 0.10387393823484337,
+ "learning_rate": 4.805896389247348e-05,
+ "loss": 1.0077,
+ "step": 324
+ },
+ {
+ "epoch": 3.3248081841432224,
+ "grad_norm": 0.13924261830307932,
+ "learning_rate": 4.753129732626116e-05,
+ "loss": 1.0229,
+ "step": 325
+ },
+ {
+ "epoch": 3.3350383631713556,
+ "grad_norm": 0.12135011031399273,
+ "learning_rate": 4.70053164970677e-05,
+ "loss": 1.0184,
+ "step": 326
+ },
+ {
+ "epoch": 3.3452685421994883,
+ "grad_norm": 0.1183989328140461,
+ "learning_rate": 4.6481048713201825e-05,
+ "loss": 1.0058,
+ "step": 327
+ },
+ {
+ "epoch": 3.3554987212276215,
+ "grad_norm": 0.1284311973823529,
+ "learning_rate": 4.595852119403282e-05,
+ "loss": 1.0278,
+ "step": 328
+ },
+ {
+ "epoch": 3.3657289002557547,
+ "grad_norm": 0.11430293939389213,
+ "learning_rate": 4.543776106857765e-05,
+ "loss": 1.012,
+ "step": 329
+ },
+ {
+ "epoch": 3.3759590792838874,
+ "grad_norm": 0.1263289266506516,
+ "learning_rate": 4.491879537409211e-05,
+ "loss": 1.0242,
+ "step": 330
+ },
+ {
+ "epoch": 3.3861892583120206,
+ "grad_norm": 0.1316142612111793,
+ "learning_rate": 4.4401651054667274e-05,
+ "loss": 1.0078,
+ "step": 331
+ },
+ {
+ "epoch": 3.3964194373401533,
+ "grad_norm": 0.10490352230994067,
+ "learning_rate": 4.3886354959830625e-05,
+ "loss": 1.0141,
+ "step": 332
+ },
+ {
+ "epoch": 3.4066496163682864,
+ "grad_norm": 0.1587962681364766,
+ "learning_rate": 4.3372933843152e-05,
+ "loss": 1.031,
+ "step": 333
+ },
+ {
+ "epoch": 3.4168797953964196,
+ "grad_norm": 0.14806873434228535,
+ "learning_rate": 4.2861414360854387e-05,
+ "loss": 1.0261,
+ "step": 334
+ },
+ {
+ "epoch": 3.4271099744245523,
+ "grad_norm": 0.12428110117787702,
+ "learning_rate": 4.2351823070430376e-05,
+ "loss": 1.0191,
+ "step": 335
+ },
+ {
+ "epoch": 3.4373401534526855,
+ "grad_norm": 0.1700524980071232,
+ "learning_rate": 4.184418642926289e-05,
+ "loss": 1.0267,
+ "step": 336
+ },
+ {
+ "epoch": 3.4475703324808182,
+ "grad_norm": 0.15240023162277883,
+ "learning_rate": 4.133853079325196e-05,
+ "loss": 1.025,
+ "step": 337
+ },
+ {
+ "epoch": 3.4578005115089514,
+ "grad_norm": 0.11788415451483955,
+ "learning_rate": 4.083488241544595e-05,
+ "loss": 1.0459,
+ "step": 338
+ },
+ {
+ "epoch": 3.4680306905370846,
+ "grad_norm": 0.16921030624641467,
+ "learning_rate": 4.033326744467882e-05,
+ "loss": 1.0112,
+ "step": 339
+ },
+ {
+ "epoch": 3.4782608695652173,
+ "grad_norm": 0.11606787523018822,
+ "learning_rate": 3.983371192421246e-05,
+ "loss": 1.0306,
+ "step": 340
+ },
+ {
+ "epoch": 3.4884910485933505,
+ "grad_norm": 0.1682374876165679,
+ "learning_rate": 3.933624179038446e-05,
+ "loss": 1.0185,
+ "step": 341
+ },
+ {
+ "epoch": 3.498721227621483,
+ "grad_norm": 0.1565962574632331,
+ "learning_rate": 3.884088287126151e-05,
+ "loss": 1.0293,
+ "step": 342
+ },
+ {
+ "epoch": 3.5089514066496164,
+ "grad_norm": 0.14324776346066015,
+ "learning_rate": 3.834766088529867e-05,
+ "loss": 1.0458,
+ "step": 343
+ },
+ {
+ "epoch": 3.5191815856777495,
+ "grad_norm": 0.19529159534261803,
+ "learning_rate": 3.785660144000378e-05,
+ "loss": 1.0056,
+ "step": 344
+ },
+ {
+ "epoch": 3.5294117647058822,
+ "grad_norm": 0.12801441909483788,
+ "learning_rate": 3.736773003060821e-05,
+ "loss": 1.0297,
+ "step": 345
+ },
+ {
+ "epoch": 3.5396419437340154,
+ "grad_norm": 0.1503941568635443,
+ "learning_rate": 3.688107203874301e-05,
+ "loss": 1.0416,
+ "step": 346
+ },
+ {
+ "epoch": 3.5498721227621486,
+ "grad_norm": 0.13361192505091346,
+ "learning_rate": 3.6396652731121136e-05,
+ "loss": 1.0204,
+ "step": 347
+ },
+ {
+ "epoch": 3.5601023017902813,
+ "grad_norm": 0.12402122906765294,
+ "learning_rate": 3.5914497258225815e-05,
+ "loss": 1.0281,
+ "step": 348
+ },
+ {
+ "epoch": 3.5703324808184145,
+ "grad_norm": 0.1370545977754261,
+ "learning_rate": 3.543463065300452e-05,
+ "loss": 1.0271,
+ "step": 349
+ },
+ {
+ "epoch": 3.580562659846547,
+ "grad_norm": 0.10522140449656789,
+ "learning_rate": 3.49570778295694e-05,
+ "loss": 1.0253,
+ "step": 350
+ },
+ {
+ "epoch": 3.5907928388746804,
+ "grad_norm": 0.13288330038665777,
+ "learning_rate": 3.448186358190383e-05,
+ "loss": 1.0155,
+ "step": 351
+ },
+ {
+ "epoch": 3.601023017902813,
+ "grad_norm": 0.11850947440760301,
+ "learning_rate": 3.400901258257501e-05,
+ "loss": 1.0316,
+ "step": 352
+ },
+ {
+ "epoch": 3.6112531969309463,
+ "grad_norm": 0.1207639690615278,
+ "learning_rate": 3.3538549381453046e-05,
+ "loss": 1.0147,
+ "step": 353
+ },
+ {
+ "epoch": 3.6214833759590794,
+ "grad_norm": 0.10436214175934275,
+ "learning_rate": 3.307049840443644e-05,
+ "loss": 1.0158,
+ "step": 354
+ },
+ {
+ "epoch": 3.631713554987212,
+ "grad_norm": 0.10939591730050287,
+ "learning_rate": 3.2604883952183716e-05,
+ "loss": 1.0219,
+ "step": 355
+ },
+ {
+ "epoch": 3.6419437340153453,
+ "grad_norm": 0.10305381344145671,
+ "learning_rate": 3.214173019885202e-05,
+ "loss": 1.0165,
+ "step": 356
+ },
+ {
+ "epoch": 3.6521739130434785,
+ "grad_norm": 0.10617153493263774,
+ "learning_rate": 3.1681061190841806e-05,
+ "loss": 1.0193,
+ "step": 357
+ },
+ {
+ "epoch": 3.662404092071611,
+ "grad_norm": 0.1052450380146473,
+ "learning_rate": 3.122290084554845e-05,
+ "loss": 1.0309,
+ "step": 358
+ },
+ {
+ "epoch": 3.6726342710997444,
+ "grad_norm": 0.09779940640870793,
+ "learning_rate": 3.076727295012059e-05,
+ "loss": 1.0106,
+ "step": 359
+ },
+ {
+ "epoch": 3.682864450127877,
+ "grad_norm": 0.09865111890866658,
+ "learning_rate": 3.031420116022493e-05,
+ "loss": 1.0237,
+ "step": 360
+ },
+ {
+ "epoch": 3.6930946291560103,
+ "grad_norm": 0.09761460812236314,
+ "learning_rate": 2.98637089988182e-05,
+ "loss": 1.0071,
+ "step": 361
+ },
+ {
+ "epoch": 3.703324808184143,
+ "grad_norm": 0.09657195902847425,
+ "learning_rate": 2.94158198549259e-05,
+ "loss": 1.0213,
+ "step": 362
+ },
+ {
+ "epoch": 3.713554987212276,
+ "grad_norm": 0.10306766572443878,
+ "learning_rate": 2.8970556982427836e-05,
+ "loss": 1.0114,
+ "step": 363
+ },
+ {
+ "epoch": 3.7237851662404093,
+ "grad_norm": 0.0892256925639781,
+ "learning_rate": 2.852794349885087e-05,
+ "loss": 1.0141,
+ "step": 364
+ },
+ {
+ "epoch": 3.734015345268542,
+ "grad_norm": 0.0984967903902541,
+ "learning_rate": 2.8088002384168783e-05,
+ "loss": 1.0309,
+ "step": 365
+ },
+ {
+ "epoch": 3.7442455242966752,
+ "grad_norm": 0.09702833968633048,
+ "learning_rate": 2.765075647960898e-05,
+ "loss": 1.0133,
+ "step": 366
+ },
+ {
+ "epoch": 3.7544757033248084,
+ "grad_norm": 0.10259443786625837,
+ "learning_rate": 2.7216228486466856e-05,
+ "loss": 1.0158,
+ "step": 367
+ },
+ {
+ "epoch": 3.764705882352941,
+ "grad_norm": 0.09444360606393558,
+ "learning_rate": 2.678444096492683e-05,
+ "loss": 1.02,
+ "step": 368
+ },
+ {
+ "epoch": 3.7749360613810743,
+ "grad_norm": 0.11306816830082422,
+ "learning_rate": 2.6355416332891404e-05,
+ "loss": 1.0185,
+ "step": 369
+ },
+ {
+ "epoch": 3.785166240409207,
+ "grad_norm": 0.09142683583600977,
+ "learning_rate": 2.592917686481708e-05,
+ "loss": 1.0038,
+ "step": 370
+ },
+ {
+ "epoch": 3.79539641943734,
+ "grad_norm": 0.10949950748671738,
+ "learning_rate": 2.5505744690557846e-05,
+ "loss": 1.0376,
+ "step": 371
+ },
+ {
+ "epoch": 3.805626598465473,
+ "grad_norm": 0.11343467361764166,
+ "learning_rate": 2.508514179421629e-05,
+ "loss": 1.0358,
+ "step": 372
+ },
+ {
+ "epoch": 3.815856777493606,
+ "grad_norm": 0.09342791259699781,
+ "learning_rate": 2.4667390013002254e-05,
+ "loss": 1.0211,
+ "step": 373
+ },
+ {
+ "epoch": 3.8260869565217392,
+ "grad_norm": 0.10858137240897216,
+ "learning_rate": 2.425251103609898e-05,
+ "loss": 1.0332,
+ "step": 374
+ },
+ {
+ "epoch": 3.836317135549872,
+ "grad_norm": 0.0886186909107238,
+ "learning_rate": 2.3840526403537095e-05,
+ "loss": 1.0143,
+ "step": 375
+ },
+ {
+ "epoch": 3.846547314578005,
+ "grad_norm": 0.09895029034827527,
+ "learning_rate": 2.3431457505076205e-05,
+ "loss": 1.0173,
+ "step": 376
+ },
+ {
+ "epoch": 3.8567774936061383,
+ "grad_norm": 0.10546368891288044,
+ "learning_rate": 2.3025325579094498e-05,
+ "loss": 1.0362,
+ "step": 377
+ },
+ {
+ "epoch": 3.867007672634271,
+ "grad_norm": 0.08194245174545557,
+ "learning_rate": 2.2622151711485962e-05,
+ "loss": 1.0124,
+ "step": 378
+ },
+ {
+ "epoch": 3.877237851662404,
+ "grad_norm": 0.1114016593112589,
+ "learning_rate": 2.2221956834565647e-05,
+ "loss": 1.0139,
+ "step": 379
+ },
+ {
+ "epoch": 3.887468030690537,
+ "grad_norm": 0.09816527002580688,
+ "learning_rate": 2.1824761725982874e-05,
+ "loss": 1.0523,
+ "step": 380
+ },
+ {
+ "epoch": 3.89769820971867,
+ "grad_norm": 0.0799950179310954,
+ "learning_rate": 2.1430587007642513e-05,
+ "loss": 0.991,
+ "step": 381
+ },
+ {
+ "epoch": 3.907928388746803,
+ "grad_norm": 0.1080492642683735,
+ "learning_rate": 2.1039453144634364e-05,
+ "loss": 1.0355,
+ "step": 382
+ },
+ {
+ "epoch": 3.918158567774936,
+ "grad_norm": 0.09178099069537385,
+ "learning_rate": 2.0651380444170527e-05,
+ "loss": 1.015,
+ "step": 383
+ },
+ {
+ "epoch": 3.928388746803069,
+ "grad_norm": 0.09031457518884235,
+ "learning_rate": 2.026638905453111e-05,
+ "loss": 1.0229,
+ "step": 384
+ },
+ {
+ "epoch": 3.938618925831202,
+ "grad_norm": 0.09836817148107584,
+ "learning_rate": 1.9884498964018233e-05,
+ "loss": 1.0135,
+ "step": 385
+ },
+ {
+ "epoch": 3.948849104859335,
+ "grad_norm": 0.08002277345563297,
+ "learning_rate": 1.9505729999918194e-05,
+ "loss": 1.0044,
+ "step": 386
+ },
+ {
+ "epoch": 3.959079283887468,
+ "grad_norm": 0.08738866259057297,
+ "learning_rate": 1.913010182747196e-05,
+ "loss": 1.0103,
+ "step": 387
+ },
+ {
+ "epoch": 3.969309462915601,
+ "grad_norm": 0.08459856390870199,
+ "learning_rate": 1.875763394885441e-05,
+ "loss": 1.0491,
+ "step": 388
+ },
+ {
+ "epoch": 3.979539641943734,
+ "grad_norm": 0.07894640689767103,
+ "learning_rate": 1.8388345702161556e-05,
+ "loss": 1.0355,
+ "step": 389
+ },
+ {
+ "epoch": 3.9897698209718673,
+ "grad_norm": 0.07643936790355793,
+ "learning_rate": 1.8022256260406756e-05,
+ "loss": 1.021,
+ "step": 390
+ },
+ {
+ "epoch": 4.0,
+ "grad_norm": 0.09661289153754662,
+ "learning_rate": 1.765938463052506e-05,
+ "loss": 1.288,
+ "step": 391
+ },
+ {
+ "epoch": 4.010230179028133,
+ "grad_norm": 0.08762181265193057,
+ "learning_rate": 1.729974965238651e-05,
+ "loss": 1.001,
+ "step": 392
+ },
+ {
+ "epoch": 4.020460358056266,
+ "grad_norm": 0.08647609139764888,
+ "learning_rate": 1.6943369997818066e-05,
+ "loss": 1.015,
+ "step": 393
+ },
+ {
+ "epoch": 4.030690537084399,
+ "grad_norm": 0.08788066772345464,
+ "learning_rate": 1.659026416963401e-05,
+ "loss": 1.0076,
+ "step": 394
+ },
+ {
+ "epoch": 4.040920716112532,
+ "grad_norm": 0.07750381562018518,
+ "learning_rate": 1.6240450500675393e-05,
+ "loss": 1.0148,
+ "step": 395
+ },
+ {
+ "epoch": 4.051150895140665,
+ "grad_norm": 0.07635694926329481,
+ "learning_rate": 1.5893947152858285e-05,
+ "loss": 1.0016,
+ "step": 396
+ },
+ {
+ "epoch": 4.061381074168798,
+ "grad_norm": 0.08148076505063037,
+ "learning_rate": 1.55507721162307e-05,
+ "loss": 1.0043,
+ "step": 397
+ },
+ {
+ "epoch": 4.071611253196931,
+ "grad_norm": 0.07751240865988411,
+ "learning_rate": 1.5210943208038634e-05,
+ "loss": 1.0288,
+ "step": 398
+ },
+ {
+ "epoch": 4.081841432225064,
+ "grad_norm": 0.07904341557445363,
+ "learning_rate": 1.4874478071801055e-05,
+ "loss": 1.0302,
+ "step": 399
+ },
+ {
+ "epoch": 4.092071611253197,
+ "grad_norm": 0.08036549522985713,
+ "learning_rate": 1.454139417639377e-05,
+ "loss": 1.0021,
+ "step": 400
+ },
+ {
+ "epoch": 4.10230179028133,
+ "grad_norm": 0.07597724481740194,
+ "learning_rate": 1.4211708815142599e-05,
+ "loss": 1.008,
+ "step": 401
+ },
+ {
+ "epoch": 4.112531969309463,
+ "grad_norm": 0.07493385254201147,
+ "learning_rate": 1.3885439104925387e-05,
+ "loss": 1.0082,
+ "step": 402
+ },
+ {
+ "epoch": 4.122762148337596,
+ "grad_norm": 0.08492760833944613,
+ "learning_rate": 1.3562601985283358e-05,
+ "loss": 1.0103,
+ "step": 403
+ },
+ {
+ "epoch": 4.132992327365729,
+ "grad_norm": 0.0819518128021678,
+ "learning_rate": 1.3243214217541751e-05,
+ "loss": 1.0186,
+ "step": 404
+ },
+ {
+ "epoch": 4.143222506393862,
+ "grad_norm": 0.07813515029694298,
+ "learning_rate": 1.2927292383939407e-05,
+ "loss": 1.0103,
+ "step": 405
+ },
+ {
+ "epoch": 4.153452685421995,
+ "grad_norm": 0.07571122126240162,
+ "learning_rate": 1.2614852886767932e-05,
+ "loss": 1.0172,
+ "step": 406
+ },
+ {
+ "epoch": 4.163682864450128,
+ "grad_norm": 0.07867401677851067,
+ "learning_rate": 1.2305911947520159e-05,
+ "loss": 1.0172,
+ "step": 407
+ },
+ {
+ "epoch": 4.173913043478261,
+ "grad_norm": 0.07427552294515202,
+ "learning_rate": 1.2000485606047837e-05,
+ "loss": 1.0254,
+ "step": 408
+ },
+ {
+ "epoch": 4.1841432225063935,
+ "grad_norm": 0.07918066825847282,
+ "learning_rate": 1.1698589719728911e-05,
+ "loss": 1.025,
+ "step": 409
+ },
+ {
+ "epoch": 4.194373401534527,
+ "grad_norm": 0.0745101976937812,
+ "learning_rate": 1.1400239962644294e-05,
+ "loss": 1.011,
+ "step": 410
+ },
+ {
+ "epoch": 4.20460358056266,
+ "grad_norm": 0.07635541094438233,
+ "learning_rate": 1.1105451824763933e-05,
+ "loss": 1.0064,
+ "step": 411
+ },
+ {
+ "epoch": 4.2148337595907925,
+ "grad_norm": 0.07409705314572482,
+ "learning_rate": 1.0814240611142765e-05,
+ "loss": 1.0049,
+ "step": 412
+ },
+ {
+ "epoch": 4.225063938618926,
+ "grad_norm": 0.07875185704588439,
+ "learning_rate": 1.0526621441125946e-05,
+ "loss": 1.0039,
+ "step": 413
+ },
+ {
+ "epoch": 4.235294117647059,
+ "grad_norm": 0.07576883769832815,
+ "learning_rate": 1.0242609247563924e-05,
+ "loss": 1.0204,
+ "step": 414
+ },
+ {
+ "epoch": 4.245524296675192,
+ "grad_norm": 0.07808157523983991,
+ "learning_rate": 9.962218776037234e-06,
+ "loss": 1.0178,
+ "step": 415
+ },
+ {
+ "epoch": 4.255754475703325,
+ "grad_norm": 0.0706208132598418,
+ "learning_rate": 9.68546458409077e-06,
+ "loss": 1.0144,
+ "step": 416
+ },
+ {
+ "epoch": 4.265984654731458,
+ "grad_norm": 0.0816397013567019,
+ "learning_rate": 9.41236104047806e-06,
+ "loss": 1.0008,
+ "step": 417
+ },
+ {
+ "epoch": 4.276214833759591,
+ "grad_norm": 0.07454955996130302,
+ "learning_rate": 9.14292232441528e-06,
+ "loss": 1.0102,
+ "step": 418
+ },
+ {
+ "epoch": 4.286445012787723,
+ "grad_norm": 0.0715010447940489,
+ "learning_rate": 8.877162424845012e-06,
+ "loss": 0.997,
+ "step": 419
+ },
+ {
+ "epoch": 4.296675191815857,
+ "grad_norm": 0.07752947453647589,
+ "learning_rate": 8.615095139710044e-06,
+ "loss": 1.0204,
+ "step": 420
+ },
+ {
+ "epoch": 4.30690537084399,
+ "grad_norm": 0.07999927748669688,
+ "learning_rate": 8.356734075236858e-06,
+ "loss": 1.0286,
+ "step": 421
+ },
+ {
+ "epoch": 4.3171355498721224,
+ "grad_norm": 0.07470822337555567,
+ "learning_rate": 8.102092645229392e-06,
+ "loss": 0.9999,
+ "step": 422
+ },
+ {
+ "epoch": 4.327365728900256,
+ "grad_norm": 0.06986020359409462,
+ "learning_rate": 7.8511840703725e-06,
+ "loss": 1.0157,
+ "step": 423
+ },
+ {
+ "epoch": 4.337595907928389,
+ "grad_norm": 0.07546615544304432,
+ "learning_rate": 7.604021377545518e-06,
+ "loss": 1.0177,
+ "step": 424
+ },
+ {
+ "epoch": 4.3478260869565215,
+ "grad_norm": 0.07303012390571562,
+ "learning_rate": 7.36061739914601e-06,
+ "loss": 1.0053,
+ "step": 425
+ },
+ {
+ "epoch": 4.358056265984655,
+ "grad_norm": 0.07983971604823199,
+ "learning_rate": 7.120984772423507e-06,
+ "loss": 1.0116,
+ "step": 426
+ },
+ {
+ "epoch": 4.368286445012788,
+ "grad_norm": 0.07412392301940252,
+ "learning_rate": 6.88513593882334e-06,
+ "loss": 1.0157,
+ "step": 427
+ },
+ {
+ "epoch": 4.378516624040921,
+ "grad_norm": 0.07540589914642663,
+ "learning_rate": 6.653083143340748e-06,
+ "loss": 1.0321,
+ "step": 428
+ },
+ {
+ "epoch": 4.388746803069053,
+ "grad_norm": 0.0734695901154156,
+ "learning_rate": 6.4248384338851146e-06,
+ "loss": 1.0166,
+ "step": 429
+ },
+ {
+ "epoch": 4.398976982097187,
+ "grad_norm": 0.07148917346240428,
+ "learning_rate": 6.2004136606544515e-06,
+ "loss": 1.0155,
+ "step": 430
+ },
+ {
+ "epoch": 4.40920716112532,
+ "grad_norm": 0.06576015271509353,
+ "learning_rate": 5.979820475520202e-06,
+ "loss": 1.0268,
+ "step": 431
+ },
+ {
+ "epoch": 4.419437340153452,
+ "grad_norm": 0.06602815168374586,
+ "learning_rate": 5.763070331422151e-06,
+ "loss": 1.0094,
+ "step": 432
+ },
+ {
+ "epoch": 4.429667519181586,
+ "grad_norm": 0.06806818912226788,
+ "learning_rate": 5.550174481773969e-06,
+ "loss": 1.0117,
+ "step": 433
+ },
+ {
+ "epoch": 4.439897698209719,
+ "grad_norm": 0.0654860198424932,
+ "learning_rate": 5.341143979878851e-06,
+ "loss": 1.024,
+ "step": 434
+ },
+ {
+ "epoch": 4.450127877237851,
+ "grad_norm": 0.06347915481210827,
+ "learning_rate": 5.135989678355664e-06,
+ "loss": 1.0068,
+ "step": 435
+ },
+ {
+ "epoch": 4.460358056265985,
+ "grad_norm": 0.06673007006524383,
+ "learning_rate": 4.934722228575481e-06,
+ "loss": 1.0144,
+ "step": 436
+ },
+ {
+ "epoch": 4.470588235294118,
+ "grad_norm": 0.06276490466658945,
+ "learning_rate": 4.7373520801085705e-06,
+ "loss": 1.0149,
+ "step": 437
+ },
+ {
+ "epoch": 4.4808184143222505,
+ "grad_norm": 0.06462800206504453,
+ "learning_rate": 4.543889480181944e-06,
+ "loss": 1.0209,
+ "step": 438
+ },
+ {
+ "epoch": 4.491048593350383,
+ "grad_norm": 0.06638702716213918,
+ "learning_rate": 4.354344473147194e-06,
+ "loss": 1.0229,
+ "step": 439
+ },
+ {
+ "epoch": 4.501278772378517,
+ "grad_norm": 0.06786708229119566,
+ "learning_rate": 4.1687268999591164e-06,
+ "loss": 1.0093,
+ "step": 440
+ },
+ {
+ "epoch": 4.5115089514066495,
+ "grad_norm": 0.06344929556504937,
+ "learning_rate": 3.98704639766474e-06,
+ "loss": 1.0227,
+ "step": 441
+ },
+ {
+ "epoch": 4.521739130434782,
+ "grad_norm": 0.061257597771025976,
+ "learning_rate": 3.809312398903e-06,
+ "loss": 1.0206,
+ "step": 442
+ },
+ {
+ "epoch": 4.531969309462916,
+ "grad_norm": 0.06207018671567817,
+ "learning_rate": 3.6355341314149216e-06,
+ "loss": 1.0061,
+ "step": 443
+ },
+ {
+ "epoch": 4.542199488491049,
+ "grad_norm": 0.0697177559048247,
+ "learning_rate": 3.465720617564676e-06,
+ "loss": 1.0001,
+ "step": 444
+ },
+ {
+ "epoch": 4.552429667519181,
+ "grad_norm": 0.06909570953584229,
+ "learning_rate": 3.299880673871023e-06,
+ "loss": 1.0179,
+ "step": 445
+ },
+ {
+ "epoch": 4.562659846547315,
+ "grad_norm": 0.0660073639280078,
+ "learning_rate": 3.138022910549632e-06,
+ "loss": 1.0261,
+ "step": 446
+ },
+ {
+ "epoch": 4.572890025575448,
+ "grad_norm": 0.061208022344978574,
+ "learning_rate": 2.980155731066017e-06,
+ "loss": 0.9983,
+ "step": 447
+ },
+ {
+ "epoch": 4.58312020460358,
+ "grad_norm": 0.05925952496152496,
+ "learning_rate": 2.8262873316992556e-06,
+ "loss": 1.0232,
+ "step": 448
+ },
+ {
+ "epoch": 4.593350383631714,
+ "grad_norm": 0.06402981619720209,
+ "learning_rate": 2.676425701116463e-06,
+ "loss": 1.0065,
+ "step": 449
+ },
+ {
+ "epoch": 4.603580562659847,
+ "grad_norm": 0.058782758741933706,
+ "learning_rate": 2.530578619957993e-06,
+ "loss": 1.0117,
+ "step": 450
+ },
+ {
+ "epoch": 4.6138107416879794,
+ "grad_norm": 0.05836448954212346,
+ "learning_rate": 2.3887536604334784e-06,
+ "loss": 0.9904,
+ "step": 451
+ },
+ {
+ "epoch": 4.624040920716112,
+ "grad_norm": 0.05866978691158495,
+ "learning_rate": 2.2509581859287576e-06,
+ "loss": 1.018,
+ "step": 452
+ },
+ {
+ "epoch": 4.634271099744246,
+ "grad_norm": 0.05897537411295091,
+ "learning_rate": 2.117199350623462e-06,
+ "loss": 1.0224,
+ "step": 453
+ },
+ {
+ "epoch": 4.6445012787723785,
+ "grad_norm": 0.0576826219117585,
+ "learning_rate": 1.987484099119712e-06,
+ "loss": 1.0256,
+ "step": 454
+ },
+ {
+ "epoch": 4.654731457800511,
+ "grad_norm": 0.058074308794142736,
+ "learning_rate": 1.8618191660814356e-06,
+ "loss": 1.0126,
+ "step": 455
+ },
+ {
+ "epoch": 4.664961636828645,
+ "grad_norm": 0.057718766623449665,
+ "learning_rate": 1.7402110758847834e-06,
+ "loss": 1.0064,
+ "step": 456
+ },
+ {
+ "epoch": 4.675191815856778,
+ "grad_norm": 0.055268606367167225,
+ "learning_rate": 1.6226661422794033e-06,
+ "loss": 1.0015,
+ "step": 457
+ },
+ {
+ "epoch": 4.68542199488491,
+ "grad_norm": 0.06120460420865381,
+ "learning_rate": 1.5091904680605862e-06,
+ "loss": 1.0195,
+ "step": 458
+ },
+ {
+ "epoch": 4.695652173913043,
+ "grad_norm": 0.057198583474872174,
+ "learning_rate": 1.3997899447524277e-06,
+ "loss": 1.0313,
+ "step": 459
+ },
+ {
+ "epoch": 4.705882352941177,
+ "grad_norm": 0.054125320926830395,
+ "learning_rate": 1.294470252302009e-06,
+ "loss": 1.0074,
+ "step": 460
+ },
+ {
+ "epoch": 4.716112531969309,
+ "grad_norm": 0.05767289138418469,
+ "learning_rate": 1.193236858784408e-06,
+ "loss": 1.0073,
+ "step": 461
+ },
+ {
+ "epoch": 4.726342710997442,
+ "grad_norm": 0.058374045513090105,
+ "learning_rate": 1.0960950201188524e-06,
+ "loss": 1.0217,
+ "step": 462
+ },
+ {
+ "epoch": 4.736572890025576,
+ "grad_norm": 0.05554209835620826,
+ "learning_rate": 1.003049779795866e-06,
+ "loss": 1.0167,
+ "step": 463
+ },
+ {
+ "epoch": 4.746803069053708,
+ "grad_norm": 0.06184311865928885,
+ "learning_rate": 9.141059686153419e-07,
+ "loss": 1.0207,
+ "step": 464
+ },
+ {
+ "epoch": 4.757033248081841,
+ "grad_norm": 0.056079764830628674,
+ "learning_rate": 8.292682044358114e-07,
+ "loss": 1.0169,
+ "step": 465
+ },
+ {
+ "epoch": 4.767263427109975,
+ "grad_norm": 0.05903827365785868,
+ "learning_rate": 7.485408919346171e-07,
+ "loss": 1.0276,
+ "step": 466
+ },
+ {
+ "epoch": 4.7774936061381075,
+ "grad_norm": 0.05665451559441624,
+ "learning_rate": 6.719282223793056e-07,
+ "loss": 1.0108,
+ "step": 467
+ },
+ {
+ "epoch": 4.78772378516624,
+ "grad_norm": 0.05541913708216884,
+ "learning_rate": 5.994341734099429e-07,
+ "loss": 1.0213,
+ "step": 468
+ },
+ {
+ "epoch": 4.797953964194374,
+ "grad_norm": 0.0531727158843823,
+ "learning_rate": 5.310625088326671e-07,
+ "loss": 0.9962,
+ "step": 469
+ },
+ {
+ "epoch": 4.8081841432225065,
+ "grad_norm": 0.056474278323680495,
+ "learning_rate": 4.6681677842421724e-07,
+ "loss": 1.0079,
+ "step": 470
+ },
+ {
+ "epoch": 4.818414322250639,
+ "grad_norm": 0.05529573865410167,
+ "learning_rate": 4.067003177476991e-07,
+ "loss": 1.0025,
+ "step": 471
+ },
+ {
+ "epoch": 4.828644501278772,
+ "grad_norm": 0.055929732769464856,
+ "learning_rate": 3.507162479793369e-07,
+ "loss": 1.0173,
+ "step": 472
+ },
+ {
+ "epoch": 4.838874680306906,
+ "grad_norm": 0.0546056253906124,
+ "learning_rate": 2.9886747574646936e-07,
+ "loss": 1.0001,
+ "step": 473
+ },
+ {
+ "epoch": 4.849104859335038,
+ "grad_norm": 0.05682769159587846,
+ "learning_rate": 2.511566929766396e-07,
+ "loss": 1.0062,
+ "step": 474
+ },
+ {
+ "epoch": 4.859335038363171,
+ "grad_norm": 0.05365397819606409,
+ "learning_rate": 2.075863767577957e-07,
+ "loss": 1.0195,
+ "step": 475
+ },
+ {
+ "epoch": 4.869565217391305,
+ "grad_norm": 0.05538514973032816,
+ "learning_rate": 1.681587892097536e-07,
+ "loss": 1.0159,
+ "step": 476
+ },
+ {
+ "epoch": 4.879795396419437,
+ "grad_norm": 0.05391134086338163,
+ "learning_rate": 1.3287597736667323e-07,
+ "loss": 1.0233,
+ "step": 477
+ },
+ {
+ "epoch": 4.89002557544757,
+ "grad_norm": 0.054055617154126184,
+ "learning_rate": 1.0173977307082361e-07,
+ "loss": 1.0188,
+ "step": 478
+ },
+ {
+ "epoch": 4.900255754475703,
+ "grad_norm": 0.05372397484026782,
+ "learning_rate": 7.475179287748547e-08,
+ "loss": 1.0235,
+ "step": 479
+ },
+ {
+ "epoch": 4.910485933503836,
+ "grad_norm": 0.054794275718811285,
+ "learning_rate": 5.191343797096515e-08,
+ "loss": 1.0018,
+ "step": 480
+ },
+ {
+ "epoch": 4.920716112531969,
+ "grad_norm": 0.05350058384849903,
+ "learning_rate": 3.322589409190613e-08,
+ "loss": 1.009,
+ "step": 481
+ },
+ {
+ "epoch": 4.930946291560103,
+ "grad_norm": 0.05480646007195536,
+ "learning_rate": 1.8690131475711527e-08,
+ "loss": 1.0273,
+ "step": 482
+ },
+ {
+ "epoch": 4.9411764705882355,
+ "grad_norm": 0.05416557096319236,
+ "learning_rate": 8.306904802148907e-09,
+ "loss": 1.0322,
+ "step": 483
+ },
+ {
+ "epoch": 4.951406649616368,
+ "grad_norm": 0.053492193014158126,
+ "learning_rate": 2.07675315618161e-09,
+ "loss": 1.0035,
+ "step": 484
+ },
+ {
+ "epoch": 4.961636828644501,
+ "grad_norm": 0.055105663696246775,
+ "learning_rate": 0.0,
+ "loss": 1.0124,
+ "step": 485
+ },
+ {
+ "epoch": 4.961636828644501,
+ "step": 485,
+ "total_flos": 7065760181780480.0,
+ "train_loss": 1.075610858509221,
+ "train_runtime": 69151.8205,
+ "train_samples_per_second": 7.23,
+ "train_steps_per_second": 0.007
+ }
+ ],
+ "logging_steps": 1.0,
+ "max_steps": 485,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 7065760181780480.0,
+ "train_batch_size": 4,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/training_args.bin b/training_args.bin
new file mode 100644
index 0000000..8775f18
--- /dev/null
+++ b/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f53657f362937f57b3c763e3a8626ec8db0ce2d9eb097eb6b0bc915a57bb83e
+size 7224
diff --git a/training_loss.png b/training_loss.png
new file mode 100644
index 0000000..a621b07
Binary files /dev/null and b/training_loss.png differ
diff --git a/vocab.json b/vocab.json
new file mode 100644
index 0000000..6c49fc6
--- /dev/null
+++ b/vocab.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
+size 2776833