commit 84f44cd484bf6e7967559497dfe3959c9284cb56 Author: ModelHub XC Date: Tue Jun 16 07:03:13 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: mlfoundations-dev/openthoughts3_100k_qwen25_1b_bsz256_lr16e5_epochs5 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..4021b5a --- /dev/null +++ b/.gitattributes @@ -0,0 +1,53 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text + + +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs 
merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +tokenizer.json filter=lfs diff=lfs merge=lfs -text +vocab.json filter=lfs diff=lfs merge=lfs -text +training_args.bin filter=lfs diff=lfs merge=lfs -text +model.safetensors filter=lfs diff=lfs merge=lfs -text +merges.txt filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..1863290 --- /dev/null +++ b/README.md @@ -0,0 +1,60 @@ +--- +library_name: transformers +license: apache-2.0 +base_model: Qwen/Qwen2.5-1.5B-Instruct +tags: +- llama-factory +- full +- generated_from_trainer +model-index: +- name: openthoughts3_100k_qwen25_1b_bsz256_lr16e5_epochs5 + results: [] +--- + + + +# openthoughts3_100k_qwen25_1b_bsz256_lr16e5_epochs5 + +This model is a fine-tuned version of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) on the mlfoundations-dev/openthoughts3_100k dataset. 
+ +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 0.00016 +- train_batch_size: 4 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 64 +- total_train_batch_size: 256 +- total_eval_batch_size: 512 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 5.0 + +### Training results + + + +### Framework versions + +- Transformers 4.46.1 +- Pytorch 2.3.0 +- Datasets 3.1.0 +- Tokenizers 0.20.3 diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..482ced4 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,24 @@ +{ + "</tool_call>": 151658, + "<tool_call>": 151657, + "<|box_end|>": 151649, + "<|box_start|>": 151648, + "<|endoftext|>": 151643, + "<|file_sep|>": 151664, + "<|fim_middle|>": 151660, + "<|fim_pad|>": 151662, + "<|fim_prefix|>": 151659, + "<|fim_suffix|>": 151661, + "<|im_end|>": 151645, + "<|im_start|>": 151644, + "<|image_pad|>": 151655, + "<|object_ref_end|>": 151647, + "<|object_ref_start|>": 151646, + "<|quad_end|>": 151651, + "<|quad_start|>": 151650, + "<|repo_name|>": 151663, + "<|video_pad|>": 151656, + "<|vision_end|>": 151653, + "<|vision_pad|>": 151654, + "<|vision_start|>": 151652 +} diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..166c124 --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 5.0, + "total_flos": 7122204608430080.0, + "train_loss": 1.0036099467436066, + "train_runtime": 36219.8634, + "train_samples_per_second": 13.805, + "train_steps_per_second": 0.054 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 
100644 index 0000000..0e002a8 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "_name_or_path": "Qwen/Qwen2.5-1.5B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "max_position_embeddings": 32768, + "max_window_layers": 21, + "model_type": "qwen2", + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/configs.yaml b/configs.yaml new file mode 100644 index 0000000..eda5354 --- /dev/null +++ b/configs.yaml @@ -0,0 +1,39 @@ +assistant_tag: gpt +bf16: 'True' +content_tag: value +cutoff_len: '16384' +dataloader_num_workers: '4' +dataloader_persistent_workers: 'True' +dataloader_pin_memory: 'True' +dataset: mlfoundations-dev/openthoughts3_100k +dataset_dir: ONLINE +ddp_timeout: '180000000' +deepspeed: /opt/ml/code/zero3.json +do_train: 'True' +enable_liger_kernel: 'True' +finetuning_type: full +formatting: sharegpt +global_batch_size: '256' +gradient_accumulation_steps: '1' +hub_model_id: mlfoundations-dev/openthoughts3_100k_qwen25_1b_bsz256_lr16e5_epochs5 +learning_rate: '0.00016' +logging_steps: '1' +lr_scheduler_type: cosine +messages: conversations +model_name_or_path: Qwen/Qwen2.5-1.5B-Instruct +num_train_epochs: '5.0' +output_dir: /opt/ml/model +overwrite_cache: 'True' +per_device_train_batch_size: '4' +plot_loss: 'True' +preprocessing_num_workers: '16' +push_to_db: 'True' +push_to_hub: 'True' +report_to: wandb +role_tag: from +run_name: openthoughts3_100k_qwen25_1b_bsz256_lr16e5_epochs5 +save_strategy: epoch +stage: sft +template: qwen25 +user_tag: 
human +warmup_ratio: '0.1' diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..16e88f7 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,14 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.1, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "4.46.1" +} diff --git a/merges.txt b/merges.txt new file mode 100644 index 0000000..80c1a19 --- /dev/null +++ b/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..981e3b1 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e158b0c2d82ca90cb35ba7c37e11c7cb64db7579e1ab1d2a107a48d942e783d3 +size 3087467144 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..17305b3 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git 
a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..51ebb3b --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa +size 11421896 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..b84f53a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,208 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "<tool_call>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "</tool_call>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + 
"<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..166c124 --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 5.0, + "total_flos": 7122204608430080.0, + "train_loss": 1.0036099467436066, + "train_runtime": 36219.8634, + "train_samples_per_second": 13.805, + "train_steps_per_second": 0.054 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file 
mode 100644 index 0000000..2555007 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,1956 @@ +{"current_steps": 1, "total_steps": 1955, "loss": 1.5213, "lr": 8.163265306122449e-07, "epoch": 0.0025575447570332483, "percentage": 0.05, "elapsed_time": "0:00:25", "remaining_time": "13:54:35"} +{"current_steps": 2, "total_steps": 1955, "loss": 1.4742, "lr": 1.6326530612244897e-06, "epoch": 0.005115089514066497, "percentage": 0.1, "elapsed_time": "0:00:44", "remaining_time": "11:59:49"} +{"current_steps": 3, "total_steps": 1955, "loss": 1.4946, "lr": 2.4489795918367347e-06, "epoch": 0.0076726342710997444, "percentage": 0.15, "elapsed_time": "0:01:02", "remaining_time": "11:19:45"} +{"current_steps": 4, "total_steps": 1955, "loss": 1.482, "lr": 3.2653061224489794e-06, "epoch": 0.010230179028132993, "percentage": 0.2, "elapsed_time": "0:01:21", "remaining_time": "10:59:03"} +{"current_steps": 5, "total_steps": 1955, "loss": 1.4866, "lr": 4.081632653061225e-06, "epoch": 0.01278772378516624, "percentage": 0.26, "elapsed_time": "0:01:39", "remaining_time": "10:46:28"} +{"current_steps": 6, "total_steps": 1955, "loss": 1.4844, "lr": 4.897959183673469e-06, "epoch": 0.015345268542199489, "percentage": 0.31, "elapsed_time": "0:01:58", "remaining_time": "10:41:21"} +{"current_steps": 7, "total_steps": 1955, "loss": 1.4533, "lr": 5.7142857142857145e-06, "epoch": 0.017902813299232736, "percentage": 0.36, "elapsed_time": "0:02:16", "remaining_time": "10:32:56"} +{"current_steps": 8, "total_steps": 1955, "loss": 1.4433, "lr": 6.530612244897959e-06, "epoch": 0.020460358056265986, "percentage": 0.41, "elapsed_time": "0:02:35", "remaining_time": "10:30:46"} +{"current_steps": 9, "total_steps": 1955, "loss": 1.4744, "lr": 7.346938775510205e-06, "epoch": 0.023017902813299233, "percentage": 0.46, "elapsed_time": "0:02:53", "remaining_time": "10:25:34"} +{"current_steps": 10, "total_steps": 1955, "loss": 1.4586, "lr": 8.16326530612245e-06, "epoch": 0.02557544757033248, "percentage": 0.51, 
"elapsed_time": "0:03:12", "remaining_time": "10:23:03"} +{"current_steps": 11, "total_steps": 1955, "loss": 1.4315, "lr": 8.979591836734695e-06, "epoch": 0.028132992327365727, "percentage": 0.56, "elapsed_time": "0:03:30", "remaining_time": "10:19:15"} +{"current_steps": 12, "total_steps": 1955, "loss": 1.3907, "lr": 9.795918367346939e-06, "epoch": 0.030690537084398978, "percentage": 0.61, "elapsed_time": "0:03:48", "remaining_time": "10:17:35"} +{"current_steps": 13, "total_steps": 1955, "loss": 1.4234, "lr": 1.0612244897959186e-05, "epoch": 0.03324808184143223, "percentage": 0.66, "elapsed_time": "0:04:07", "remaining_time": "10:16:07"} +{"current_steps": 14, "total_steps": 1955, "loss": 1.3478, "lr": 1.1428571428571429e-05, "epoch": 0.03580562659846547, "percentage": 0.72, "elapsed_time": "0:04:26", "remaining_time": "10:14:48"} +{"current_steps": 15, "total_steps": 1955, "loss": 1.378, "lr": 1.2244897959183674e-05, "epoch": 0.03836317135549872, "percentage": 0.77, "elapsed_time": "0:04:44", "remaining_time": "10:13:37"} +{"current_steps": 16, "total_steps": 1955, "loss": 1.3273, "lr": 1.3061224489795918e-05, "epoch": 0.04092071611253197, "percentage": 0.82, "elapsed_time": "0:05:03", "remaining_time": "10:13:20"} +{"current_steps": 17, "total_steps": 1955, "loss": 1.3424, "lr": 1.3877551020408165e-05, "epoch": 0.043478260869565216, "percentage": 0.87, "elapsed_time": "0:05:22", "remaining_time": "10:12:18"} +{"current_steps": 18, "total_steps": 1955, "loss": 1.3354, "lr": 1.469387755102041e-05, "epoch": 0.04603580562659847, "percentage": 0.92, "elapsed_time": "0:05:40", "remaining_time": "10:11:23"} +{"current_steps": 19, "total_steps": 1955, "loss": 1.3216, "lr": 1.5510204081632655e-05, "epoch": 0.04859335038363171, "percentage": 0.97, "elapsed_time": "0:05:59", "remaining_time": "10:10:29"} +{"current_steps": 20, "total_steps": 1955, "loss": 1.305, "lr": 1.63265306122449e-05, "epoch": 0.05115089514066496, "percentage": 1.02, "elapsed_time": "0:06:18", 
"remaining_time": "10:10:50"} +{"current_steps": 21, "total_steps": 1955, "loss": 1.2863, "lr": 1.7142857142857142e-05, "epoch": 0.05370843989769821, "percentage": 1.07, "elapsed_time": "0:06:37", "remaining_time": "10:10:00"} +{"current_steps": 22, "total_steps": 1955, "loss": 1.31, "lr": 1.795918367346939e-05, "epoch": 0.056265984654731455, "percentage": 1.13, "elapsed_time": "0:06:56", "remaining_time": "10:09:12"} +{"current_steps": 23, "total_steps": 1955, "loss": 1.2652, "lr": 1.8775510204081636e-05, "epoch": 0.058823529411764705, "percentage": 1.18, "elapsed_time": "0:07:14", "remaining_time": "10:08:27"} +{"current_steps": 24, "total_steps": 1955, "loss": 1.2848, "lr": 1.9591836734693877e-05, "epoch": 0.061381074168797956, "percentage": 1.23, "elapsed_time": "0:07:33", "remaining_time": "10:07:46"} +{"current_steps": 25, "total_steps": 1955, "loss": 1.2605, "lr": 2.0408163265306126e-05, "epoch": 0.0639386189258312, "percentage": 1.28, "elapsed_time": "0:07:51", "remaining_time": "10:07:16"} +{"current_steps": 26, "total_steps": 1955, "loss": 1.2663, "lr": 2.122448979591837e-05, "epoch": 0.06649616368286446, "percentage": 1.33, "elapsed_time": "0:08:10", "remaining_time": "10:06:37"} +{"current_steps": 27, "total_steps": 1955, "loss": 1.2493, "lr": 2.2040816326530613e-05, "epoch": 0.06905370843989769, "percentage": 1.38, "elapsed_time": "0:08:29", "remaining_time": "10:06:10"} +{"current_steps": 28, "total_steps": 1955, "loss": 1.2725, "lr": 2.2857142857142858e-05, "epoch": 0.07161125319693094, "percentage": 1.43, "elapsed_time": "0:08:47", "remaining_time": "10:05:33"} +{"current_steps": 29, "total_steps": 1955, "loss": 1.2493, "lr": 2.3673469387755103e-05, "epoch": 0.0741687979539642, "percentage": 1.48, "elapsed_time": "0:09:06", "remaining_time": "10:04:34"} +{"current_steps": 30, "total_steps": 1955, "loss": 1.2635, "lr": 2.448979591836735e-05, "epoch": 0.07672634271099744, "percentage": 1.53, "elapsed_time": "0:09:24", "remaining_time": "10:03:32"} 
+{"current_steps": 31, "total_steps": 1955, "loss": 1.2581, "lr": 2.5306122448979597e-05, "epoch": 0.0792838874680307, "percentage": 1.59, "elapsed_time": "0:09:42", "remaining_time": "10:02:27"} +{"current_steps": 32, "total_steps": 1955, "loss": 1.2319, "lr": 2.6122448979591835e-05, "epoch": 0.08184143222506395, "percentage": 1.64, "elapsed_time": "0:10:00", "remaining_time": "10:01:56"} +{"current_steps": 33, "total_steps": 1955, "loss": 1.2647, "lr": 2.6938775510204084e-05, "epoch": 0.08439897698209718, "percentage": 1.69, "elapsed_time": "0:10:19", "remaining_time": "10:01:45"} +{"current_steps": 34, "total_steps": 1955, "loss": 1.2828, "lr": 2.775510204081633e-05, "epoch": 0.08695652173913043, "percentage": 1.74, "elapsed_time": "0:10:38", "remaining_time": "10:01:16"} +{"current_steps": 35, "total_steps": 1955, "loss": 1.2245, "lr": 2.8571428571428574e-05, "epoch": 0.08951406649616368, "percentage": 1.79, "elapsed_time": "0:10:57", "remaining_time": "10:00:48"} +{"current_steps": 36, "total_steps": 1955, "loss": 1.2383, "lr": 2.938775510204082e-05, "epoch": 0.09207161125319693, "percentage": 1.84, "elapsed_time": "0:11:15", "remaining_time": "10:00:18"} +{"current_steps": 37, "total_steps": 1955, "loss": 1.2524, "lr": 3.020408163265306e-05, "epoch": 0.09462915601023018, "percentage": 1.89, "elapsed_time": "0:11:33", "remaining_time": "9:59:23"} +{"current_steps": 38, "total_steps": 1955, "loss": 1.229, "lr": 3.102040816326531e-05, "epoch": 0.09718670076726342, "percentage": 1.94, "elapsed_time": "0:11:53", "remaining_time": "9:59:29"} +{"current_steps": 39, "total_steps": 1955, "loss": 1.2438, "lr": 3.183673469387755e-05, "epoch": 0.09974424552429667, "percentage": 1.99, "elapsed_time": "0:12:11", "remaining_time": "9:59:01"} +{"current_steps": 40, "total_steps": 1955, "loss": 1.1862, "lr": 3.26530612244898e-05, "epoch": 0.10230179028132992, "percentage": 2.05, "elapsed_time": "0:12:30", "remaining_time": "9:58:40"} +{"current_steps": 41, "total_steps": 
1955, "loss": 1.2127, "lr": 3.346938775510204e-05, "epoch": 0.10485933503836317, "percentage": 2.1, "elapsed_time": "0:12:48", "remaining_time": "9:58:14"} +{"current_steps": 42, "total_steps": 1955, "loss": 1.2118, "lr": 3.4285714285714284e-05, "epoch": 0.10741687979539642, "percentage": 2.15, "elapsed_time": "0:13:07", "remaining_time": "9:57:49"} +{"current_steps": 43, "total_steps": 1955, "loss": 1.1849, "lr": 3.510204081632653e-05, "epoch": 0.10997442455242967, "percentage": 2.2, "elapsed_time": "0:13:26", "remaining_time": "9:57:28"} +{"current_steps": 44, "total_steps": 1955, "loss": 1.1965, "lr": 3.591836734693878e-05, "epoch": 0.11253196930946291, "percentage": 2.25, "elapsed_time": "0:13:44", "remaining_time": "9:56:59"} +{"current_steps": 45, "total_steps": 1955, "loss": 1.1973, "lr": 3.673469387755102e-05, "epoch": 0.11508951406649616, "percentage": 2.3, "elapsed_time": "0:14:03", "remaining_time": "9:56:41"} +{"current_steps": 46, "total_steps": 1955, "loss": 1.219, "lr": 3.755102040816327e-05, "epoch": 0.11764705882352941, "percentage": 2.35, "elapsed_time": "0:14:21", "remaining_time": "9:55:53"} +{"current_steps": 47, "total_steps": 1955, "loss": 1.22, "lr": 3.836734693877551e-05, "epoch": 0.12020460358056266, "percentage": 2.4, "elapsed_time": "0:14:40", "remaining_time": "9:56:00"} +{"current_steps": 48, "total_steps": 1955, "loss": 1.1927, "lr": 3.9183673469387755e-05, "epoch": 0.12276214833759591, "percentage": 2.46, "elapsed_time": "0:14:59", "remaining_time": "9:55:35"} +{"current_steps": 49, "total_steps": 1955, "loss": 1.207, "lr": 4e-05, "epoch": 0.12531969309462915, "percentage": 2.51, "elapsed_time": "0:15:18", "remaining_time": "9:55:10"} +{"current_steps": 50, "total_steps": 1955, "loss": 1.1761, "lr": 4.081632653061225e-05, "epoch": 0.1278772378516624, "percentage": 2.56, "elapsed_time": "0:15:36", "remaining_time": "9:54:46"} +{"current_steps": 51, "total_steps": 1955, "loss": 1.1828, "lr": 4.1632653061224494e-05, "epoch": 
0.13043478260869565, "percentage": 2.61, "elapsed_time": "0:15:55", "remaining_time": "9:54:22"} +{"current_steps": 52, "total_steps": 1955, "loss": 1.1645, "lr": 4.244897959183674e-05, "epoch": 0.1329923273657289, "percentage": 2.66, "elapsed_time": "0:16:14", "remaining_time": "9:54:08"} +{"current_steps": 53, "total_steps": 1955, "loss": 1.1663, "lr": 4.3265306122448984e-05, "epoch": 0.13554987212276215, "percentage": 2.71, "elapsed_time": "0:16:32", "remaining_time": "9:53:44"} +{"current_steps": 54, "total_steps": 1955, "loss": 1.2013, "lr": 4.4081632653061226e-05, "epoch": 0.13810741687979539, "percentage": 2.76, "elapsed_time": "0:16:51", "remaining_time": "9:53:19"} +{"current_steps": 55, "total_steps": 1955, "loss": 1.2142, "lr": 4.489795918367347e-05, "epoch": 0.14066496163682865, "percentage": 2.81, "elapsed_time": "0:17:09", "remaining_time": "9:52:55"} +{"current_steps": 56, "total_steps": 1955, "loss": 1.2031, "lr": 4.5714285714285716e-05, "epoch": 0.1432225063938619, "percentage": 2.86, "elapsed_time": "0:17:28", "remaining_time": "9:52:42"} +{"current_steps": 57, "total_steps": 1955, "loss": 1.1872, "lr": 4.6530612244897965e-05, "epoch": 0.14578005115089515, "percentage": 2.92, "elapsed_time": "0:17:47", "remaining_time": "9:52:19"} +{"current_steps": 58, "total_steps": 1955, "loss": 1.2026, "lr": 4.7346938775510206e-05, "epoch": 0.1483375959079284, "percentage": 2.97, "elapsed_time": "0:18:05", "remaining_time": "9:51:56"} +{"current_steps": 59, "total_steps": 1955, "loss": 1.1698, "lr": 4.8163265306122455e-05, "epoch": 0.15089514066496162, "percentage": 3.02, "elapsed_time": "0:18:24", "remaining_time": "9:51:33"} +{"current_steps": 60, "total_steps": 1955, "loss": 1.1935, "lr": 4.89795918367347e-05, "epoch": 0.1534526854219949, "percentage": 3.07, "elapsed_time": "0:18:42", "remaining_time": "9:50:54"} +{"current_steps": 61, "total_steps": 1955, "loss": 1.1816, "lr": 4.9795918367346945e-05, "epoch": 0.15601023017902813, "percentage": 3.12, 
"elapsed_time": "0:19:01", "remaining_time": "9:50:32"} +{"current_steps": 62, "total_steps": 1955, "loss": 1.2148, "lr": 5.0612244897959194e-05, "epoch": 0.1585677749360614, "percentage": 3.17, "elapsed_time": "0:19:19", "remaining_time": "9:50:10"} +{"current_steps": 63, "total_steps": 1955, "loss": 1.1974, "lr": 5.1428571428571436e-05, "epoch": 0.16112531969309463, "percentage": 3.22, "elapsed_time": "0:19:37", "remaining_time": "9:49:36"} +{"current_steps": 64, "total_steps": 1955, "loss": 1.1914, "lr": 5.224489795918367e-05, "epoch": 0.1636828644501279, "percentage": 3.27, "elapsed_time": "0:19:56", "remaining_time": "9:49:08"} +{"current_steps": 65, "total_steps": 1955, "loss": 1.183, "lr": 5.306122448979592e-05, "epoch": 0.16624040920716113, "percentage": 3.32, "elapsed_time": "0:20:15", "remaining_time": "9:48:59"} +{"current_steps": 66, "total_steps": 1955, "loss": 1.1457, "lr": 5.387755102040817e-05, "epoch": 0.16879795396419436, "percentage": 3.38, "elapsed_time": "0:20:34", "remaining_time": "9:48:40"} +{"current_steps": 67, "total_steps": 1955, "loss": 1.1724, "lr": 5.469387755102041e-05, "epoch": 0.17135549872122763, "percentage": 3.43, "elapsed_time": "0:20:52", "remaining_time": "9:48:18"} +{"current_steps": 68, "total_steps": 1955, "loss": 1.1469, "lr": 5.551020408163266e-05, "epoch": 0.17391304347826086, "percentage": 3.48, "elapsed_time": "0:21:11", "remaining_time": "9:48:01"} +{"current_steps": 69, "total_steps": 1955, "loss": 1.1335, "lr": 5.63265306122449e-05, "epoch": 0.17647058823529413, "percentage": 3.53, "elapsed_time": "0:21:29", "remaining_time": "9:47:25"} +{"current_steps": 70, "total_steps": 1955, "loss": 1.1475, "lr": 5.714285714285715e-05, "epoch": 0.17902813299232737, "percentage": 3.58, "elapsed_time": "0:21:48", "remaining_time": "9:47:04"} +{"current_steps": 71, "total_steps": 1955, "loss": 1.1595, "lr": 5.79591836734694e-05, "epoch": 0.1815856777493606, "percentage": 3.63, "elapsed_time": "0:22:06", "remaining_time": 
"9:46:45"} +{"current_steps": 72, "total_steps": 1955, "loss": 1.1764, "lr": 5.877551020408164e-05, "epoch": 0.18414322250639387, "percentage": 3.68, "elapsed_time": "0:22:25", "remaining_time": "9:46:25"} +{"current_steps": 73, "total_steps": 1955, "loss": 1.2046, "lr": 5.959183673469389e-05, "epoch": 0.1867007672634271, "percentage": 3.73, "elapsed_time": "0:22:43", "remaining_time": "9:46:03"} +{"current_steps": 74, "total_steps": 1955, "loss": 1.1601, "lr": 6.040816326530612e-05, "epoch": 0.18925831202046037, "percentage": 3.79, "elapsed_time": "0:23:02", "remaining_time": "9:45:43"} +{"current_steps": 75, "total_steps": 1955, "loss": 1.1524, "lr": 6.122448979591836e-05, "epoch": 0.1918158567774936, "percentage": 3.84, "elapsed_time": "0:23:20", "remaining_time": "9:45:10"} +{"current_steps": 76, "total_steps": 1955, "loss": 1.1559, "lr": 6.204081632653062e-05, "epoch": 0.19437340153452684, "percentage": 3.89, "elapsed_time": "0:23:39", "remaining_time": "9:44:50"} +{"current_steps": 77, "total_steps": 1955, "loss": 1.1657, "lr": 6.285714285714286e-05, "epoch": 0.1969309462915601, "percentage": 3.94, "elapsed_time": "0:23:57", "remaining_time": "9:44:17"} +{"current_steps": 78, "total_steps": 1955, "loss": 1.1471, "lr": 6.36734693877551e-05, "epoch": 0.19948849104859334, "percentage": 3.99, "elapsed_time": "0:24:15", "remaining_time": "9:43:56"} +{"current_steps": 79, "total_steps": 1955, "loss": 1.1408, "lr": 6.448979591836736e-05, "epoch": 0.2020460358056266, "percentage": 4.04, "elapsed_time": "0:24:33", "remaining_time": "9:43:22"} +{"current_steps": 80, "total_steps": 1955, "loss": 1.1565, "lr": 6.53061224489796e-05, "epoch": 0.20460358056265984, "percentage": 4.09, "elapsed_time": "0:24:52", "remaining_time": "9:43:03"} +{"current_steps": 81, "total_steps": 1955, "loss": 1.1394, "lr": 6.612244897959184e-05, "epoch": 0.2071611253196931, "percentage": 4.14, "elapsed_time": "0:25:11", "remaining_time": "9:42:48"} +{"current_steps": 82, "total_steps": 1955, 
"loss": 1.1366, "lr": 6.693877551020408e-05, "epoch": 0.20971867007672634, "percentage": 4.19, "elapsed_time": "0:25:30", "remaining_time": "9:42:27"} +{"current_steps": 83, "total_steps": 1955, "loss": 1.1521, "lr": 6.775510204081634e-05, "epoch": 0.21227621483375958, "percentage": 4.25, "elapsed_time": "0:25:48", "remaining_time": "9:42:07"} +{"current_steps": 84, "total_steps": 1955, "loss": 1.1485, "lr": 6.857142857142857e-05, "epoch": 0.21483375959079284, "percentage": 4.3, "elapsed_time": "0:26:07", "remaining_time": "9:41:47"} +{"current_steps": 85, "total_steps": 1955, "loss": 1.199, "lr": 6.938775510204082e-05, "epoch": 0.21739130434782608, "percentage": 4.35, "elapsed_time": "0:26:25", "remaining_time": "9:41:27"} +{"current_steps": 86, "total_steps": 1955, "loss": 1.15, "lr": 7.020408163265306e-05, "epoch": 0.21994884910485935, "percentage": 4.4, "elapsed_time": "0:26:44", "remaining_time": "9:41:07"} +{"current_steps": 87, "total_steps": 1955, "loss": 1.1547, "lr": 7.10204081632653e-05, "epoch": 0.22250639386189258, "percentage": 4.45, "elapsed_time": "0:27:02", "remaining_time": "9:40:36"} +{"current_steps": 88, "total_steps": 1955, "loss": 1.1808, "lr": 7.183673469387756e-05, "epoch": 0.22506393861892582, "percentage": 4.5, "elapsed_time": "0:27:21", "remaining_time": "9:40:16"} +{"current_steps": 89, "total_steps": 1955, "loss": 1.1399, "lr": 7.26530612244898e-05, "epoch": 0.22762148337595908, "percentage": 4.55, "elapsed_time": "0:27:39", "remaining_time": "9:39:57"} +{"current_steps": 90, "total_steps": 1955, "loss": 1.1709, "lr": 7.346938775510205e-05, "epoch": 0.23017902813299232, "percentage": 4.6, "elapsed_time": "0:27:58", "remaining_time": "9:39:37"} +{"current_steps": 91, "total_steps": 1955, "loss": 1.1649, "lr": 7.42857142857143e-05, "epoch": 0.23273657289002558, "percentage": 4.65, "elapsed_time": "0:28:16", "remaining_time": "9:39:06"} +{"current_steps": 92, "total_steps": 1955, "loss": 1.1693, "lr": 7.510204081632654e-05, "epoch": 
0.23529411764705882, "percentage": 4.71, "elapsed_time": "0:28:34", "remaining_time": "9:38:47"} +{"current_steps": 93, "total_steps": 1955, "loss": 1.1525, "lr": 7.591836734693878e-05, "epoch": 0.23785166240409208, "percentage": 4.76, "elapsed_time": "0:28:53", "remaining_time": "9:38:28"} +{"current_steps": 94, "total_steps": 1955, "loss": 1.1727, "lr": 7.673469387755103e-05, "epoch": 0.24040920716112532, "percentage": 4.81, "elapsed_time": "0:29:12", "remaining_time": "9:38:09"} +{"current_steps": 95, "total_steps": 1955, "loss": 1.1193, "lr": 7.755102040816327e-05, "epoch": 0.24296675191815856, "percentage": 4.86, "elapsed_time": "0:29:30", "remaining_time": "9:37:49"} +{"current_steps": 96, "total_steps": 1955, "loss": 1.1605, "lr": 7.836734693877551e-05, "epoch": 0.24552429667519182, "percentage": 4.91, "elapsed_time": "0:29:49", "remaining_time": "9:37:26"} +{"current_steps": 97, "total_steps": 1955, "loss": 1.1663, "lr": 7.918367346938776e-05, "epoch": 0.24808184143222506, "percentage": 4.96, "elapsed_time": "0:30:07", "remaining_time": "9:37:06"} +{"current_steps": 98, "total_steps": 1955, "loss": 1.134, "lr": 8e-05, "epoch": 0.2506393861892583, "percentage": 5.01, "elapsed_time": "0:30:26", "remaining_time": "9:36:47"} +{"current_steps": 99, "total_steps": 1955, "loss": 1.1307, "lr": 8.081632653061225e-05, "epoch": 0.2531969309462916, "percentage": 5.06, "elapsed_time": "0:30:44", "remaining_time": "9:36:27"} +{"current_steps": 100, "total_steps": 1955, "loss": 1.1283, "lr": 8.16326530612245e-05, "epoch": 0.2557544757033248, "percentage": 5.12, "elapsed_time": "0:31:03", "remaining_time": "9:36:08"} +{"current_steps": 101, "total_steps": 1955, "loss": 1.1486, "lr": 8.244897959183673e-05, "epoch": 0.25831202046035806, "percentage": 5.17, "elapsed_time": "0:31:22", "remaining_time": "9:35:56"} +{"current_steps": 102, "total_steps": 1955, "loss": 1.1539, "lr": 8.326530612244899e-05, "epoch": 0.2608695652173913, "percentage": 5.22, "elapsed_time": "0:31:41", 
"remaining_time": "9:35:37"} +{"current_steps": 103, "total_steps": 1955, "loss": 1.1246, "lr": 8.408163265306123e-05, "epoch": 0.26342710997442453, "percentage": 5.27, "elapsed_time": "0:31:59", "remaining_time": "9:35:18"} +{"current_steps": 104, "total_steps": 1955, "loss": 1.1193, "lr": 8.489795918367348e-05, "epoch": 0.2659846547314578, "percentage": 5.32, "elapsed_time": "0:32:18", "remaining_time": "9:34:59"} +{"current_steps": 105, "total_steps": 1955, "loss": 1.1257, "lr": 8.571428571428571e-05, "epoch": 0.26854219948849106, "percentage": 5.37, "elapsed_time": "0:32:36", "remaining_time": "9:34:40"} +{"current_steps": 106, "total_steps": 1955, "loss": 1.1127, "lr": 8.653061224489797e-05, "epoch": 0.2710997442455243, "percentage": 5.42, "elapsed_time": "0:32:55", "remaining_time": "9:34:20"} +{"current_steps": 107, "total_steps": 1955, "loss": 1.1441, "lr": 8.734693877551021e-05, "epoch": 0.27365728900255754, "percentage": 5.47, "elapsed_time": "0:33:13", "remaining_time": "9:33:51"} +{"current_steps": 108, "total_steps": 1955, "loss": 1.1199, "lr": 8.816326530612245e-05, "epoch": 0.27621483375959077, "percentage": 5.52, "elapsed_time": "0:33:32", "remaining_time": "9:33:32"} +{"current_steps": 109, "total_steps": 1955, "loss": 1.1454, "lr": 8.897959183673471e-05, "epoch": 0.27877237851662406, "percentage": 5.58, "elapsed_time": "0:33:50", "remaining_time": "9:33:07"} +{"current_steps": 110, "total_steps": 1955, "loss": 1.1534, "lr": 8.979591836734694e-05, "epoch": 0.2813299232736573, "percentage": 5.63, "elapsed_time": "0:34:08", "remaining_time": "9:32:39"} +{"current_steps": 111, "total_steps": 1955, "loss": 1.1518, "lr": 9.061224489795919e-05, "epoch": 0.28388746803069054, "percentage": 5.68, "elapsed_time": "0:34:26", "remaining_time": "9:32:17"} +{"current_steps": 112, "total_steps": 1955, "loss": 1.1422, "lr": 9.142857142857143e-05, "epoch": 0.2864450127877238, "percentage": 5.73, "elapsed_time": "0:34:45", "remaining_time": "9:32:00"} 
+{"current_steps": 113, "total_steps": 1955, "loss": 1.1125, "lr": 9.224489795918369e-05, "epoch": 0.289002557544757, "percentage": 5.78, "elapsed_time": "0:35:05", "remaining_time": "9:31:55"} +{"current_steps": 114, "total_steps": 1955, "loss": 1.147, "lr": 9.306122448979593e-05, "epoch": 0.2915601023017903, "percentage": 5.83, "elapsed_time": "0:35:23", "remaining_time": "9:31:27"} +{"current_steps": 115, "total_steps": 1955, "loss": 1.1225, "lr": 9.387755102040817e-05, "epoch": 0.29411764705882354, "percentage": 5.88, "elapsed_time": "0:35:41", "remaining_time": "9:31:08"} +{"current_steps": 116, "total_steps": 1955, "loss": 1.1402, "lr": 9.469387755102041e-05, "epoch": 0.2966751918158568, "percentage": 5.93, "elapsed_time": "0:35:59", "remaining_time": "9:30:40"} +{"current_steps": 117, "total_steps": 1955, "loss": 1.1268, "lr": 9.551020408163267e-05, "epoch": 0.29923273657289, "percentage": 5.98, "elapsed_time": "0:36:17", "remaining_time": "9:30:04"} +{"current_steps": 118, "total_steps": 1955, "loss": 1.1782, "lr": 9.632653061224491e-05, "epoch": 0.30179028132992325, "percentage": 6.04, "elapsed_time": "0:36:35", "remaining_time": "9:29:39"} +{"current_steps": 119, "total_steps": 1955, "loss": 1.1383, "lr": 9.714285714285714e-05, "epoch": 0.30434782608695654, "percentage": 6.09, "elapsed_time": "0:36:54", "remaining_time": "9:29:22"} +{"current_steps": 120, "total_steps": 1955, "loss": 1.1485, "lr": 9.79591836734694e-05, "epoch": 0.3069053708439898, "percentage": 6.14, "elapsed_time": "0:37:12", "remaining_time": "9:29:05"} +{"current_steps": 121, "total_steps": 1955, "loss": 1.1604, "lr": 9.877551020408164e-05, "epoch": 0.309462915601023, "percentage": 6.19, "elapsed_time": "0:37:30", "remaining_time": "9:28:37"} +{"current_steps": 122, "total_steps": 1955, "loss": 1.1129, "lr": 9.959183673469389e-05, "epoch": 0.31202046035805625, "percentage": 6.24, "elapsed_time": "0:37:49", "remaining_time": "9:28:18"} +{"current_steps": 123, "total_steps": 1955, 
"loss": 1.1344, "lr": 0.00010040816326530613, "epoch": 0.3145780051150895, "percentage": 6.29, "elapsed_time": "0:38:07", "remaining_time": "9:27:52"} +{"current_steps": 124, "total_steps": 1955, "loss": 1.1269, "lr": 0.00010122448979591839, "epoch": 0.3171355498721228, "percentage": 6.34, "elapsed_time": "0:38:26", "remaining_time": "9:27:33"} +{"current_steps": 125, "total_steps": 1955, "loss": 1.1078, "lr": 0.00010204081632653062, "epoch": 0.319693094629156, "percentage": 6.39, "elapsed_time": "0:38:44", "remaining_time": "9:27:07"} +{"current_steps": 126, "total_steps": 1955, "loss": 1.1653, "lr": 0.00010285714285714287, "epoch": 0.32225063938618925, "percentage": 6.45, "elapsed_time": "0:39:03", "remaining_time": "9:27:03"} +{"current_steps": 127, "total_steps": 1955, "loss": 1.1088, "lr": 0.00010367346938775511, "epoch": 0.3248081841432225, "percentage": 6.5, "elapsed_time": "0:39:22", "remaining_time": "9:26:44"} +{"current_steps": 128, "total_steps": 1955, "loss": 1.1289, "lr": 0.00010448979591836734, "epoch": 0.3273657289002558, "percentage": 6.55, "elapsed_time": "0:39:41", "remaining_time": "9:26:28"} +{"current_steps": 129, "total_steps": 1955, "loss": 1.1191, "lr": 0.0001053061224489796, "epoch": 0.329923273657289, "percentage": 6.6, "elapsed_time": "0:39:59", "remaining_time": "9:26:10"} +{"current_steps": 130, "total_steps": 1955, "loss": 1.1527, "lr": 0.00010612244897959184, "epoch": 0.33248081841432225, "percentage": 6.65, "elapsed_time": "0:40:18", "remaining_time": "9:25:51"} +{"current_steps": 131, "total_steps": 1955, "loss": 1.118, "lr": 0.0001069387755102041, "epoch": 0.3350383631713555, "percentage": 6.7, "elapsed_time": "0:40:37", "remaining_time": "9:25:32"} +{"current_steps": 132, "total_steps": 1955, "loss": 1.1702, "lr": 0.00010775510204081634, "epoch": 0.3375959079283887, "percentage": 6.75, "elapsed_time": "0:40:55", "remaining_time": "9:25:13"} +{"current_steps": 133, "total_steps": 1955, "loss": 1.1467, "lr": 0.00010857142857142859, 
"epoch": 0.340153452685422, "percentage": 6.8, "elapsed_time": "0:41:14", "remaining_time": "9:25:01"} +{"current_steps": 134, "total_steps": 1955, "loss": 1.1174, "lr": 0.00010938775510204082, "epoch": 0.34271099744245526, "percentage": 6.85, "elapsed_time": "0:41:33", "remaining_time": "9:24:42"} +{"current_steps": 135, "total_steps": 1955, "loss": 1.1746, "lr": 0.00011020408163265307, "epoch": 0.3452685421994885, "percentage": 6.91, "elapsed_time": "0:41:51", "remaining_time": "9:24:22"} +{"current_steps": 136, "total_steps": 1955, "loss": 1.1333, "lr": 0.00011102040816326532, "epoch": 0.34782608695652173, "percentage": 6.96, "elapsed_time": "0:42:09", "remaining_time": "9:23:58"} +{"current_steps": 137, "total_steps": 1955, "loss": 1.1482, "lr": 0.00011183673469387757, "epoch": 0.35038363171355497, "percentage": 7.01, "elapsed_time": "0:42:28", "remaining_time": "9:23:40"} +{"current_steps": 138, "total_steps": 1955, "loss": 1.1304, "lr": 0.0001126530612244898, "epoch": 0.35294117647058826, "percentage": 7.06, "elapsed_time": "0:42:47", "remaining_time": "9:23:29"} +{"current_steps": 139, "total_steps": 1955, "loss": 1.1106, "lr": 0.00011346938775510204, "epoch": 0.3554987212276215, "percentage": 7.11, "elapsed_time": "0:43:06", "remaining_time": "9:23:10"} +{"current_steps": 140, "total_steps": 1955, "loss": 1.1664, "lr": 0.0001142857142857143, "epoch": 0.35805626598465473, "percentage": 7.16, "elapsed_time": "0:43:25", "remaining_time": "9:22:53"} +{"current_steps": 141, "total_steps": 1955, "loss": 1.0802, "lr": 0.00011510204081632654, "epoch": 0.36061381074168797, "percentage": 7.21, "elapsed_time": "0:43:43", "remaining_time": "9:22:35"} +{"current_steps": 142, "total_steps": 1955, "loss": 1.1332, "lr": 0.0001159183673469388, "epoch": 0.3631713554987212, "percentage": 7.26, "elapsed_time": "0:44:02", "remaining_time": "9:22:16"} +{"current_steps": 143, "total_steps": 1955, "loss": 1.1573, "lr": 0.00011673469387755102, "epoch": 0.3657289002557545, 
"percentage": 7.31, "elapsed_time": "0:44:21", "remaining_time": "9:22:04"} +{"current_steps": 144, "total_steps": 1955, "loss": 1.1292, "lr": 0.00011755102040816328, "epoch": 0.36828644501278773, "percentage": 7.37, "elapsed_time": "0:44:40", "remaining_time": "9:21:45"} +{"current_steps": 145, "total_steps": 1955, "loss": 1.1375, "lr": 0.00011836734693877552, "epoch": 0.37084398976982097, "percentage": 7.42, "elapsed_time": "0:44:58", "remaining_time": "9:21:21"} +{"current_steps": 146, "total_steps": 1955, "loss": 1.1281, "lr": 0.00011918367346938777, "epoch": 0.3734015345268542, "percentage": 7.47, "elapsed_time": "0:45:16", "remaining_time": "9:21:03"} +{"current_steps": 147, "total_steps": 1955, "loss": 1.1604, "lr": 0.00012000000000000002, "epoch": 0.37595907928388744, "percentage": 7.52, "elapsed_time": "0:45:35", "remaining_time": "9:20:42"} +{"current_steps": 148, "total_steps": 1955, "loss": 1.1247, "lr": 0.00012081632653061224, "epoch": 0.37851662404092073, "percentage": 7.57, "elapsed_time": "0:45:53", "remaining_time": "9:20:19"} +{"current_steps": 149, "total_steps": 1955, "loss": 1.1087, "lr": 0.0001216326530612245, "epoch": 0.38107416879795397, "percentage": 7.62, "elapsed_time": "0:46:12", "remaining_time": "9:20:00"} +{"current_steps": 150, "total_steps": 1955, "loss": 1.1174, "lr": 0.00012244897959183673, "epoch": 0.3836317135549872, "percentage": 7.67, "elapsed_time": "0:46:30", "remaining_time": "9:19:36"} +{"current_steps": 151, "total_steps": 1955, "loss": 1.1243, "lr": 0.00012326530612244898, "epoch": 0.38618925831202044, "percentage": 7.72, "elapsed_time": "0:46:48", "remaining_time": "9:19:18"} +{"current_steps": 152, "total_steps": 1955, "loss": 1.127, "lr": 0.00012408163265306124, "epoch": 0.3887468030690537, "percentage": 7.77, "elapsed_time": "0:47:07", "remaining_time": "9:18:59"} +{"current_steps": 153, "total_steps": 1955, "loss": 1.1152, "lr": 0.0001248979591836735, "epoch": 0.391304347826087, "percentage": 7.83, "elapsed_time": 
"0:47:26", "remaining_time": "9:18:40"} +{"current_steps": 154, "total_steps": 1955, "loss": 1.129, "lr": 0.00012571428571428572, "epoch": 0.3938618925831202, "percentage": 7.88, "elapsed_time": "0:47:43", "remaining_time": "9:18:12"} +{"current_steps": 155, "total_steps": 1955, "loss": 1.1347, "lr": 0.00012653061224489798, "epoch": 0.39641943734015345, "percentage": 7.93, "elapsed_time": "0:48:02", "remaining_time": "9:17:53"} +{"current_steps": 156, "total_steps": 1955, "loss": 1.1398, "lr": 0.0001273469387755102, "epoch": 0.3989769820971867, "percentage": 7.98, "elapsed_time": "0:48:20", "remaining_time": "9:17:29"} +{"current_steps": 157, "total_steps": 1955, "loss": 1.1572, "lr": 0.00012816326530612246, "epoch": 0.40153452685422, "percentage": 8.03, "elapsed_time": "0:48:39", "remaining_time": "9:17:12"} +{"current_steps": 158, "total_steps": 1955, "loss": 1.1443, "lr": 0.00012897959183673472, "epoch": 0.4040920716112532, "percentage": 8.08, "elapsed_time": "0:48:57", "remaining_time": "9:16:53"} +{"current_steps": 159, "total_steps": 1955, "loss": 1.1027, "lr": 0.00012979591836734695, "epoch": 0.40664961636828645, "percentage": 8.13, "elapsed_time": "0:49:15", "remaining_time": "9:16:28"} +{"current_steps": 160, "total_steps": 1955, "loss": 1.1194, "lr": 0.0001306122448979592, "epoch": 0.4092071611253197, "percentage": 8.18, "elapsed_time": "0:49:34", "remaining_time": "9:16:10"} +{"current_steps": 161, "total_steps": 1955, "loss": 1.1209, "lr": 0.00013142857142857143, "epoch": 0.4117647058823529, "percentage": 8.24, "elapsed_time": "0:49:53", "remaining_time": "9:15:51"} +{"current_steps": 162, "total_steps": 1955, "loss": 1.1115, "lr": 0.00013224489795918368, "epoch": 0.4143222506393862, "percentage": 8.29, "elapsed_time": "0:50:11", "remaining_time": "9:15:30"} +{"current_steps": 163, "total_steps": 1955, "loss": 1.1237, "lr": 0.00013306122448979594, "epoch": 0.41687979539641945, "percentage": 8.34, "elapsed_time": "0:50:29", "remaining_time": "9:15:11"} 
+{"current_steps": 164, "total_steps": 1955, "loss": 1.1353, "lr": 0.00013387755102040817, "epoch": 0.4194373401534527, "percentage": 8.39, "elapsed_time": "0:50:48", "remaining_time": "9:14:53"} +{"current_steps": 165, "total_steps": 1955, "loss": 1.1043, "lr": 0.00013469387755102042, "epoch": 0.4219948849104859, "percentage": 8.44, "elapsed_time": "0:51:07", "remaining_time": "9:14:35"} +{"current_steps": 166, "total_steps": 1955, "loss": 1.1252, "lr": 0.00013551020408163268, "epoch": 0.42455242966751916, "percentage": 8.49, "elapsed_time": "0:51:25", "remaining_time": "9:14:16"} +{"current_steps": 167, "total_steps": 1955, "loss": 1.1226, "lr": 0.0001363265306122449, "epoch": 0.42710997442455245, "percentage": 8.54, "elapsed_time": "0:51:44", "remaining_time": "9:14:01"} +{"current_steps": 168, "total_steps": 1955, "loss": 1.1323, "lr": 0.00013714285714285713, "epoch": 0.4296675191815857, "percentage": 8.59, "elapsed_time": "0:52:03", "remaining_time": "9:13:42"} +{"current_steps": 169, "total_steps": 1955, "loss": 1.1318, "lr": 0.0001379591836734694, "epoch": 0.4322250639386189, "percentage": 8.64, "elapsed_time": "0:52:22", "remaining_time": "9:13:25"} +{"current_steps": 170, "total_steps": 1955, "loss": 1.1093, "lr": 0.00013877551020408165, "epoch": 0.43478260869565216, "percentage": 8.7, "elapsed_time": "0:52:40", "remaining_time": "9:13:01"} +{"current_steps": 171, "total_steps": 1955, "loss": 1.1297, "lr": 0.0001395918367346939, "epoch": 0.4373401534526854, "percentage": 8.75, "elapsed_time": "0:52:58", "remaining_time": "9:12:42"} +{"current_steps": 172, "total_steps": 1955, "loss": 1.1217, "lr": 0.00014040816326530613, "epoch": 0.4398976982097187, "percentage": 8.8, "elapsed_time": "0:53:17", "remaining_time": "9:12:24"} +{"current_steps": 173, "total_steps": 1955, "loss": 1.145, "lr": 0.00014122448979591838, "epoch": 0.4424552429667519, "percentage": 8.85, "elapsed_time": "0:53:35", "remaining_time": "9:12:06"} +{"current_steps": 174, "total_steps": 
1955, "loss": 1.151, "lr": 0.0001420408163265306, "epoch": 0.44501278772378516, "percentage": 8.9, "elapsed_time": "0:53:54", "remaining_time": "9:11:47"} +{"current_steps": 175, "total_steps": 1955, "loss": 1.1209, "lr": 0.00014285714285714287, "epoch": 0.4475703324808184, "percentage": 8.95, "elapsed_time": "0:54:12", "remaining_time": "9:11:26"} +{"current_steps": 176, "total_steps": 1955, "loss": 1.155, "lr": 0.00014367346938775512, "epoch": 0.45012787723785164, "percentage": 9.0, "elapsed_time": "0:54:31", "remaining_time": "9:11:08"} +{"current_steps": 177, "total_steps": 1955, "loss": 1.1264, "lr": 0.00014448979591836735, "epoch": 0.45268542199488493, "percentage": 9.05, "elapsed_time": "0:54:50", "remaining_time": "9:10:50"} +{"current_steps": 178, "total_steps": 1955, "loss": 1.1339, "lr": 0.0001453061224489796, "epoch": 0.45524296675191817, "percentage": 9.1, "elapsed_time": "0:55:08", "remaining_time": "9:10:28"} +{"current_steps": 179, "total_steps": 1955, "loss": 1.1179, "lr": 0.00014612244897959183, "epoch": 0.4578005115089514, "percentage": 9.16, "elapsed_time": "0:55:26", "remaining_time": "9:10:03"} +{"current_steps": 180, "total_steps": 1955, "loss": 1.1306, "lr": 0.0001469387755102041, "epoch": 0.46035805626598464, "percentage": 9.21, "elapsed_time": "0:55:44", "remaining_time": "9:09:44"} +{"current_steps": 181, "total_steps": 1955, "loss": 1.1547, "lr": 0.00014775510204081635, "epoch": 0.4629156010230179, "percentage": 9.26, "elapsed_time": "0:56:02", "remaining_time": "9:09:20"} +{"current_steps": 182, "total_steps": 1955, "loss": 1.1234, "lr": 0.0001485714285714286, "epoch": 0.46547314578005117, "percentage": 9.31, "elapsed_time": "0:56:21", "remaining_time": "9:09:02"} +{"current_steps": 183, "total_steps": 1955, "loss": 1.1384, "lr": 0.00014938775510204083, "epoch": 0.4680306905370844, "percentage": 9.36, "elapsed_time": "0:56:41", "remaining_time": "9:08:52"} +{"current_steps": 184, "total_steps": 1955, "loss": 1.1308, "lr": 
0.00015020408163265308, "epoch": 0.47058823529411764, "percentage": 9.41, "elapsed_time": "0:56:59", "remaining_time": "9:08:34"} +{"current_steps": 185, "total_steps": 1955, "loss": 1.142, "lr": 0.0001510204081632653, "epoch": 0.4731457800511509, "percentage": 9.46, "elapsed_time": "0:57:18", "remaining_time": "9:08:15"} +{"current_steps": 186, "total_steps": 1955, "loss": 1.1399, "lr": 0.00015183673469387757, "epoch": 0.47570332480818417, "percentage": 9.51, "elapsed_time": "0:57:36", "remaining_time": "9:07:51"} +{"current_steps": 187, "total_steps": 1955, "loss": 1.1247, "lr": 0.0001526530612244898, "epoch": 0.4782608695652174, "percentage": 9.57, "elapsed_time": "0:57:54", "remaining_time": "9:07:29"} +{"current_steps": 188, "total_steps": 1955, "loss": 1.1439, "lr": 0.00015346938775510205, "epoch": 0.48081841432225064, "percentage": 9.62, "elapsed_time": "0:58:12", "remaining_time": "9:07:05"} +{"current_steps": 189, "total_steps": 1955, "loss": 1.1268, "lr": 0.0001542857142857143, "epoch": 0.4833759590792839, "percentage": 9.67, "elapsed_time": "0:58:30", "remaining_time": "9:06:42"} +{"current_steps": 190, "total_steps": 1955, "loss": 1.1315, "lr": 0.00015510204081632654, "epoch": 0.4859335038363171, "percentage": 9.72, "elapsed_time": "0:58:48", "remaining_time": "9:06:18"} +{"current_steps": 191, "total_steps": 1955, "loss": 1.0859, "lr": 0.0001559183673469388, "epoch": 0.4884910485933504, "percentage": 9.77, "elapsed_time": "0:59:07", "remaining_time": "9:06:00"} +{"current_steps": 192, "total_steps": 1955, "loss": 1.122, "lr": 0.00015673469387755102, "epoch": 0.49104859335038364, "percentage": 9.82, "elapsed_time": "0:59:25", "remaining_time": "9:05:36"} +{"current_steps": 193, "total_steps": 1955, "loss": 1.0953, "lr": 0.00015755102040816327, "epoch": 0.4936061381074169, "percentage": 9.87, "elapsed_time": "0:59:44", "remaining_time": "9:05:26"} +{"current_steps": 194, "total_steps": 1955, "loss": 1.0973, "lr": 0.00015836734693877553, "epoch": 
0.4961636828644501, "percentage": 9.92, "elapsed_time": "1:00:03", "remaining_time": "9:05:11"} +{"current_steps": 195, "total_steps": 1955, "loss": 1.1666, "lr": 0.00015918367346938778, "epoch": 0.49872122762148335, "percentage": 9.97, "elapsed_time": "1:00:22", "remaining_time": "9:04:52"} +{"current_steps": 196, "total_steps": 1955, "loss": 1.1244, "lr": 0.00016, "epoch": 0.5012787723785166, "percentage": 10.03, "elapsed_time": "1:00:40", "remaining_time": "9:04:29"} +{"current_steps": 197, "total_steps": 1955, "loss": 1.118, "lr": 0.00015999987240667874, "epoch": 0.5038363171355499, "percentage": 10.08, "elapsed_time": "1:00:59", "remaining_time": "9:04:15"} +{"current_steps": 198, "total_steps": 1955, "loss": 1.1489, "lr": 0.0001599994896271219, "epoch": 0.5063938618925832, "percentage": 10.13, "elapsed_time": "1:01:18", "remaining_time": "9:03:57"} +{"current_steps": 199, "total_steps": 1955, "loss": 1.1172, "lr": 0.0001599988516625505, "epoch": 0.5089514066496164, "percentage": 10.18, "elapsed_time": "1:01:36", "remaining_time": "9:03:39"} +{"current_steps": 200, "total_steps": 1955, "loss": 1.124, "lr": 0.00015999795851499954, "epoch": 0.5115089514066496, "percentage": 10.23, "elapsed_time": "1:01:55", "remaining_time": "9:03:21"} +{"current_steps": 201, "total_steps": 1955, "loss": 1.1087, "lr": 0.000159996810187318, "epoch": 0.5140664961636828, "percentage": 10.28, "elapsed_time": "1:02:13", "remaining_time": "9:03:00"} +{"current_steps": 202, "total_steps": 1955, "loss": 1.0977, "lr": 0.0001599954066831689, "epoch": 0.5166240409207161, "percentage": 10.33, "elapsed_time": "1:02:31", "remaining_time": "9:02:34"} +{"current_steps": 203, "total_steps": 1955, "loss": 1.1278, "lr": 0.00015999374800702916, "epoch": 0.5191815856777494, "percentage": 10.38, "elapsed_time": "1:02:49", "remaining_time": "9:02:15"} +{"current_steps": 204, "total_steps": 1955, "loss": 1.0978, "lr": 0.00015999183416418963, "epoch": 0.5217391304347826, "percentage": 10.43, 
"elapsed_time": "1:03:08", "remaining_time": "9:01:57"} +{"current_steps": 205, "total_steps": 1955, "loss": 1.1255, "lr": 0.0001599896651607552, "epoch": 0.5242966751918159, "percentage": 10.49, "elapsed_time": "1:03:27", "remaining_time": "9:01:39"} +{"current_steps": 206, "total_steps": 1955, "loss": 1.1117, "lr": 0.00015998724100364464, "epoch": 0.5268542199488491, "percentage": 10.54, "elapsed_time": "1:03:45", "remaining_time": "9:01:20"} +{"current_steps": 207, "total_steps": 1955, "loss": 1.1269, "lr": 0.00015998456170059059, "epoch": 0.5294117647058824, "percentage": 10.59, "elapsed_time": "1:04:04", "remaining_time": "9:01:02"} +{"current_steps": 208, "total_steps": 1955, "loss": 1.1159, "lr": 0.00015998162726013954, "epoch": 0.5319693094629157, "percentage": 10.64, "elapsed_time": "1:04:22", "remaining_time": "9:00:43"} +{"current_steps": 209, "total_steps": 1955, "loss": 1.1025, "lr": 0.00015997843769165193, "epoch": 0.5345268542199488, "percentage": 10.69, "elapsed_time": "1:04:41", "remaining_time": "9:00:25"} +{"current_steps": 210, "total_steps": 1955, "loss": 1.0962, "lr": 0.0001599749930053019, "epoch": 0.5370843989769821, "percentage": 10.74, "elapsed_time": "1:04:59", "remaining_time": "9:00:06"} +{"current_steps": 211, "total_steps": 1955, "loss": 1.1216, "lr": 0.00015997129321207747, "epoch": 0.5396419437340153, "percentage": 10.79, "elapsed_time": "1:05:18", "remaining_time": "8:59:48"} +{"current_steps": 212, "total_steps": 1955, "loss": 1.0845, "lr": 0.00015996733832378032, "epoch": 0.5421994884910486, "percentage": 10.84, "elapsed_time": "1:05:37", "remaining_time": "8:59:30"} +{"current_steps": 213, "total_steps": 1955, "loss": 1.1337, "lr": 0.00015996312835302593, "epoch": 0.5447570332480819, "percentage": 10.9, "elapsed_time": "1:05:55", "remaining_time": "8:59:11"} +{"current_steps": 214, "total_steps": 1955, "loss": 1.0791, "lr": 0.00015995866331324334, "epoch": 0.5473145780051151, "percentage": 10.95, "elapsed_time": "1:06:13", 
"remaining_time": "8:58:49"} +{"current_steps": 215, "total_steps": 1955, "loss": 1.0898, "lr": 0.00015995394321867534, "epoch": 0.5498721227621484, "percentage": 11.0, "elapsed_time": "1:06:32", "remaining_time": "8:58:30"} +{"current_steps": 216, "total_steps": 1955, "loss": 1.1221, "lr": 0.0001599489680843782, "epoch": 0.5524296675191815, "percentage": 11.05, "elapsed_time": "1:06:51", "remaining_time": "8:58:12"} +{"current_steps": 217, "total_steps": 1955, "loss": 1.0914, "lr": 0.00015994373792622182, "epoch": 0.5549872122762148, "percentage": 11.1, "elapsed_time": "1:07:09", "remaining_time": "8:57:54"} +{"current_steps": 218, "total_steps": 1955, "loss": 1.0659, "lr": 0.0001599382527608895, "epoch": 0.5575447570332481, "percentage": 11.15, "elapsed_time": "1:07:27", "remaining_time": "8:57:31"} +{"current_steps": 219, "total_steps": 1955, "loss": 1.0895, "lr": 0.00015993251260587796, "epoch": 0.5601023017902813, "percentage": 11.2, "elapsed_time": "1:07:46", "remaining_time": "8:57:13"} +{"current_steps": 220, "total_steps": 1955, "loss": 1.1447, "lr": 0.00015992651747949742, "epoch": 0.5626598465473146, "percentage": 11.25, "elapsed_time": "1:08:04", "remaining_time": "8:56:50"} +{"current_steps": 221, "total_steps": 1955, "loss": 1.082, "lr": 0.00015992026740087125, "epoch": 0.5652173913043478, "percentage": 11.3, "elapsed_time": "1:08:22", "remaining_time": "8:56:29"} +{"current_steps": 222, "total_steps": 1955, "loss": 1.0858, "lr": 0.00015991376238993623, "epoch": 0.5677749360613811, "percentage": 11.36, "elapsed_time": "1:08:40", "remaining_time": "8:56:07"} +{"current_steps": 223, "total_steps": 1955, "loss": 1.0903, "lr": 0.0001599070024674422, "epoch": 0.5703324808184144, "percentage": 11.41, "elapsed_time": "1:08:59", "remaining_time": "8:55:48"} +{"current_steps": 224, "total_steps": 1955, "loss": 1.1162, "lr": 0.0001598999876549522, "epoch": 0.5728900255754475, "percentage": 11.46, "elapsed_time": "1:09:18", "remaining_time": "8:55:31"} 
+{"current_steps": 225, "total_steps": 1955, "loss": 1.1131, "lr": 0.00015989271797484236, "epoch": 0.5754475703324808, "percentage": 11.51, "elapsed_time": "1:09:36", "remaining_time": "8:55:13"} +{"current_steps": 226, "total_steps": 1955, "loss": 1.0896, "lr": 0.00015988519345030167, "epoch": 0.578005115089514, "percentage": 11.56, "elapsed_time": "1:09:55", "remaining_time": "8:54:55"} +{"current_steps": 227, "total_steps": 1955, "loss": 1.0953, "lr": 0.00015987741410533217, "epoch": 0.5805626598465473, "percentage": 11.61, "elapsed_time": "1:10:13", "remaining_time": "8:54:37"} +{"current_steps": 228, "total_steps": 1955, "loss": 1.0837, "lr": 0.0001598693799647486, "epoch": 0.5831202046035806, "percentage": 11.66, "elapsed_time": "1:10:32", "remaining_time": "8:54:18"} +{"current_steps": 229, "total_steps": 1955, "loss": 1.1026, "lr": 0.00015986109105417862, "epoch": 0.5856777493606138, "percentage": 11.71, "elapsed_time": "1:10:51", "remaining_time": "8:54:00"} +{"current_steps": 230, "total_steps": 1955, "loss": 1.1069, "lr": 0.0001598525474000624, "epoch": 0.5882352941176471, "percentage": 11.76, "elapsed_time": "1:11:09", "remaining_time": "8:53:38"} +{"current_steps": 231, "total_steps": 1955, "loss": 1.1079, "lr": 0.00015984374902965284, "epoch": 0.5907928388746803, "percentage": 11.82, "elapsed_time": "1:11:27", "remaining_time": "8:53:16"} +{"current_steps": 232, "total_steps": 1955, "loss": 1.088, "lr": 0.00015983469597101517, "epoch": 0.5933503836317136, "percentage": 11.87, "elapsed_time": "1:11:46", "remaining_time": "8:53:01"} +{"current_steps": 233, "total_steps": 1955, "loss": 1.0947, "lr": 0.0001598253882530272, "epoch": 0.5959079283887468, "percentage": 11.92, "elapsed_time": "1:12:04", "remaining_time": "8:52:39"} +{"current_steps": 234, "total_steps": 1955, "loss": 1.0527, "lr": 0.00015981582590537897, "epoch": 0.59846547314578, "percentage": 11.97, "elapsed_time": "1:12:22", "remaining_time": "8:52:20"} +{"current_steps": 235, 
"total_steps": 1955, "loss": 1.0747, "lr": 0.0001598060089585728, "epoch": 0.6010230179028133, "percentage": 12.02, "elapsed_time": "1:12:41", "remaining_time": "8:52:02"} +{"current_steps": 236, "total_steps": 1955, "loss": 1.1013, "lr": 0.00015979593744392312, "epoch": 0.6035805626598465, "percentage": 12.07, "elapsed_time": "1:12:59", "remaining_time": "8:51:43"} +{"current_steps": 237, "total_steps": 1955, "loss": 1.0967, "lr": 0.00015978561139355635, "epoch": 0.6061381074168798, "percentage": 12.12, "elapsed_time": "1:13:19", "remaining_time": "8:51:30"} +{"current_steps": 238, "total_steps": 1955, "loss": 1.0733, "lr": 0.00015977503084041087, "epoch": 0.6086956521739131, "percentage": 12.17, "elapsed_time": "1:13:37", "remaining_time": "8:51:08"} +{"current_steps": 239, "total_steps": 1955, "loss": 1.1196, "lr": 0.00015976419581823688, "epoch": 0.6112531969309463, "percentage": 12.23, "elapsed_time": "1:13:55", "remaining_time": "8:50:46"} +{"current_steps": 240, "total_steps": 1955, "loss": 1.088, "lr": 0.00015975310636159632, "epoch": 0.6138107416879796, "percentage": 12.28, "elapsed_time": "1:14:14", "remaining_time": "8:50:28"} +{"current_steps": 241, "total_steps": 1955, "loss": 1.0768, "lr": 0.00015974176250586265, "epoch": 0.6163682864450127, "percentage": 12.33, "elapsed_time": "1:14:32", "remaining_time": "8:50:09"} +{"current_steps": 242, "total_steps": 1955, "loss": 1.106, "lr": 0.00015973016428722094, "epoch": 0.618925831202046, "percentage": 12.38, "elapsed_time": "1:14:51", "remaining_time": "8:49:51"} +{"current_steps": 243, "total_steps": 1955, "loss": 1.1002, "lr": 0.0001597183117426675, "epoch": 0.6214833759590793, "percentage": 12.43, "elapsed_time": "1:15:09", "remaining_time": "8:49:33"} +{"current_steps": 244, "total_steps": 1955, "loss": 1.1445, "lr": 0.00015970620491001004, "epoch": 0.6240409207161125, "percentage": 12.48, "elapsed_time": "1:15:28", "remaining_time": "8:49:15"} +{"current_steps": 245, "total_steps": 1955, "loss": 
1.1019, "lr": 0.00015969384382786729, "epoch": 0.6265984654731458, "percentage": 12.53, "elapsed_time": "1:15:47", "remaining_time": "8:48:56"} +{"current_steps": 246, "total_steps": 1955, "loss": 1.1002, "lr": 0.00015968122853566905, "epoch": 0.629156010230179, "percentage": 12.58, "elapsed_time": "1:16:05", "remaining_time": "8:48:39"} +{"current_steps": 247, "total_steps": 1955, "loss": 1.0892, "lr": 0.000159668359073656, "epoch": 0.6317135549872123, "percentage": 12.63, "elapsed_time": "1:16:24", "remaining_time": "8:48:21"} +{"current_steps": 248, "total_steps": 1955, "loss": 1.1395, "lr": 0.00015965523548287956, "epoch": 0.6342710997442456, "percentage": 12.69, "elapsed_time": "1:16:43", "remaining_time": "8:48:03"} +{"current_steps": 249, "total_steps": 1955, "loss": 1.1157, "lr": 0.0001596418578052018, "epoch": 0.6368286445012787, "percentage": 12.74, "elapsed_time": "1:17:01", "remaining_time": "8:47:41"} +{"current_steps": 250, "total_steps": 1955, "loss": 1.0961, "lr": 0.0001596282260832953, "epoch": 0.639386189258312, "percentage": 12.79, "elapsed_time": "1:17:19", "remaining_time": "8:47:23"} +{"current_steps": 251, "total_steps": 1955, "loss": 1.1019, "lr": 0.00015961434036064294, "epoch": 0.6419437340153452, "percentage": 12.84, "elapsed_time": "1:17:38", "remaining_time": "8:47:04"} +{"current_steps": 252, "total_steps": 1955, "loss": 1.1053, "lr": 0.00015960020068153785, "epoch": 0.6445012787723785, "percentage": 12.89, "elapsed_time": "1:17:56", "remaining_time": "8:46:44"} +{"current_steps": 253, "total_steps": 1955, "loss": 1.0848, "lr": 0.00015958580709108332, "epoch": 0.6470588235294118, "percentage": 12.94, "elapsed_time": "1:18:14", "remaining_time": "8:46:23"} +{"current_steps": 254, "total_steps": 1955, "loss": 1.136, "lr": 0.00015957115963519244, "epoch": 0.649616368286445, "percentage": 12.99, "elapsed_time": "1:18:33", "remaining_time": "8:46:04"} +{"current_steps": 255, "total_steps": 1955, "loss": 1.0952, "lr": 0.00015955625836058815, 
"epoch": 0.6521739130434783, "percentage": 13.04, "elapsed_time": "1:18:51", "remaining_time": "8:45:43"} +{"current_steps": 256, "total_steps": 1955, "loss": 1.0809, "lr": 0.00015954110331480302, "epoch": 0.6547314578005116, "percentage": 13.09, "elapsed_time": "1:19:10", "remaining_time": "8:45:24"} +{"current_steps": 257, "total_steps": 1955, "loss": 1.116, "lr": 0.00015952569454617916, "epoch": 0.6572890025575447, "percentage": 13.15, "elapsed_time": "1:19:28", "remaining_time": "8:45:06"} +{"current_steps": 258, "total_steps": 1955, "loss": 1.0784, "lr": 0.00015951003210386793, "epoch": 0.659846547314578, "percentage": 13.2, "elapsed_time": "1:19:47", "remaining_time": "8:44:48"} +{"current_steps": 259, "total_steps": 1955, "loss": 1.1071, "lr": 0.0001594941160378299, "epoch": 0.6624040920716112, "percentage": 13.25, "elapsed_time": "1:20:05", "remaining_time": "8:44:30"} +{"current_steps": 260, "total_steps": 1955, "loss": 1.087, "lr": 0.00015947794639883473, "epoch": 0.6649616368286445, "percentage": 13.3, "elapsed_time": "1:20:23", "remaining_time": "8:44:07"} +{"current_steps": 261, "total_steps": 1955, "loss": 1.0604, "lr": 0.0001594615232384608, "epoch": 0.6675191815856778, "percentage": 13.35, "elapsed_time": "1:20:41", "remaining_time": "8:43:45"} +{"current_steps": 262, "total_steps": 1955, "loss": 1.076, "lr": 0.00015944484660909523, "epoch": 0.670076726342711, "percentage": 13.4, "elapsed_time": "1:21:00", "remaining_time": "8:43:27"} +{"current_steps": 263, "total_steps": 1955, "loss": 1.1204, "lr": 0.00015942791656393376, "epoch": 0.6726342710997443, "percentage": 13.45, "elapsed_time": "1:21:19", "remaining_time": "8:43:08"} +{"current_steps": 264, "total_steps": 1955, "loss": 1.0986, "lr": 0.00015941073315698035, "epoch": 0.6751918158567775, "percentage": 13.5, "elapsed_time": "1:21:37", "remaining_time": "8:42:46"} +{"current_steps": 265, "total_steps": 1955, "loss": 1.1274, "lr": 0.00015939329644304724, "epoch": 0.6777493606138107, 
"percentage": 13.55, "elapsed_time": "1:21:55", "remaining_time": "8:42:28"} +{"current_steps": 266, "total_steps": 1955, "loss": 1.0934, "lr": 0.0001593756064777546, "epoch": 0.680306905370844, "percentage": 13.61, "elapsed_time": "1:22:14", "remaining_time": "8:42:11"} +{"current_steps": 267, "total_steps": 1955, "loss": 1.0471, "lr": 0.00015935766331753049, "epoch": 0.6828644501278772, "percentage": 13.66, "elapsed_time": "1:22:32", "remaining_time": "8:41:52"} +{"current_steps": 268, "total_steps": 1955, "loss": 1.0887, "lr": 0.00015933946701961055, "epoch": 0.6854219948849105, "percentage": 13.71, "elapsed_time": "1:22:51", "remaining_time": "8:41:34"} +{"current_steps": 269, "total_steps": 1955, "loss": 1.0667, "lr": 0.000159321017642038, "epoch": 0.6879795396419437, "percentage": 13.76, "elapsed_time": "1:23:10", "remaining_time": "8:41:16"} +{"current_steps": 270, "total_steps": 1955, "loss": 1.1073, "lr": 0.00015930231524366326, "epoch": 0.690537084398977, "percentage": 13.81, "elapsed_time": "1:23:28", "remaining_time": "8:40:58"} +{"current_steps": 271, "total_steps": 1955, "loss": 1.1053, "lr": 0.0001592833598841438, "epoch": 0.6930946291560103, "percentage": 13.86, "elapsed_time": "1:23:47", "remaining_time": "8:40:40"} +{"current_steps": 272, "total_steps": 1955, "loss": 1.0707, "lr": 0.00015926415162394414, "epoch": 0.6956521739130435, "percentage": 13.91, "elapsed_time": "1:24:05", "remaining_time": "8:40:21"} +{"current_steps": 273, "total_steps": 1955, "loss": 1.0878, "lr": 0.00015924469052433534, "epoch": 0.6982097186700768, "percentage": 13.96, "elapsed_time": "1:24:24", "remaining_time": "8:40:03"} +{"current_steps": 274, "total_steps": 1955, "loss": 1.1036, "lr": 0.00015922497664739508, "epoch": 0.7007672634271099, "percentage": 14.02, "elapsed_time": "1:24:42", "remaining_time": "8:39:42"} +{"current_steps": 275, "total_steps": 1955, "loss": 1.0826, "lr": 0.0001592050100560074, "epoch": 0.7033248081841432, "percentage": 14.07, "elapsed_time": 
"1:25:01", "remaining_time": "8:39:23"} +{"current_steps": 276, "total_steps": 1955, "loss": 1.0992, "lr": 0.0001591847908138623, "epoch": 0.7058823529411765, "percentage": 14.12, "elapsed_time": "1:25:19", "remaining_time": "8:39:05"} +{"current_steps": 277, "total_steps": 1955, "loss": 1.1122, "lr": 0.00015916431898545583, "epoch": 0.7084398976982097, "percentage": 14.17, "elapsed_time": "1:25:38", "remaining_time": "8:38:47"} +{"current_steps": 278, "total_steps": 1955, "loss": 1.0797, "lr": 0.0001591435946360897, "epoch": 0.710997442455243, "percentage": 14.22, "elapsed_time": "1:25:56", "remaining_time": "8:38:27"} +{"current_steps": 279, "total_steps": 1955, "loss": 1.1083, "lr": 0.00015912261783187113, "epoch": 0.7135549872122762, "percentage": 14.27, "elapsed_time": "1:26:15", "remaining_time": "8:38:12"} +{"current_steps": 280, "total_steps": 1955, "loss": 1.0768, "lr": 0.00015910138863971265, "epoch": 0.7161125319693095, "percentage": 14.32, "elapsed_time": "1:26:33", "remaining_time": "8:37:51"} +{"current_steps": 281, "total_steps": 1955, "loss": 1.0675, "lr": 0.00015907990712733176, "epoch": 0.7186700767263428, "percentage": 14.37, "elapsed_time": "1:26:52", "remaining_time": "8:37:33"} +{"current_steps": 282, "total_steps": 1955, "loss": 1.095, "lr": 0.00015905817336325098, "epoch": 0.7212276214833759, "percentage": 14.42, "elapsed_time": "1:27:11", "remaining_time": "8:37:15"} +{"current_steps": 283, "total_steps": 1955, "loss": 1.0227, "lr": 0.00015903618741679735, "epoch": 0.7237851662404092, "percentage": 14.48, "elapsed_time": "1:27:29", "remaining_time": "8:36:54"} +{"current_steps": 284, "total_steps": 1955, "loss": 1.0894, "lr": 0.00015901394935810236, "epoch": 0.7263427109974424, "percentage": 14.53, "elapsed_time": "1:27:47", "remaining_time": "8:36:32"} +{"current_steps": 285, "total_steps": 1955, "loss": 1.0708, "lr": 0.00015899145925810172, "epoch": 0.7289002557544757, "percentage": 14.58, "elapsed_time": "1:28:06", "remaining_time": 
"8:36:14"} +{"current_steps": 286, "total_steps": 1955, "loss": 1.0973, "lr": 0.0001589687171885351, "epoch": 0.731457800511509, "percentage": 14.63, "elapsed_time": "1:28:24", "remaining_time": "8:35:52"} +{"current_steps": 287, "total_steps": 1955, "loss": 1.0959, "lr": 0.0001589457232219459, "epoch": 0.7340153452685422, "percentage": 14.68, "elapsed_time": "1:28:42", "remaining_time": "8:35:34"} +{"current_steps": 288, "total_steps": 1955, "loss": 1.0588, "lr": 0.000158922477431681, "epoch": 0.7365728900255755, "percentage": 14.73, "elapsed_time": "1:29:01", "remaining_time": "8:35:19"} +{"current_steps": 289, "total_steps": 1955, "loss": 1.0877, "lr": 0.00015889897989189065, "epoch": 0.7391304347826086, "percentage": 14.78, "elapsed_time": "1:29:20", "remaining_time": "8:35:01"} +{"current_steps": 290, "total_steps": 1955, "loss": 1.0987, "lr": 0.00015887523067752805, "epoch": 0.7416879795396419, "percentage": 14.83, "elapsed_time": "1:29:38", "remaining_time": "8:34:42"} +{"current_steps": 291, "total_steps": 1955, "loss": 1.0813, "lr": 0.0001588512298643492, "epoch": 0.7442455242966752, "percentage": 14.88, "elapsed_time": "1:29:57", "remaining_time": "8:34:24"} +{"current_steps": 292, "total_steps": 1955, "loss": 1.0493, "lr": 0.00015882697752891273, "epoch": 0.7468030690537084, "percentage": 14.94, "elapsed_time": "1:30:16", "remaining_time": "8:34:06"} +{"current_steps": 293, "total_steps": 1955, "loss": 1.0745, "lr": 0.0001588024737485795, "epoch": 0.7493606138107417, "percentage": 14.99, "elapsed_time": "1:30:34", "remaining_time": "8:33:45"} +{"current_steps": 294, "total_steps": 1955, "loss": 1.0756, "lr": 0.00015877771860151255, "epoch": 0.7519181585677749, "percentage": 15.04, "elapsed_time": "1:30:52", "remaining_time": "8:33:27"} +{"current_steps": 295, "total_steps": 1955, "loss": 1.0624, "lr": 0.00015875271216667658, "epoch": 0.7544757033248082, "percentage": 15.09, "elapsed_time": "1:31:11", "remaining_time": "8:33:09"} +{"current_steps": 296, 
"total_steps": 1955, "loss": 1.0713, "lr": 0.00015872745452383797, "epoch": 0.7570332480818415, "percentage": 15.14, "elapsed_time": "1:31:30", "remaining_time": "8:32:51"} +{"current_steps": 297, "total_steps": 1955, "loss": 1.1115, "lr": 0.00015870194575356444, "epoch": 0.7595907928388747, "percentage": 15.19, "elapsed_time": "1:31:48", "remaining_time": "8:32:32"} +{"current_steps": 298, "total_steps": 1955, "loss": 1.0871, "lr": 0.00015867618593722464, "epoch": 0.7621483375959079, "percentage": 15.24, "elapsed_time": "1:32:07", "remaining_time": "8:32:13"} +{"current_steps": 299, "total_steps": 1955, "loss": 1.0979, "lr": 0.00015865017515698807, "epoch": 0.7647058823529411, "percentage": 15.29, "elapsed_time": "1:32:25", "remaining_time": "8:31:55"} +{"current_steps": 300, "total_steps": 1955, "loss": 1.0597, "lr": 0.00015862391349582484, "epoch": 0.7672634271099744, "percentage": 15.35, "elapsed_time": "1:32:44", "remaining_time": "8:31:38"} +{"current_steps": 301, "total_steps": 1955, "loss": 1.0932, "lr": 0.00015859740103750522, "epoch": 0.7698209718670077, "percentage": 15.4, "elapsed_time": "1:33:03", "remaining_time": "8:31:20"} +{"current_steps": 302, "total_steps": 1955, "loss": 1.0938, "lr": 0.00015857063786659954, "epoch": 0.7723785166240409, "percentage": 15.45, "elapsed_time": "1:33:22", "remaining_time": "8:31:02"} +{"current_steps": 303, "total_steps": 1955, "loss": 1.0623, "lr": 0.00015854362406847786, "epoch": 0.7749360613810742, "percentage": 15.5, "elapsed_time": "1:33:40", "remaining_time": "8:30:44"} +{"current_steps": 304, "total_steps": 1955, "loss": 1.0699, "lr": 0.00015851635972930967, "epoch": 0.7774936061381074, "percentage": 15.55, "elapsed_time": "1:33:59", "remaining_time": "8:30:27"} +{"current_steps": 305, "total_steps": 1955, "loss": 1.0826, "lr": 0.00015848884493606367, "epoch": 0.7800511508951407, "percentage": 15.6, "elapsed_time": "1:34:17", "remaining_time": "8:30:08"} +{"current_steps": 306, "total_steps": 1955, "loss": 
1.0755, "lr": 0.00015846107977650743, "epoch": 0.782608695652174, "percentage": 15.65, "elapsed_time": "1:34:36", "remaining_time": "8:29:50"} +{"current_steps": 307, "total_steps": 1955, "loss": 1.0416, "lr": 0.0001584330643392072, "epoch": 0.7851662404092071, "percentage": 15.7, "elapsed_time": "1:34:55", "remaining_time": "8:29:32"} +{"current_steps": 308, "total_steps": 1955, "loss": 1.0754, "lr": 0.00015840479871352754, "epoch": 0.7877237851662404, "percentage": 15.75, "elapsed_time": "1:35:13", "remaining_time": "8:29:14"} +{"current_steps": 309, "total_steps": 1955, "loss": 1.0934, "lr": 0.00015837628298963105, "epoch": 0.7902813299232737, "percentage": 15.81, "elapsed_time": "1:35:32", "remaining_time": "8:28:56"} +{"current_steps": 310, "total_steps": 1955, "loss": 1.0632, "lr": 0.00015834751725847816, "epoch": 0.7928388746803069, "percentage": 15.86, "elapsed_time": "1:35:51", "remaining_time": "8:28:38"} +{"current_steps": 311, "total_steps": 1955, "loss": 1.0956, "lr": 0.00015831850161182677, "epoch": 0.7953964194373402, "percentage": 15.91, "elapsed_time": "1:36:09", "remaining_time": "8:28:19"} +{"current_steps": 312, "total_steps": 1955, "loss": 1.1069, "lr": 0.0001582892361422319, "epoch": 0.7979539641943734, "percentage": 15.96, "elapsed_time": "1:36:27", "remaining_time": "8:27:57"} +{"current_steps": 313, "total_steps": 1955, "loss": 1.0728, "lr": 0.00015825972094304555, "epoch": 0.8005115089514067, "percentage": 16.01, "elapsed_time": "1:36:46", "remaining_time": "8:27:38"} +{"current_steps": 314, "total_steps": 1955, "loss": 1.0772, "lr": 0.00015822995610841623, "epoch": 0.80306905370844, "percentage": 16.06, "elapsed_time": "1:37:04", "remaining_time": "8:27:20"} +{"current_steps": 315, "total_steps": 1955, "loss": 1.0654, "lr": 0.00015819994173328885, "epoch": 0.8056265984654731, "percentage": 16.11, "elapsed_time": "1:37:23", "remaining_time": "8:27:01"} +{"current_steps": 316, "total_steps": 1955, "loss": 1.0668, "lr": 
0.00015816967791340417, "epoch": 0.8081841432225064, "percentage": 16.16, "elapsed_time": "1:37:41", "remaining_time": "8:26:43"} +{"current_steps": 317, "total_steps": 1955, "loss": 1.0911, "lr": 0.00015813916474529885, "epoch": 0.8107416879795396, "percentage": 16.21, "elapsed_time": "1:38:00", "remaining_time": "8:26:25"} +{"current_steps": 318, "total_steps": 1955, "loss": 1.0826, "lr": 0.0001581084023263047, "epoch": 0.8132992327365729, "percentage": 16.27, "elapsed_time": "1:38:19", "remaining_time": "8:26:07"} +{"current_steps": 319, "total_steps": 1955, "loss": 1.0426, "lr": 0.00015807739075454874, "epoch": 0.8158567774936062, "percentage": 16.32, "elapsed_time": "1:38:37", "remaining_time": "8:25:48"} +{"current_steps": 320, "total_steps": 1955, "loss": 1.0731, "lr": 0.00015804613012895268, "epoch": 0.8184143222506394, "percentage": 16.37, "elapsed_time": "1:38:56", "remaining_time": "8:25:30"} +{"current_steps": 321, "total_steps": 1955, "loss": 1.0491, "lr": 0.0001580146205492327, "epoch": 0.8209718670076727, "percentage": 16.42, "elapsed_time": "1:39:14", "remaining_time": "8:25:12"} +{"current_steps": 322, "total_steps": 1955, "loss": 1.0796, "lr": 0.00015798286211589916, "epoch": 0.8235294117647058, "percentage": 16.47, "elapsed_time": "1:39:33", "remaining_time": "8:24:54"} +{"current_steps": 323, "total_steps": 1955, "loss": 1.0998, "lr": 0.00015795085493025608, "epoch": 0.8260869565217391, "percentage": 16.52, "elapsed_time": "1:39:51", "remaining_time": "8:24:33"} +{"current_steps": 324, "total_steps": 1955, "loss": 1.097, "lr": 0.00015791859909440107, "epoch": 0.8286445012787724, "percentage": 16.57, "elapsed_time": "1:40:09", "remaining_time": "8:24:11"} +{"current_steps": 325, "total_steps": 1955, "loss": 1.0594, "lr": 0.00015788609471122485, "epoch": 0.8312020460358056, "percentage": 16.62, "elapsed_time": "1:40:28", "remaining_time": "8:23:53"} +{"current_steps": 326, "total_steps": 1955, "loss": 1.0672, "lr": 0.000157853341884411, "epoch": 
0.8337595907928389, "percentage": 16.68, "elapsed_time": "1:40:46", "remaining_time": "8:23:34"} +{"current_steps": 327, "total_steps": 1955, "loss": 1.1076, "lr": 0.00015782034071843557, "epoch": 0.8363171355498721, "percentage": 16.73, "elapsed_time": "1:41:05", "remaining_time": "8:23:16"} +{"current_steps": 328, "total_steps": 1955, "loss": 1.0794, "lr": 0.00015778709131856675, "epoch": 0.8388746803069054, "percentage": 16.78, "elapsed_time": "1:41:23", "remaining_time": "8:22:58"} +{"current_steps": 329, "total_steps": 1955, "loss": 1.1175, "lr": 0.00015775359379086455, "epoch": 0.8414322250639387, "percentage": 16.83, "elapsed_time": "1:41:42", "remaining_time": "8:22:40"} +{"current_steps": 330, "total_steps": 1955, "loss": 1.0893, "lr": 0.00015771984824218053, "epoch": 0.8439897698209718, "percentage": 16.88, "elapsed_time": "1:42:01", "remaining_time": "8:22:22"} +{"current_steps": 331, "total_steps": 1955, "loss": 1.0628, "lr": 0.00015768585478015732, "epoch": 0.8465473145780051, "percentage": 16.93, "elapsed_time": "1:42:18", "remaining_time": "8:21:59"} +{"current_steps": 332, "total_steps": 1955, "loss": 1.0553, "lr": 0.00015765161351322845, "epoch": 0.8491048593350383, "percentage": 16.98, "elapsed_time": "1:42:37", "remaining_time": "8:21:42"} +{"current_steps": 333, "total_steps": 1955, "loss": 1.1007, "lr": 0.0001576171245506178, "epoch": 0.8516624040920716, "percentage": 17.03, "elapsed_time": "1:42:56", "remaining_time": "8:21:24"} +{"current_steps": 334, "total_steps": 1955, "loss": 1.0354, "lr": 0.00015758238800233937, "epoch": 0.8542199488491049, "percentage": 17.08, "elapsed_time": "1:43:14", "remaining_time": "8:21:03"} +{"current_steps": 335, "total_steps": 1955, "loss": 1.0609, "lr": 0.00015754740397919703, "epoch": 0.8567774936061381, "percentage": 17.14, "elapsed_time": "1:43:33", "remaining_time": "8:20:45"} +{"current_steps": 336, "total_steps": 1955, "loss": 1.0599, "lr": 0.0001575121725927839, "epoch": 0.8593350383631714, 
"percentage": 17.19, "elapsed_time": "1:43:51", "remaining_time": "8:20:24"} +{"current_steps": 337, "total_steps": 1955, "loss": 1.0825, "lr": 0.00015747669395548228, "epoch": 0.8618925831202046, "percentage": 17.24, "elapsed_time": "1:44:09", "remaining_time": "8:20:05"} +{"current_steps": 338, "total_steps": 1955, "loss": 1.0867, "lr": 0.00015744096818046306, "epoch": 0.8644501278772379, "percentage": 17.29, "elapsed_time": "1:44:28", "remaining_time": "8:19:47"} +{"current_steps": 339, "total_steps": 1955, "loss": 1.0519, "lr": 0.00015740499538168548, "epoch": 0.8670076726342711, "percentage": 17.34, "elapsed_time": "1:44:46", "remaining_time": "8:19:28"} +{"current_steps": 340, "total_steps": 1955, "loss": 1.0926, "lr": 0.00015736877567389682, "epoch": 0.8695652173913043, "percentage": 17.39, "elapsed_time": "1:45:05", "remaining_time": "8:19:10"} +{"current_steps": 341, "total_steps": 1955, "loss": 1.0485, "lr": 0.00015733230917263182, "epoch": 0.8721227621483376, "percentage": 17.44, "elapsed_time": "1:45:23", "remaining_time": "8:18:51"} +{"current_steps": 342, "total_steps": 1955, "loss": 1.0742, "lr": 0.00015729559599421262, "epoch": 0.8746803069053708, "percentage": 17.49, "elapsed_time": "1:45:42", "remaining_time": "8:18:32"} +{"current_steps": 343, "total_steps": 1955, "loss": 1.0731, "lr": 0.00015725863625574808, "epoch": 0.8772378516624041, "percentage": 17.54, "elapsed_time": "1:46:00", "remaining_time": "8:18:14"} +{"current_steps": 344, "total_steps": 1955, "loss": 1.0818, "lr": 0.0001572214300751336, "epoch": 0.8797953964194374, "percentage": 17.6, "elapsed_time": "1:46:19", "remaining_time": "8:17:55"} +{"current_steps": 345, "total_steps": 1955, "loss": 1.0592, "lr": 0.00015718397757105072, "epoch": 0.8823529411764706, "percentage": 17.65, "elapsed_time": "1:46:37", "remaining_time": "8:17:36"} +{"current_steps": 346, "total_steps": 1955, "loss": 1.124, "lr": 0.0001571462788629666, "epoch": 0.8849104859335039, "percentage": 17.7, 
"elapsed_time": "1:46:55", "remaining_time": "8:17:15"} +{"current_steps": 347, "total_steps": 1955, "loss": 1.0076, "lr": 0.00015710833407113386, "epoch": 0.887468030690537, "percentage": 17.75, "elapsed_time": "1:47:14", "remaining_time": "8:16:57"} +{"current_steps": 348, "total_steps": 1955, "loss": 1.0735, "lr": 0.00015707014331659008, "epoch": 0.8900255754475703, "percentage": 17.8, "elapsed_time": "1:47:33", "remaining_time": "8:16:39"} +{"current_steps": 349, "total_steps": 1955, "loss": 1.0582, "lr": 0.00015703170672115737, "epoch": 0.8925831202046036, "percentage": 17.85, "elapsed_time": "1:47:51", "remaining_time": "8:16:21"} +{"current_steps": 350, "total_steps": 1955, "loss": 1.0788, "lr": 0.00015699302440744202, "epoch": 0.8951406649616368, "percentage": 17.9, "elapsed_time": "1:48:10", "remaining_time": "8:16:03"} +{"current_steps": 351, "total_steps": 1955, "loss": 1.0986, "lr": 0.00015695409649883418, "epoch": 0.8976982097186701, "percentage": 17.95, "elapsed_time": "1:48:28", "remaining_time": "8:15:44"} +{"current_steps": 352, "total_steps": 1955, "loss": 1.0522, "lr": 0.0001569149231195074, "epoch": 0.9002557544757033, "percentage": 18.01, "elapsed_time": "1:48:47", "remaining_time": "8:15:26"} +{"current_steps": 353, "total_steps": 1955, "loss": 1.077, "lr": 0.0001568755043944182, "epoch": 0.9028132992327366, "percentage": 18.06, "elapsed_time": "1:49:06", "remaining_time": "8:15:08"} +{"current_steps": 354, "total_steps": 1955, "loss": 1.0659, "lr": 0.00015683584044930572, "epoch": 0.9053708439897699, "percentage": 18.11, "elapsed_time": "1:49:24", "remaining_time": "8:14:47"} +{"current_steps": 355, "total_steps": 1955, "loss": 1.0446, "lr": 0.00015679593141069132, "epoch": 0.907928388746803, "percentage": 18.16, "elapsed_time": "1:49:43", "remaining_time": "8:14:30"} +{"current_steps": 356, "total_steps": 1955, "loss": 1.0577, "lr": 0.0001567557774058782, "epoch": 0.9104859335038363, "percentage": 18.21, "elapsed_time": "1:50:01", 
"remaining_time": "8:14:12"} +{"current_steps": 357, "total_steps": 1955, "loss": 1.0675, "lr": 0.0001567153785629509, "epoch": 0.9130434782608695, "percentage": 18.26, "elapsed_time": "1:50:20", "remaining_time": "8:13:56"} +{"current_steps": 358, "total_steps": 1955, "loss": 1.0891, "lr": 0.000156674735010775, "epoch": 0.9156010230179028, "percentage": 18.31, "elapsed_time": "1:50:39", "remaining_time": "8:13:38"} +{"current_steps": 359, "total_steps": 1955, "loss": 1.0715, "lr": 0.00015663384687899663, "epoch": 0.9181585677749361, "percentage": 18.36, "elapsed_time": "1:50:58", "remaining_time": "8:13:19"} +{"current_steps": 360, "total_steps": 1955, "loss": 1.0396, "lr": 0.00015659271429804215, "epoch": 0.9207161125319693, "percentage": 18.41, "elapsed_time": "1:51:16", "remaining_time": "8:13:01"} +{"current_steps": 361, "total_steps": 1955, "loss": 1.0919, "lr": 0.00015655133739911757, "epoch": 0.9232736572890026, "percentage": 18.47, "elapsed_time": "1:51:35", "remaining_time": "8:12:42"} +{"current_steps": 362, "total_steps": 1955, "loss": 1.0151, "lr": 0.0001565097163142083, "epoch": 0.9258312020460358, "percentage": 18.52, "elapsed_time": "1:51:53", "remaining_time": "8:12:22"} +{"current_steps": 363, "total_steps": 1955, "loss": 1.0796, "lr": 0.00015646785117607865, "epoch": 0.928388746803069, "percentage": 18.57, "elapsed_time": "1:52:11", "remaining_time": "8:12:01"} +{"current_steps": 364, "total_steps": 1955, "loss": 1.0651, "lr": 0.00015642574211827142, "epoch": 0.9309462915601023, "percentage": 18.62, "elapsed_time": "1:52:29", "remaining_time": "8:11:43"} +{"current_steps": 365, "total_steps": 1955, "loss": 1.0785, "lr": 0.00015638338927510752, "epoch": 0.9335038363171355, "percentage": 18.67, "elapsed_time": "1:52:48", "remaining_time": "8:11:24"} +{"current_steps": 366, "total_steps": 1955, "loss": 1.1032, "lr": 0.00015634079278168542, "epoch": 0.9360613810741688, "percentage": 18.72, "elapsed_time": "1:53:07", "remaining_time": "8:11:06"} 
+{"current_steps": 367, "total_steps": 1955, "loss": 1.0784, "lr": 0.00015629795277388077, "epoch": 0.9386189258312021, "percentage": 18.77, "elapsed_time": "1:53:25", "remaining_time": "8:10:48"} +{"current_steps": 368, "total_steps": 1955, "loss": 1.0729, "lr": 0.00015625486938834613, "epoch": 0.9411764705882353, "percentage": 18.82, "elapsed_time": "1:53:44", "remaining_time": "8:10:30"} +{"current_steps": 369, "total_steps": 1955, "loss": 1.0676, "lr": 0.00015621154276251024, "epoch": 0.9437340153452686, "percentage": 18.87, "elapsed_time": "1:54:02", "remaining_time": "8:10:09"} +{"current_steps": 370, "total_steps": 1955, "loss": 1.0626, "lr": 0.00015616797303457782, "epoch": 0.9462915601023018, "percentage": 18.93, "elapsed_time": "1:54:21", "remaining_time": "8:09:51"} +{"current_steps": 371, "total_steps": 1955, "loss": 1.0935, "lr": 0.00015612416034352906, "epoch": 0.948849104859335, "percentage": 18.98, "elapsed_time": "1:54:39", "remaining_time": "8:09:32"} +{"current_steps": 372, "total_steps": 1955, "loss": 1.0714, "lr": 0.00015608010482911908, "epoch": 0.9514066496163683, "percentage": 19.03, "elapsed_time": "1:54:58", "remaining_time": "8:09:13"} +{"current_steps": 373, "total_steps": 1955, "loss": 1.0757, "lr": 0.00015603580663187765, "epoch": 0.9539641943734015, "percentage": 19.08, "elapsed_time": "1:55:16", "remaining_time": "8:08:55"} +{"current_steps": 374, "total_steps": 1955, "loss": 1.0762, "lr": 0.00015599126589310857, "epoch": 0.9565217391304348, "percentage": 19.13, "elapsed_time": "1:55:35", "remaining_time": "8:08:37"} +{"current_steps": 375, "total_steps": 1955, "loss": 1.0991, "lr": 0.00015594648275488944, "epoch": 0.959079283887468, "percentage": 19.18, "elapsed_time": "1:55:53", "remaining_time": "8:08:19"} +{"current_steps": 376, "total_steps": 1955, "loss": 1.0493, "lr": 0.00015590145736007091, "epoch": 0.9616368286445013, "percentage": 19.23, "elapsed_time": "1:56:11", "remaining_time": "8:07:58"} +{"current_steps": 377, 
"total_steps": 1955, "loss": 1.0845, "lr": 0.00015585618985227657, "epoch": 0.9641943734015346, "percentage": 19.28, "elapsed_time": "1:56:30", "remaining_time": "8:07:39"} +{"current_steps": 378, "total_steps": 1955, "loss": 1.0851, "lr": 0.00015581068037590212, "epoch": 0.9667519181585678, "percentage": 19.34, "elapsed_time": "1:56:48", "remaining_time": "8:07:19"} +{"current_steps": 379, "total_steps": 1955, "loss": 1.054, "lr": 0.00015576492907611524, "epoch": 0.969309462915601, "percentage": 19.39, "elapsed_time": "1:57:07", "remaining_time": "8:07:01"} +{"current_steps": 380, "total_steps": 1955, "loss": 1.0377, "lr": 0.00015571893609885493, "epoch": 0.9718670076726342, "percentage": 19.44, "elapsed_time": "1:57:25", "remaining_time": "8:06:41"} +{"current_steps": 381, "total_steps": 1955, "loss": 1.0698, "lr": 0.00015567270159083107, "epoch": 0.9744245524296675, "percentage": 19.49, "elapsed_time": "1:57:43", "remaining_time": "8:06:22"} +{"current_steps": 382, "total_steps": 1955, "loss": 1.0723, "lr": 0.00015562622569952408, "epoch": 0.9769820971867008, "percentage": 19.54, "elapsed_time": "1:58:01", "remaining_time": "8:06:02"} +{"current_steps": 383, "total_steps": 1955, "loss": 1.0753, "lr": 0.00015557950857318425, "epoch": 0.979539641943734, "percentage": 19.59, "elapsed_time": "1:58:20", "remaining_time": "8:05:44"} +{"current_steps": 384, "total_steps": 1955, "loss": 1.0301, "lr": 0.00015553255036083145, "epoch": 0.9820971867007673, "percentage": 19.64, "elapsed_time": "1:58:38", "remaining_time": "8:05:24"} +{"current_steps": 385, "total_steps": 1955, "loss": 1.1103, "lr": 0.0001554853512122545, "epoch": 0.9846547314578005, "percentage": 19.69, "elapsed_time": "1:58:57", "remaining_time": "8:05:07"} +{"current_steps": 386, "total_steps": 1955, "loss": 1.0633, "lr": 0.00015543791127801084, "epoch": 0.9872122762148338, "percentage": 19.74, "elapsed_time": "1:59:16", "remaining_time": "8:04:48"} +{"current_steps": 387, "total_steps": 1955, "loss": 
1.0769, "lr": 0.0001553902307094259, "epoch": 0.989769820971867, "percentage": 19.8, "elapsed_time": "1:59:34", "remaining_time": "8:04:28"} +{"current_steps": 388, "total_steps": 1955, "loss": 1.0905, "lr": 0.00015534230965859276, "epoch": 0.9923273657289002, "percentage": 19.85, "elapsed_time": "1:59:52", "remaining_time": "8:04:09"} +{"current_steps": 389, "total_steps": 1955, "loss": 1.0737, "lr": 0.00015529414827837156, "epoch": 0.9948849104859335, "percentage": 19.9, "elapsed_time": "2:00:11", "remaining_time": "8:03:51"} +{"current_steps": 390, "total_steps": 1955, "loss": 1.0539, "lr": 0.00015524574672238906, "epoch": 0.9974424552429667, "percentage": 19.95, "elapsed_time": "2:00:30", "remaining_time": "8:03:34"} +{"current_steps": 391, "total_steps": 1955, "loss": 1.0846, "lr": 0.00015519710514503814, "epoch": 1.0, "percentage": 20.0, "elapsed_time": "2:00:49", "remaining_time": "8:03:16"} +{"current_steps": 392, "total_steps": 1955, "loss": 1.0432, "lr": 0.00015514822370147732, "epoch": 1.0025575447570332, "percentage": 20.05, "elapsed_time": "2:01:18", "remaining_time": "8:03:43"} +{"current_steps": 393, "total_steps": 1955, "loss": 1.0578, "lr": 0.00015509910254763023, "epoch": 1.0051150895140666, "percentage": 20.1, "elapsed_time": "2:01:37", "remaining_time": "8:03:24"} +{"current_steps": 394, "total_steps": 1955, "loss": 1.0306, "lr": 0.0001550497418401852, "epoch": 1.0076726342710998, "percentage": 20.15, "elapsed_time": "2:01:56", "remaining_time": "8:03:05"} +{"current_steps": 395, "total_steps": 1955, "loss": 1.0383, "lr": 0.00015500014173659457, "epoch": 1.010230179028133, "percentage": 20.2, "elapsed_time": "2:02:14", "remaining_time": "8:02:46"} +{"current_steps": 396, "total_steps": 1955, "loss": 1.0573, "lr": 0.00015495030239507442, "epoch": 1.0127877237851663, "percentage": 20.26, "elapsed_time": "2:02:32", "remaining_time": "8:02:27"} +{"current_steps": 397, "total_steps": 1955, "loss": 1.0573, "lr": 0.00015490022397460392, "epoch": 
1.0153452685421995, "percentage": 20.31, "elapsed_time": "2:02:50", "remaining_time": "8:02:06"} +{"current_steps": 398, "total_steps": 1955, "loss": 1.0474, "lr": 0.0001548499066349249, "epoch": 1.0179028132992327, "percentage": 20.36, "elapsed_time": "2:03:09", "remaining_time": "8:01:48"} +{"current_steps": 399, "total_steps": 1955, "loss": 1.0175, "lr": 0.00015479935053654126, "epoch": 1.020460358056266, "percentage": 20.41, "elapsed_time": "2:03:28", "remaining_time": "8:01:29"} +{"current_steps": 400, "total_steps": 1955, "loss": 1.0724, "lr": 0.00015474855584071847, "epoch": 1.0230179028132993, "percentage": 20.46, "elapsed_time": "2:03:46", "remaining_time": "8:01:10"} +{"current_steps": 401, "total_steps": 1955, "loss": 1.0527, "lr": 0.0001546975227094832, "epoch": 1.0255754475703325, "percentage": 20.51, "elapsed_time": "2:04:05", "remaining_time": "8:00:53"} +{"current_steps": 402, "total_steps": 1955, "loss": 1.0695, "lr": 0.00015464625130562256, "epoch": 1.0281329923273657, "percentage": 20.56, "elapsed_time": "2:04:23", "remaining_time": "8:00:34"} +{"current_steps": 403, "total_steps": 1955, "loss": 1.0344, "lr": 0.0001545947417926838, "epoch": 1.030690537084399, "percentage": 20.61, "elapsed_time": "2:04:42", "remaining_time": "8:00:16"} +{"current_steps": 404, "total_steps": 1955, "loss": 1.0443, "lr": 0.00015454299433497362, "epoch": 1.0332480818414322, "percentage": 20.66, "elapsed_time": "2:05:01", "remaining_time": "7:59:57"} +{"current_steps": 405, "total_steps": 1955, "loss": 1.0393, "lr": 0.00015449100909755784, "epoch": 1.0358056265984654, "percentage": 20.72, "elapsed_time": "2:05:19", "remaining_time": "7:59:39"} +{"current_steps": 406, "total_steps": 1955, "loss": 1.0737, "lr": 0.00015443878624626066, "epoch": 1.0383631713554988, "percentage": 20.77, "elapsed_time": "2:05:37", "remaining_time": "7:59:18"} +{"current_steps": 407, "total_steps": 1955, "loss": 1.0106, "lr": 0.0001543863259476642, "epoch": 1.040920716112532, "percentage": 
20.82, "elapsed_time": "2:05:56", "remaining_time": "7:59:00"} +{"current_steps": 408, "total_steps": 1955, "loss": 1.0399, "lr": 0.00015433362836910817, "epoch": 1.0434782608695652, "percentage": 20.87, "elapsed_time": "2:06:15", "remaining_time": "7:58:42"} +{"current_steps": 409, "total_steps": 1955, "loss": 1.0222, "lr": 0.00015428069367868892, "epoch": 1.0460358056265984, "percentage": 20.92, "elapsed_time": "2:06:33", "remaining_time": "7:58:23"} +{"current_steps": 410, "total_steps": 1955, "loss": 1.0161, "lr": 0.00015422752204525937, "epoch": 1.0485933503836318, "percentage": 20.97, "elapsed_time": "2:06:52", "remaining_time": "7:58:05"} +{"current_steps": 411, "total_steps": 1955, "loss": 1.0446, "lr": 0.0001541741136384281, "epoch": 1.051150895140665, "percentage": 21.02, "elapsed_time": "2:07:10", "remaining_time": "7:57:45"} +{"current_steps": 412, "total_steps": 1955, "loss": 1.0245, "lr": 0.00015412046862855902, "epoch": 1.0537084398976981, "percentage": 21.07, "elapsed_time": "2:07:28", "remaining_time": "7:57:25"} +{"current_steps": 413, "total_steps": 1955, "loss": 1.0308, "lr": 0.00015406658718677076, "epoch": 1.0562659846547315, "percentage": 21.13, "elapsed_time": "2:07:47", "remaining_time": "7:57:06"} +{"current_steps": 414, "total_steps": 1955, "loss": 1.0768, "lr": 0.00015401246948493612, "epoch": 1.0588235294117647, "percentage": 21.18, "elapsed_time": "2:08:05", "remaining_time": "7:56:47"} +{"current_steps": 415, "total_steps": 1955, "loss": 1.0473, "lr": 0.00015395811569568154, "epoch": 1.061381074168798, "percentage": 21.23, "elapsed_time": "2:08:24", "remaining_time": "7:56:29"} +{"current_steps": 416, "total_steps": 1955, "loss": 1.0299, "lr": 0.00015390352599238655, "epoch": 1.0639386189258313, "percentage": 21.28, "elapsed_time": "2:08:42", "remaining_time": "7:56:10"} +{"current_steps": 417, "total_steps": 1955, "loss": 1.0139, "lr": 0.00015384870054918314, "epoch": 1.0664961636828645, "percentage": 21.33, "elapsed_time": 
"2:09:01", "remaining_time": "7:55:52"} +{"current_steps": 418, "total_steps": 1955, "loss": 1.0237, "lr": 0.00015379363954095535, "epoch": 1.0690537084398977, "percentage": 21.38, "elapsed_time": "2:09:19", "remaining_time": "7:55:33"} +{"current_steps": 419, "total_steps": 1955, "loss": 1.0786, "lr": 0.0001537383431433386, "epoch": 1.0716112531969308, "percentage": 21.43, "elapsed_time": "2:09:38", "remaining_time": "7:55:15"} +{"current_steps": 420, "total_steps": 1955, "loss": 1.0264, "lr": 0.00015368281153271918, "epoch": 1.0741687979539642, "percentage": 21.48, "elapsed_time": "2:09:56", "remaining_time": "7:54:56"} +{"current_steps": 421, "total_steps": 1955, "loss": 1.0413, "lr": 0.0001536270448862336, "epoch": 1.0767263427109974, "percentage": 21.53, "elapsed_time": "2:10:15", "remaining_time": "7:54:36"} +{"current_steps": 422, "total_steps": 1955, "loss": 1.0305, "lr": 0.00015357104338176823, "epoch": 1.0792838874680306, "percentage": 21.59, "elapsed_time": "2:10:33", "remaining_time": "7:54:18"} +{"current_steps": 423, "total_steps": 1955, "loss": 1.0177, "lr": 0.00015351480719795845, "epoch": 1.081841432225064, "percentage": 21.64, "elapsed_time": "2:10:52", "remaining_time": "7:53:59"} +{"current_steps": 424, "total_steps": 1955, "loss": 1.0663, "lr": 0.00015345833651418835, "epoch": 1.0843989769820972, "percentage": 21.69, "elapsed_time": "2:11:11", "remaining_time": "7:53:41"} +{"current_steps": 425, "total_steps": 1955, "loss": 1.0262, "lr": 0.00015340163151058997, "epoch": 1.0869565217391304, "percentage": 21.74, "elapsed_time": "2:11:29", "remaining_time": "7:53:22"} +{"current_steps": 426, "total_steps": 1955, "loss": 1.0166, "lr": 0.00015334469236804278, "epoch": 1.0895140664961638, "percentage": 21.79, "elapsed_time": "2:11:47", "remaining_time": "7:53:02"} +{"current_steps": 427, "total_steps": 1955, "loss": 1.041, "lr": 0.00015328751926817314, "epoch": 1.092071611253197, "percentage": 21.84, "elapsed_time": "2:12:06", "remaining_time": 
"7:52:44"} +{"current_steps": 428, "total_steps": 1955, "loss": 1.0236, "lr": 0.0001532301123933537, "epoch": 1.0946291560102301, "percentage": 21.89, "elapsed_time": "2:12:24", "remaining_time": "7:52:25"} +{"current_steps": 429, "total_steps": 1955, "loss": 1.0528, "lr": 0.00015317247192670282, "epoch": 1.0971867007672633, "percentage": 21.94, "elapsed_time": "2:12:43", "remaining_time": "7:52:07"} +{"current_steps": 430, "total_steps": 1955, "loss": 1.0277, "lr": 0.00015311459805208397, "epoch": 1.0997442455242967, "percentage": 21.99, "elapsed_time": "2:13:02", "remaining_time": "7:51:48"} +{"current_steps": 431, "total_steps": 1955, "loss": 1.0582, "lr": 0.0001530564909541051, "epoch": 1.10230179028133, "percentage": 22.05, "elapsed_time": "2:13:21", "remaining_time": "7:51:31"} +{"current_steps": 432, "total_steps": 1955, "loss": 1.077, "lr": 0.0001529981508181182, "epoch": 1.104859335038363, "percentage": 22.1, "elapsed_time": "2:13:39", "remaining_time": "7:51:13"} +{"current_steps": 433, "total_steps": 1955, "loss": 1.0542, "lr": 0.00015293957783021854, "epoch": 1.1074168797953965, "percentage": 22.15, "elapsed_time": "2:13:57", "remaining_time": "7:50:52"} +{"current_steps": 434, "total_steps": 1955, "loss": 1.0418, "lr": 0.0001528807721772442, "epoch": 1.1099744245524297, "percentage": 22.2, "elapsed_time": "2:14:15", "remaining_time": "7:50:33"} +{"current_steps": 435, "total_steps": 1955, "loss": 1.0343, "lr": 0.00015282173404677533, "epoch": 1.1125319693094629, "percentage": 22.25, "elapsed_time": "2:14:34", "remaining_time": "7:50:14"} +{"current_steps": 436, "total_steps": 1955, "loss": 1.0341, "lr": 0.00015276246362713375, "epoch": 1.1150895140664963, "percentage": 22.3, "elapsed_time": "2:14:53", "remaining_time": "7:49:55"} +{"current_steps": 437, "total_steps": 1955, "loss": 1.014, "lr": 0.00015270296110738221, "epoch": 1.1176470588235294, "percentage": 22.35, "elapsed_time": "2:15:11", "remaining_time": "7:49:37"} +{"current_steps": 438, 
"total_steps": 1955, "loss": 1.0269, "lr": 0.0001526432266773238, "epoch": 1.1202046035805626, "percentage": 22.4, "elapsed_time": "2:15:30", "remaining_time": "7:49:18"} +{"current_steps": 439, "total_steps": 1955, "loss": 1.0472, "lr": 0.0001525832605275014, "epoch": 1.1227621483375958, "percentage": 22.46, "elapsed_time": "2:15:48", "remaining_time": "7:48:58"} +{"current_steps": 440, "total_steps": 1955, "loss": 1.024, "lr": 0.000152523062849197, "epoch": 1.1253196930946292, "percentage": 22.51, "elapsed_time": "2:16:06", "remaining_time": "7:48:39"} +{"current_steps": 441, "total_steps": 1955, "loss": 1.0448, "lr": 0.0001524626338344311, "epoch": 1.1278772378516624, "percentage": 22.56, "elapsed_time": "2:16:25", "remaining_time": "7:48:20"} +{"current_steps": 442, "total_steps": 1955, "loss": 1.0244, "lr": 0.00015240197367596226, "epoch": 1.1304347826086956, "percentage": 22.61, "elapsed_time": "2:16:43", "remaining_time": "7:48:01"} +{"current_steps": 443, "total_steps": 1955, "loss": 1.0499, "lr": 0.00015234108256728616, "epoch": 1.132992327365729, "percentage": 22.66, "elapsed_time": "2:17:02", "remaining_time": "7:47:44"} +{"current_steps": 444, "total_steps": 1955, "loss": 1.0151, "lr": 0.00015227996070263535, "epoch": 1.1355498721227621, "percentage": 22.71, "elapsed_time": "2:17:20", "remaining_time": "7:47:23"} +{"current_steps": 445, "total_steps": 1955, "loss": 1.0345, "lr": 0.00015221860827697832, "epoch": 1.1381074168797953, "percentage": 22.76, "elapsed_time": "2:17:39", "remaining_time": "7:47:05"} +{"current_steps": 446, "total_steps": 1955, "loss": 1.008, "lr": 0.00015215702548601907, "epoch": 1.1406649616368287, "percentage": 22.81, "elapsed_time": "2:17:57", "remaining_time": "7:46:47"} +{"current_steps": 447, "total_steps": 1955, "loss": 0.9962, "lr": 0.00015209521252619644, "epoch": 1.143222506393862, "percentage": 22.86, "elapsed_time": "2:18:16", "remaining_time": "7:46:28"} +{"current_steps": 448, "total_steps": 1955, "loss": 1.0299, 
"lr": 0.00015203316959468344, "epoch": 1.145780051150895, "percentage": 22.92, "elapsed_time": "2:18:34", "remaining_time": "7:46:08"} +{"current_steps": 449, "total_steps": 1955, "loss": 1.019, "lr": 0.0001519708968893867, "epoch": 1.1483375959079285, "percentage": 22.97, "elapsed_time": "2:18:52", "remaining_time": "7:45:49"} +{"current_steps": 450, "total_steps": 1955, "loss": 1.0708, "lr": 0.00015190839460894567, "epoch": 1.1508951406649617, "percentage": 23.02, "elapsed_time": "2:19:11", "remaining_time": "7:45:30"} +{"current_steps": 451, "total_steps": 1955, "loss": 1.0417, "lr": 0.00015184566295273227, "epoch": 1.1534526854219949, "percentage": 23.07, "elapsed_time": "2:19:30", "remaining_time": "7:45:12"} +{"current_steps": 452, "total_steps": 1955, "loss": 1.0464, "lr": 0.00015178270212084995, "epoch": 1.156010230179028, "percentage": 23.12, "elapsed_time": "2:19:48", "remaining_time": "7:44:53"} +{"current_steps": 453, "total_steps": 1955, "loss": 1.0612, "lr": 0.00015171951231413328, "epoch": 1.1585677749360614, "percentage": 23.17, "elapsed_time": "2:20:07", "remaining_time": "7:44:36"} +{"current_steps": 454, "total_steps": 1955, "loss": 1.0325, "lr": 0.00015165609373414722, "epoch": 1.1611253196930946, "percentage": 23.22, "elapsed_time": "2:20:26", "remaining_time": "7:44:17"} +{"current_steps": 455, "total_steps": 1955, "loss": 1.0295, "lr": 0.0001515924465831864, "epoch": 1.1636828644501278, "percentage": 23.27, "elapsed_time": "2:20:44", "remaining_time": "7:43:59"} +{"current_steps": 456, "total_steps": 1955, "loss": 1.0231, "lr": 0.00015152857106427462, "epoch": 1.1662404092071612, "percentage": 23.32, "elapsed_time": "2:21:03", "remaining_time": "7:43:41"} +{"current_steps": 457, "total_steps": 1955, "loss": 1.015, "lr": 0.00015146446738116412, "epoch": 1.1687979539641944, "percentage": 23.38, "elapsed_time": "2:21:21", "remaining_time": "7:43:21"} +{"current_steps": 458, "total_steps": 1955, "loss": 1.0195, "lr": 0.00015140013573833498, 
"epoch": 1.1713554987212276, "percentage": 23.43, "elapsed_time": "2:21:39", "remaining_time": "7:43:02"} +{"current_steps": 459, "total_steps": 1955, "loss": 1.026, "lr": 0.00015133557634099435, "epoch": 1.1739130434782608, "percentage": 23.48, "elapsed_time": "2:21:58", "remaining_time": "7:42:44"} +{"current_steps": 460, "total_steps": 1955, "loss": 1.055, "lr": 0.00015127078939507595, "epoch": 1.1764705882352942, "percentage": 23.53, "elapsed_time": "2:22:17", "remaining_time": "7:42:25"} +{"current_steps": 461, "total_steps": 1955, "loss": 1.0768, "lr": 0.00015120577510723934, "epoch": 1.1790281329923273, "percentage": 23.58, "elapsed_time": "2:22:35", "remaining_time": "7:42:05"} +{"current_steps": 462, "total_steps": 1955, "loss": 1.0227, "lr": 0.00015114053368486919, "epoch": 1.1815856777493605, "percentage": 23.63, "elapsed_time": "2:22:53", "remaining_time": "7:41:46"} +{"current_steps": 463, "total_steps": 1955, "loss": 1.0101, "lr": 0.0001510750653360748, "epoch": 1.184143222506394, "percentage": 23.68, "elapsed_time": "2:23:12", "remaining_time": "7:41:28"} +{"current_steps": 464, "total_steps": 1955, "loss": 1.0372, "lr": 0.00015100937026968922, "epoch": 1.186700767263427, "percentage": 23.73, "elapsed_time": "2:23:30", "remaining_time": "7:41:09"} +{"current_steps": 465, "total_steps": 1955, "loss": 1.0471, "lr": 0.0001509434486952688, "epoch": 1.1892583120204603, "percentage": 23.79, "elapsed_time": "2:23:49", "remaining_time": "7:40:51"} +{"current_steps": 466, "total_steps": 1955, "loss": 1.0431, "lr": 0.00015087730082309232, "epoch": 1.1918158567774937, "percentage": 23.84, "elapsed_time": "2:24:08", "remaining_time": "7:40:32"} +{"current_steps": 467, "total_steps": 1955, "loss": 1.0199, "lr": 0.00015081092686416043, "epoch": 1.1943734015345269, "percentage": 23.89, "elapsed_time": "2:24:26", "remaining_time": "7:40:13"} +{"current_steps": 468, "total_steps": 1955, "loss": 1.0706, "lr": 0.00015074432703019504, "epoch": 1.19693094629156, 
"percentage": 23.94, "elapsed_time": "2:24:44", "remaining_time": "7:39:53"} +{"current_steps": 469, "total_steps": 1955, "loss": 1.0346, "lr": 0.00015067750153363845, "epoch": 1.1994884910485935, "percentage": 23.99, "elapsed_time": "2:25:02", "remaining_time": "7:39:34"} +{"current_steps": 470, "total_steps": 1955, "loss": 1.0554, "lr": 0.00015061045058765282, "epoch": 1.2020460358056266, "percentage": 24.04, "elapsed_time": "2:25:21", "remaining_time": "7:39:15"} +{"current_steps": 471, "total_steps": 1955, "loss": 1.0279, "lr": 0.0001505431744061195, "epoch": 1.2046035805626598, "percentage": 24.09, "elapsed_time": "2:25:39", "remaining_time": "7:38:56"} +{"current_steps": 472, "total_steps": 1955, "loss": 0.9885, "lr": 0.0001504756732036383, "epoch": 1.207161125319693, "percentage": 24.14, "elapsed_time": "2:25:57", "remaining_time": "7:38:36"} +{"current_steps": 473, "total_steps": 1955, "loss": 1.0432, "lr": 0.00015040794719552676, "epoch": 1.2097186700767264, "percentage": 24.19, "elapsed_time": "2:26:16", "remaining_time": "7:38:18"} +{"current_steps": 474, "total_steps": 1955, "loss": 1.027, "lr": 0.00015033999659781953, "epoch": 1.2122762148337596, "percentage": 24.25, "elapsed_time": "2:26:34", "remaining_time": "7:37:59"} +{"current_steps": 475, "total_steps": 1955, "loss": 1.0421, "lr": 0.00015027182162726769, "epoch": 1.2148337595907928, "percentage": 24.3, "elapsed_time": "2:26:53", "remaining_time": "7:37:41"} +{"current_steps": 476, "total_steps": 1955, "loss": 1.013, "lr": 0.000150203422501338, "epoch": 1.2173913043478262, "percentage": 24.35, "elapsed_time": "2:27:12", "remaining_time": "7:37:22"} +{"current_steps": 477, "total_steps": 1955, "loss": 1.0671, "lr": 0.00015013479943821225, "epoch": 1.2199488491048593, "percentage": 24.4, "elapsed_time": "2:27:30", "remaining_time": "7:37:02"} +{"current_steps": 478, "total_steps": 1955, "loss": 1.0506, "lr": 0.00015006595265678655, "epoch": 1.2225063938618925, "percentage": 24.45, "elapsed_time": 
"2:27:48", "remaining_time": "7:36:41"} +{"current_steps": 479, "total_steps": 1955, "loss": 1.058, "lr": 0.00014999688237667065, "epoch": 1.2250639386189257, "percentage": 24.5, "elapsed_time": "2:28:06", "remaining_time": "7:36:24"} +{"current_steps": 480, "total_steps": 1955, "loss": 1.0112, "lr": 0.00014992758881818722, "epoch": 1.227621483375959, "percentage": 24.55, "elapsed_time": "2:28:25", "remaining_time": "7:36:05"} +{"current_steps": 481, "total_steps": 1955, "loss": 1.0223, "lr": 0.00014985807220237112, "epoch": 1.2301790281329923, "percentage": 24.6, "elapsed_time": "2:28:43", "remaining_time": "7:35:46"} +{"current_steps": 482, "total_steps": 1955, "loss": 1.0437, "lr": 0.00014978833275096872, "epoch": 1.2327365728900257, "percentage": 24.65, "elapsed_time": "2:29:02", "remaining_time": "7:35:28"} +{"current_steps": 483, "total_steps": 1955, "loss": 1.0331, "lr": 0.00014971837068643732, "epoch": 1.2352941176470589, "percentage": 24.71, "elapsed_time": "2:29:21", "remaining_time": "7:35:10"} +{"current_steps": 484, "total_steps": 1955, "loss": 1.0503, "lr": 0.00014964818623194412, "epoch": 1.237851662404092, "percentage": 24.76, "elapsed_time": "2:29:39", "remaining_time": "7:34:51"} +{"current_steps": 485, "total_steps": 1955, "loss": 1.0536, "lr": 0.00014957777961136588, "epoch": 1.2404092071611252, "percentage": 24.81, "elapsed_time": "2:29:58", "remaining_time": "7:34:33"} +{"current_steps": 486, "total_steps": 1955, "loss": 1.0452, "lr": 0.00014950715104928794, "epoch": 1.2429667519181586, "percentage": 24.86, "elapsed_time": "2:30:17", "remaining_time": "7:34:15"} +{"current_steps": 487, "total_steps": 1955, "loss": 1.0205, "lr": 0.0001494363007710036, "epoch": 1.2455242966751918, "percentage": 24.91, "elapsed_time": "2:30:35", "remaining_time": "7:33:56"} +{"current_steps": 488, "total_steps": 1955, "loss": 1.0355, "lr": 0.00014936522900251348, "epoch": 1.248081841432225, "percentage": 24.96, "elapsed_time": "2:30:54", "remaining_time": 
"7:33:38"} +{"current_steps": 489, "total_steps": 1955, "loss": 1.0455, "lr": 0.00014929393597052458, "epoch": 1.2506393861892584, "percentage": 25.01, "elapsed_time": "2:31:13", "remaining_time": "7:33:21"} +{"current_steps": 490, "total_steps": 1955, "loss": 1.0625, "lr": 0.00014922242190244981, "epoch": 1.2531969309462916, "percentage": 25.06, "elapsed_time": "2:31:31", "remaining_time": "7:33:00"} +{"current_steps": 491, "total_steps": 1955, "loss": 1.0346, "lr": 0.0001491506870264071, "epoch": 1.2557544757033248, "percentage": 25.12, "elapsed_time": "2:31:49", "remaining_time": "7:32:42"} +{"current_steps": 492, "total_steps": 1955, "loss": 1.0605, "lr": 0.00014907873157121875, "epoch": 1.258312020460358, "percentage": 25.17, "elapsed_time": "2:32:08", "remaining_time": "7:32:24"} +{"current_steps": 493, "total_steps": 1955, "loss": 1.0282, "lr": 0.00014900655576641057, "epoch": 1.2608695652173914, "percentage": 25.22, "elapsed_time": "2:32:27", "remaining_time": "7:32:05"} +{"current_steps": 494, "total_steps": 1955, "loss": 1.0264, "lr": 0.00014893415984221141, "epoch": 1.2634271099744245, "percentage": 25.27, "elapsed_time": "2:32:45", "remaining_time": "7:31:47"} +{"current_steps": 495, "total_steps": 1955, "loss": 1.0514, "lr": 0.00014886154402955217, "epoch": 1.265984654731458, "percentage": 25.32, "elapsed_time": "2:33:04", "remaining_time": "7:31:28"} +{"current_steps": 496, "total_steps": 1955, "loss": 1.0408, "lr": 0.00014878870856006513, "epoch": 1.2685421994884911, "percentage": 25.37, "elapsed_time": "2:33:22", "remaining_time": "7:31:09"} +{"current_steps": 497, "total_steps": 1955, "loss": 1.0338, "lr": 0.00014871565366608329, "epoch": 1.2710997442455243, "percentage": 25.42, "elapsed_time": "2:33:41", "remaining_time": "7:30:50"} +{"current_steps": 498, "total_steps": 1955, "loss": 1.0193, "lr": 0.0001486423795806396, "epoch": 1.2736572890025575, "percentage": 25.47, "elapsed_time": "2:33:59", "remaining_time": "7:30:32"} +{"current_steps": 
499, "total_steps": 1955, "loss": 1.0324, "lr": 0.00014856888653746607, "epoch": 1.2762148337595907, "percentage": 25.52, "elapsed_time": "2:34:18", "remaining_time": "7:30:14"} +{"current_steps": 500, "total_steps": 1955, "loss": 1.0076, "lr": 0.00014849517477099334, "epoch": 1.278772378516624, "percentage": 25.58, "elapsed_time": "2:34:36", "remaining_time": "7:29:55"} +{"current_steps": 501, "total_steps": 1955, "loss": 1.0266, "lr": 0.00014842124451634956, "epoch": 1.2813299232736572, "percentage": 25.63, "elapsed_time": "2:34:55", "remaining_time": "7:29:36"} +{"current_steps": 502, "total_steps": 1955, "loss": 1.033, "lr": 0.00014834709600935995, "epoch": 1.2838874680306906, "percentage": 25.68, "elapsed_time": "2:35:13", "remaining_time": "7:29:18"} +{"current_steps": 503, "total_steps": 1955, "loss": 1.0519, "lr": 0.00014827272948654584, "epoch": 1.2864450127877238, "percentage": 25.73, "elapsed_time": "2:35:32", "remaining_time": "7:28:59"} +{"current_steps": 504, "total_steps": 1955, "loss": 1.0258, "lr": 0.00014819814518512403, "epoch": 1.289002557544757, "percentage": 25.78, "elapsed_time": "2:35:50", "remaining_time": "7:28:40"} +{"current_steps": 505, "total_steps": 1955, "loss": 1.0398, "lr": 0.000148123343343006, "epoch": 1.2915601023017902, "percentage": 25.83, "elapsed_time": "2:36:09", "remaining_time": "7:28:22"} +{"current_steps": 506, "total_steps": 1955, "loss": 1.0155, "lr": 0.0001480483241987971, "epoch": 1.2941176470588236, "percentage": 25.88, "elapsed_time": "2:36:27", "remaining_time": "7:28:02"} +{"current_steps": 507, "total_steps": 1955, "loss": 1.0486, "lr": 0.0001479730879917959, "epoch": 1.2966751918158568, "percentage": 25.93, "elapsed_time": "2:36:46", "remaining_time": "7:27:44"} +{"current_steps": 508, "total_steps": 1955, "loss": 1.0115, "lr": 0.00014789763496199335, "epoch": 1.29923273657289, "percentage": 25.98, "elapsed_time": "2:37:04", "remaining_time": "7:27:23"} +{"current_steps": 509, "total_steps": 1955, "loss": 
1.0449, "lr": 0.00014782196535007198, "epoch": 1.3017902813299234, "percentage": 26.04, "elapsed_time": "2:37:22", "remaining_time": "7:27:06"} +{"current_steps": 510, "total_steps": 1955, "loss": 1.0132, "lr": 0.00014774607939740524, "epoch": 1.3043478260869565, "percentage": 26.09, "elapsed_time": "2:37:40", "remaining_time": "7:26:46"} +{"current_steps": 511, "total_steps": 1955, "loss": 1.0229, "lr": 0.0001476699773460567, "epoch": 1.3069053708439897, "percentage": 26.14, "elapsed_time": "2:37:59", "remaining_time": "7:26:27"} +{"current_steps": 512, "total_steps": 1955, "loss": 1.0509, "lr": 0.00014759365943877906, "epoch": 1.309462915601023, "percentage": 26.19, "elapsed_time": "2:38:17", "remaining_time": "7:26:07"} +{"current_steps": 513, "total_steps": 1955, "loss": 1.0078, "lr": 0.00014751712591901385, "epoch": 1.3120204603580563, "percentage": 26.24, "elapsed_time": "2:38:36", "remaining_time": "7:25:48"} +{"current_steps": 514, "total_steps": 1955, "loss": 1.0289, "lr": 0.00014744037703089014, "epoch": 1.3145780051150895, "percentage": 26.29, "elapsed_time": "2:38:54", "remaining_time": "7:25:29"} +{"current_steps": 515, "total_steps": 1955, "loss": 1.0213, "lr": 0.00014736341301922406, "epoch": 1.317135549872123, "percentage": 26.34, "elapsed_time": "2:39:12", "remaining_time": "7:25:11"} +{"current_steps": 516, "total_steps": 1955, "loss": 1.0164, "lr": 0.00014728623412951802, "epoch": 1.319693094629156, "percentage": 26.39, "elapsed_time": "2:39:31", "remaining_time": "7:24:52"} +{"current_steps": 517, "total_steps": 1955, "loss": 1.0119, "lr": 0.00014720884060795975, "epoch": 1.3222506393861893, "percentage": 26.45, "elapsed_time": "2:39:50", "remaining_time": "7:24:34"} +{"current_steps": 518, "total_steps": 1955, "loss": 1.0295, "lr": 0.00014713123270142163, "epoch": 1.3248081841432224, "percentage": 26.5, "elapsed_time": "2:40:08", "remaining_time": "7:24:15"} +{"current_steps": 519, "total_steps": 1955, "loss": 1.0197, "lr": 
0.00014705341065745999, "epoch": 1.3273657289002558, "percentage": 26.55, "elapsed_time": "2:40:26", "remaining_time": "7:23:55"} +{"current_steps": 520, "total_steps": 1955, "loss": 1.0624, "lr": 0.00014697537472431411, "epoch": 1.329923273657289, "percentage": 26.6, "elapsed_time": "2:40:45", "remaining_time": "7:23:36"} +{"current_steps": 521, "total_steps": 1955, "loss": 1.0647, "lr": 0.0001468971251509056, "epoch": 1.3324808184143222, "percentage": 26.65, "elapsed_time": "2:41:03", "remaining_time": "7:23:18"} +{"current_steps": 522, "total_steps": 1955, "loss": 1.0402, "lr": 0.00014681866218683757, "epoch": 1.3350383631713556, "percentage": 26.7, "elapsed_time": "2:41:22", "remaining_time": "7:22:59"} +{"current_steps": 523, "total_steps": 1955, "loss": 1.0304, "lr": 0.0001467399860823937, "epoch": 1.3375959079283888, "percentage": 26.75, "elapsed_time": "2:41:40", "remaining_time": "7:22:41"} +{"current_steps": 524, "total_steps": 1955, "loss": 1.0548, "lr": 0.00014666109708853767, "epoch": 1.340153452685422, "percentage": 26.8, "elapsed_time": "2:41:59", "remaining_time": "7:22:22"} +{"current_steps": 525, "total_steps": 1955, "loss": 1.0287, "lr": 0.00014658199545691222, "epoch": 1.3427109974424551, "percentage": 26.85, "elapsed_time": "2:42:17", "remaining_time": "7:22:03"} +{"current_steps": 526, "total_steps": 1955, "loss": 1.0539, "lr": 0.0001465026814398383, "epoch": 1.3452685421994885, "percentage": 26.91, "elapsed_time": "2:42:37", "remaining_time": "7:21:47"} +{"current_steps": 527, "total_steps": 1955, "loss": 1.0035, "lr": 0.00014642315529031442, "epoch": 1.3478260869565217, "percentage": 26.96, "elapsed_time": "2:42:55", "remaining_time": "7:21:28"} +{"current_steps": 528, "total_steps": 1955, "loss": 1.0659, "lr": 0.00014634341726201572, "epoch": 1.350383631713555, "percentage": 27.01, "elapsed_time": "2:43:14", "remaining_time": "7:21:10"} +{"current_steps": 529, "total_steps": 1955, "loss": 1.0282, "lr": 0.00014626346760929316, "epoch": 
1.3529411764705883, "percentage": 27.06, "elapsed_time": "2:43:32", "remaining_time": "7:20:51"} +{"current_steps": 530, "total_steps": 1955, "loss": 1.0002, "lr": 0.00014618330658717278, "epoch": 1.3554987212276215, "percentage": 27.11, "elapsed_time": "2:43:51", "remaining_time": "7:20:33"} +{"current_steps": 531, "total_steps": 1955, "loss": 1.0377, "lr": 0.00014610293445135492, "epoch": 1.3580562659846547, "percentage": 27.16, "elapsed_time": "2:44:09", "remaining_time": "7:20:13"} +{"current_steps": 532, "total_steps": 1955, "loss": 1.023, "lr": 0.00014602235145821322, "epoch": 1.3606138107416879, "percentage": 27.21, "elapsed_time": "2:44:28", "remaining_time": "7:19:55"} +{"current_steps": 533, "total_steps": 1955, "loss": 1.0601, "lr": 0.00014594155786479398, "epoch": 1.3631713554987213, "percentage": 27.26, "elapsed_time": "2:44:46", "remaining_time": "7:19:36"} +{"current_steps": 534, "total_steps": 1955, "loss": 1.0292, "lr": 0.00014586055392881527, "epoch": 1.3657289002557544, "percentage": 27.31, "elapsed_time": "2:45:04", "remaining_time": "7:19:16"} +{"current_steps": 535, "total_steps": 1955, "loss": 1.0397, "lr": 0.00014577933990866617, "epoch": 1.3682864450127878, "percentage": 27.37, "elapsed_time": "2:45:23", "remaining_time": "7:18:58"} +{"current_steps": 536, "total_steps": 1955, "loss": 1.0749, "lr": 0.00014569791606340577, "epoch": 1.370843989769821, "percentage": 27.42, "elapsed_time": "2:45:42", "remaining_time": "7:18:40"} +{"current_steps": 537, "total_steps": 1955, "loss": 1.0293, "lr": 0.00014561628265276257, "epoch": 1.3734015345268542, "percentage": 27.47, "elapsed_time": "2:46:00", "remaining_time": "7:18:22"} +{"current_steps": 538, "total_steps": 1955, "loss": 1.0398, "lr": 0.00014553443993713355, "epoch": 1.3759590792838874, "percentage": 27.52, "elapsed_time": "2:46:19", "remaining_time": "7:18:03"} +{"current_steps": 539, "total_steps": 1955, "loss": 1.0268, "lr": 0.00014545238817758327, "epoch": 1.3785166240409208, 
"percentage": 27.57, "elapsed_time": "2:46:37", "remaining_time": "7:17:44"} +{"current_steps": 540, "total_steps": 1955, "loss": 1.0354, "lr": 0.00014537012763584316, "epoch": 1.381074168797954, "percentage": 27.62, "elapsed_time": "2:46:56", "remaining_time": "7:17:26"} +{"current_steps": 541, "total_steps": 1955, "loss": 1.0642, "lr": 0.0001452876585743106, "epoch": 1.3836317135549872, "percentage": 27.67, "elapsed_time": "2:47:14", "remaining_time": "7:17:07"} +{"current_steps": 542, "total_steps": 1955, "loss": 1.0534, "lr": 0.00014520498125604814, "epoch": 1.3861892583120206, "percentage": 27.72, "elapsed_time": "2:47:33", "remaining_time": "7:16:49"} +{"current_steps": 543, "total_steps": 1955, "loss": 1.01, "lr": 0.00014512209594478263, "epoch": 1.3887468030690537, "percentage": 27.77, "elapsed_time": "2:47:51", "remaining_time": "7:16:30"} +{"current_steps": 544, "total_steps": 1955, "loss": 1.0307, "lr": 0.00014503900290490436, "epoch": 1.391304347826087, "percentage": 27.83, "elapsed_time": "2:48:09", "remaining_time": "7:16:10"} +{"current_steps": 545, "total_steps": 1955, "loss": 1.0211, "lr": 0.00014495570240146625, "epoch": 1.39386189258312, "percentage": 27.88, "elapsed_time": "2:48:28", "remaining_time": "7:15:51"} +{"current_steps": 546, "total_steps": 1955, "loss": 1.0005, "lr": 0.000144872194700183, "epoch": 1.3964194373401535, "percentage": 27.93, "elapsed_time": "2:48:46", "remaining_time": "7:15:32"} +{"current_steps": 547, "total_steps": 1955, "loss": 1.0387, "lr": 0.00014478848006743022, "epoch": 1.3989769820971867, "percentage": 27.98, "elapsed_time": "2:49:04", "remaining_time": "7:15:12"} +{"current_steps": 548, "total_steps": 1955, "loss": 1.0292, "lr": 0.00014470455877024365, "epoch": 1.40153452685422, "percentage": 28.03, "elapsed_time": "2:49:23", "remaining_time": "7:14:53"} +{"current_steps": 549, "total_steps": 1955, "loss": 1.0511, "lr": 0.00014462043107631818, "epoch": 1.4040920716112533, "percentage": 28.08, "elapsed_time": 
"2:49:41", "remaining_time": "7:14:35"} +{"current_steps": 550, "total_steps": 1955, "loss": 0.9925, "lr": 0.00014453609725400713, "epoch": 1.4066496163682864, "percentage": 28.13, "elapsed_time": "2:50:00", "remaining_time": "7:14:16"} +{"current_steps": 551, "total_steps": 1955, "loss": 1.0061, "lr": 0.0001444515575723213, "epoch": 1.4092071611253196, "percentage": 28.18, "elapsed_time": "2:50:18", "remaining_time": "7:13:58"} +{"current_steps": 552, "total_steps": 1955, "loss": 1.0488, "lr": 0.00014436681230092815, "epoch": 1.4117647058823528, "percentage": 28.24, "elapsed_time": "2:50:36", "remaining_time": "7:13:39"} +{"current_steps": 553, "total_steps": 1955, "loss": 1.0324, "lr": 0.00014428186171015097, "epoch": 1.4143222506393862, "percentage": 28.29, "elapsed_time": "2:50:55", "remaining_time": "7:13:19"} +{"current_steps": 554, "total_steps": 1955, "loss": 1.0422, "lr": 0.00014419670607096791, "epoch": 1.4168797953964194, "percentage": 28.34, "elapsed_time": "2:51:13", "remaining_time": "7:13:00"} +{"current_steps": 555, "total_steps": 1955, "loss": 1.056, "lr": 0.00014411134565501133, "epoch": 1.4194373401534528, "percentage": 28.39, "elapsed_time": "2:51:32", "remaining_time": "7:12:42"} +{"current_steps": 556, "total_steps": 1955, "loss": 1.0408, "lr": 0.00014402578073456661, "epoch": 1.421994884910486, "percentage": 28.44, "elapsed_time": "2:51:50", "remaining_time": "7:12:24"} +{"current_steps": 557, "total_steps": 1955, "loss": 1.0271, "lr": 0.00014394001158257163, "epoch": 1.4245524296675192, "percentage": 28.49, "elapsed_time": "2:52:09", "remaining_time": "7:12:05"} +{"current_steps": 558, "total_steps": 1955, "loss": 1.0193, "lr": 0.00014385403847261562, "epoch": 1.4271099744245523, "percentage": 28.54, "elapsed_time": "2:52:27", "remaining_time": "7:11:45"} +{"current_steps": 559, "total_steps": 1955, "loss": 1.0122, "lr": 0.00014376786167893846, "epoch": 1.4296675191815857, "percentage": 28.59, "elapsed_time": "2:52:46", "remaining_time": 
"7:11:27"} +{"current_steps": 560, "total_steps": 1955, "loss": 1.0045, "lr": 0.00014368148147642974, "epoch": 1.432225063938619, "percentage": 28.64, "elapsed_time": "2:53:04", "remaining_time": "7:11:08"} +{"current_steps": 561, "total_steps": 1955, "loss": 1.0144, "lr": 0.00014359489814062788, "epoch": 1.434782608695652, "percentage": 28.7, "elapsed_time": "2:53:23", "remaining_time": "7:10:51"} +{"current_steps": 562, "total_steps": 1955, "loss": 1.0287, "lr": 0.00014350811194771928, "epoch": 1.4373401534526855, "percentage": 28.75, "elapsed_time": "2:53:42", "remaining_time": "7:10:32"} +{"current_steps": 563, "total_steps": 1955, "loss": 1.0566, "lr": 0.00014342112317453738, "epoch": 1.4398976982097187, "percentage": 28.8, "elapsed_time": "2:54:00", "remaining_time": "7:10:12"} +{"current_steps": 564, "total_steps": 1955, "loss": 1.052, "lr": 0.00014333393209856182, "epoch": 1.4424552429667519, "percentage": 28.85, "elapsed_time": "2:54:18", "remaining_time": "7:09:54"} +{"current_steps": 565, "total_steps": 1955, "loss": 1.0608, "lr": 0.00014324653899791765, "epoch": 1.445012787723785, "percentage": 28.9, "elapsed_time": "2:54:37", "remaining_time": "7:09:36"} +{"current_steps": 566, "total_steps": 1955, "loss": 1.0234, "lr": 0.00014315894415137416, "epoch": 1.4475703324808185, "percentage": 28.95, "elapsed_time": "2:54:55", "remaining_time": "7:09:17"} +{"current_steps": 567, "total_steps": 1955, "loss": 1.0048, "lr": 0.00014307114783834442, "epoch": 1.4501278772378516, "percentage": 29.0, "elapsed_time": "2:55:14", "remaining_time": "7:08:59"} +{"current_steps": 568, "total_steps": 1955, "loss": 1.0363, "lr": 0.0001429831503388839, "epoch": 1.452685421994885, "percentage": 29.05, "elapsed_time": "2:55:32", "remaining_time": "7:08:39"} +{"current_steps": 569, "total_steps": 1955, "loss": 1.0269, "lr": 0.00014289495193368996, "epoch": 1.4552429667519182, "percentage": 29.1, "elapsed_time": "2:55:51", "remaining_time": "7:08:21"} +{"current_steps": 570, 
"total_steps": 1955, "loss": 1.017, "lr": 0.0001428065529041008, "epoch": 1.4578005115089514, "percentage": 29.16, "elapsed_time": "2:56:09", "remaining_time": "7:08:02"} +{"current_steps": 571, "total_steps": 1955, "loss": 1.0375, "lr": 0.00014271795353209456, "epoch": 1.4603580562659846, "percentage": 29.21, "elapsed_time": "2:56:28", "remaining_time": "7:07:44"} +{"current_steps": 572, "total_steps": 1955, "loss": 1.0434, "lr": 0.00014262915410028848, "epoch": 1.4629156010230178, "percentage": 29.26, "elapsed_time": "2:56:47", "remaining_time": "7:07:27"} +{"current_steps": 573, "total_steps": 1955, "loss": 1.0292, "lr": 0.00014254015489193782, "epoch": 1.4654731457800512, "percentage": 29.31, "elapsed_time": "2:57:05", "remaining_time": "7:07:07"} +{"current_steps": 574, "total_steps": 1955, "loss": 1.0159, "lr": 0.00014245095619093532, "epoch": 1.4680306905370843, "percentage": 29.36, "elapsed_time": "2:57:24", "remaining_time": "7:06:49"} +{"current_steps": 575, "total_steps": 1955, "loss": 1.0484, "lr": 0.00014236155828180983, "epoch": 1.4705882352941178, "percentage": 29.41, "elapsed_time": "2:57:42", "remaining_time": "7:06:29"} +{"current_steps": 576, "total_steps": 1955, "loss": 1.0508, "lr": 0.00014227196144972582, "epoch": 1.473145780051151, "percentage": 29.46, "elapsed_time": "2:58:01", "remaining_time": "7:06:11"} +{"current_steps": 577, "total_steps": 1955, "loss": 1.0403, "lr": 0.0001421821659804822, "epoch": 1.4757033248081841, "percentage": 29.51, "elapsed_time": "2:58:19", "remaining_time": "7:05:53"} +{"current_steps": 578, "total_steps": 1955, "loss": 1.0304, "lr": 0.00014209217216051156, "epoch": 1.4782608695652173, "percentage": 29.57, "elapsed_time": "2:58:38", "remaining_time": "7:05:34"} +{"current_steps": 579, "total_steps": 1955, "loss": 1.0102, "lr": 0.00014200198027687912, "epoch": 1.4808184143222507, "percentage": 29.62, "elapsed_time": "2:58:56", "remaining_time": "7:05:16"} +{"current_steps": 580, "total_steps": 1955, "loss": 
1.0253, "lr": 0.00014191159061728193, "epoch": 1.4833759590792839, "percentage": 29.67, "elapsed_time": "2:59:15", "remaining_time": "7:04:57"} +{"current_steps": 581, "total_steps": 1955, "loss": 1.044, "lr": 0.00014182100347004793, "epoch": 1.485933503836317, "percentage": 29.72, "elapsed_time": "2:59:33", "remaining_time": "7:04:38"} +{"current_steps": 582, "total_steps": 1955, "loss": 1.0322, "lr": 0.000141730219124135, "epoch": 1.4884910485933505, "percentage": 29.77, "elapsed_time": "2:59:52", "remaining_time": "7:04:19"} +{"current_steps": 583, "total_steps": 1955, "loss": 1.0572, "lr": 0.00014163923786913004, "epoch": 1.4910485933503836, "percentage": 29.82, "elapsed_time": "3:00:10", "remaining_time": "7:04:01"} +{"current_steps": 584, "total_steps": 1955, "loss": 1.0627, "lr": 0.00014154805999524802, "epoch": 1.4936061381074168, "percentage": 29.87, "elapsed_time": "3:00:29", "remaining_time": "7:03:42"} +{"current_steps": 585, "total_steps": 1955, "loss": 1.0017, "lr": 0.0001414566857933312, "epoch": 1.49616368286445, "percentage": 29.92, "elapsed_time": "3:00:47", "remaining_time": "7:03:23"} +{"current_steps": 586, "total_steps": 1955, "loss": 1.0168, "lr": 0.00014136511555484798, "epoch": 1.4987212276214834, "percentage": 29.97, "elapsed_time": "3:01:06", "remaining_time": "7:03:05"} +{"current_steps": 587, "total_steps": 1955, "loss": 1.0253, "lr": 0.00014127334957189219, "epoch": 1.5012787723785166, "percentage": 30.03, "elapsed_time": "3:01:24", "remaining_time": "7:02:46"} +{"current_steps": 588, "total_steps": 1955, "loss": 1.0523, "lr": 0.00014118138813718192, "epoch": 1.50383631713555, "percentage": 30.08, "elapsed_time": "3:01:43", "remaining_time": "7:02:28"} +{"current_steps": 589, "total_steps": 1955, "loss": 0.9921, "lr": 0.0001410892315440588, "epoch": 1.5063938618925832, "percentage": 30.13, "elapsed_time": "3:02:01", "remaining_time": "7:02:10"} +{"current_steps": 590, "total_steps": 1955, "loss": 1.0219, "lr": 0.00014099688008648703, 
"epoch": 1.5089514066496164, "percentage": 30.18, "elapsed_time": "3:02:20", "remaining_time": "7:01:51"} +{"current_steps": 591, "total_steps": 1955, "loss": 0.9963, "lr": 0.0001409043340590523, "epoch": 1.5115089514066495, "percentage": 30.23, "elapsed_time": "3:02:38", "remaining_time": "7:01:32"} +{"current_steps": 592, "total_steps": 1955, "loss": 1.0484, "lr": 0.00014081159375696102, "epoch": 1.5140664961636827, "percentage": 30.28, "elapsed_time": "3:02:57", "remaining_time": "7:01:13"} +{"current_steps": 593, "total_steps": 1955, "loss": 0.978, "lr": 0.00014071865947603922, "epoch": 1.5166240409207161, "percentage": 30.33, "elapsed_time": "3:03:15", "remaining_time": "7:00:53"} +{"current_steps": 594, "total_steps": 1955, "loss": 1.0431, "lr": 0.00014062553151273177, "epoch": 1.5191815856777495, "percentage": 30.38, "elapsed_time": "3:03:33", "remaining_time": "7:00:35"} +{"current_steps": 595, "total_steps": 1955, "loss": 1.0157, "lr": 0.0001405322101641013, "epoch": 1.5217391304347827, "percentage": 30.43, "elapsed_time": "3:03:52", "remaining_time": "7:00:17"} +{"current_steps": 596, "total_steps": 1955, "loss": 1.026, "lr": 0.00014043869572782737, "epoch": 1.5242966751918159, "percentage": 30.49, "elapsed_time": "3:04:11", "remaining_time": "6:59:58"} +{"current_steps": 597, "total_steps": 1955, "loss": 1.0334, "lr": 0.00014034498850220537, "epoch": 1.526854219948849, "percentage": 30.54, "elapsed_time": "3:04:29", "remaining_time": "6:59:39"} +{"current_steps": 598, "total_steps": 1955, "loss": 1.0353, "lr": 0.00014025108878614576, "epoch": 1.5294117647058822, "percentage": 30.59, "elapsed_time": "3:04:47", "remaining_time": "6:59:21"} +{"current_steps": 599, "total_steps": 1955, "loss": 1.0115, "lr": 0.0001401569968791729, "epoch": 1.5319693094629157, "percentage": 30.64, "elapsed_time": "3:05:06", "remaining_time": "6:59:02"} +{"current_steps": 600, "total_steps": 1955, "loss": 1.0604, "lr": 0.00014006271308142433, "epoch": 1.5345268542199488, 
"percentage": 30.69, "elapsed_time": "3:05:25", "remaining_time": "6:58:44"} +{"current_steps": 601, "total_steps": 1955, "loss": 1.0096, "lr": 0.0001399682376936495, "epoch": 1.5370843989769822, "percentage": 30.74, "elapsed_time": "3:05:43", "remaining_time": "6:58:25"} +{"current_steps": 602, "total_steps": 1955, "loss": 1.0059, "lr": 0.00013987357101720929, "epoch": 1.5396419437340154, "percentage": 30.79, "elapsed_time": "3:06:01", "remaining_time": "6:58:05"} +{"current_steps": 603, "total_steps": 1955, "loss": 1.0197, "lr": 0.00013977871335407445, "epoch": 1.5421994884910486, "percentage": 30.84, "elapsed_time": "3:06:20", "remaining_time": "6:57:48"} +{"current_steps": 604, "total_steps": 1955, "loss": 1.0302, "lr": 0.00013968366500682514, "epoch": 1.5447570332480818, "percentage": 30.9, "elapsed_time": "3:06:39", "remaining_time": "6:57:29"} +{"current_steps": 605, "total_steps": 1955, "loss": 1.0167, "lr": 0.00013958842627864975, "epoch": 1.547314578005115, "percentage": 30.95, "elapsed_time": "3:06:57", "remaining_time": "6:57:11"} +{"current_steps": 606, "total_steps": 1955, "loss": 0.994, "lr": 0.00013949299747334387, "epoch": 1.5498721227621484, "percentage": 31.0, "elapsed_time": "3:07:16", "remaining_time": "6:56:53"} +{"current_steps": 607, "total_steps": 1955, "loss": 1.0182, "lr": 0.00013939737889530948, "epoch": 1.5524296675191815, "percentage": 31.05, "elapsed_time": "3:07:35", "remaining_time": "6:56:34"} +{"current_steps": 608, "total_steps": 1955, "loss": 1.0432, "lr": 0.00013930157084955387, "epoch": 1.554987212276215, "percentage": 31.1, "elapsed_time": "3:07:53", "remaining_time": "6:56:14"} +{"current_steps": 609, "total_steps": 1955, "loss": 1.0392, "lr": 0.00013920557364168872, "epoch": 1.5575447570332481, "percentage": 31.15, "elapsed_time": "3:08:11", "remaining_time": "6:55:56"} +{"current_steps": 610, "total_steps": 1955, "loss": 1.0089, "lr": 0.00013910938757792911, "epoch": 1.5601023017902813, "percentage": 31.2, "elapsed_time": 
"3:08:30", "remaining_time": "6:55:38"} +{"current_steps": 611, "total_steps": 1955, "loss": 1.0433, "lr": 0.00013901301296509247, "epoch": 1.5626598465473145, "percentage": 31.25, "elapsed_time": "3:08:48", "remaining_time": "6:55:20"} +{"current_steps": 612, "total_steps": 1955, "loss": 1.033, "lr": 0.00013891645011059774, "epoch": 1.5652173913043477, "percentage": 31.3, "elapsed_time": "3:09:07", "remaining_time": "6:55:00"} +{"current_steps": 613, "total_steps": 1955, "loss": 1.0233, "lr": 0.00013881969932246434, "epoch": 1.567774936061381, "percentage": 31.36, "elapsed_time": "3:09:25", "remaining_time": "6:54:42"} +{"current_steps": 614, "total_steps": 1955, "loss": 1.0283, "lr": 0.00013872276090931112, "epoch": 1.5703324808184145, "percentage": 31.41, "elapsed_time": "3:09:44", "remaining_time": "6:54:23"} +{"current_steps": 615, "total_steps": 1955, "loss": 1.0449, "lr": 0.0001386256351803554, "epoch": 1.5728900255754477, "percentage": 31.46, "elapsed_time": "3:10:02", "remaining_time": "6:54:05"} +{"current_steps": 616, "total_steps": 1955, "loss": 1.0005, "lr": 0.00013852832244541207, "epoch": 1.5754475703324808, "percentage": 31.51, "elapsed_time": "3:10:21", "remaining_time": "6:53:47"} +{"current_steps": 617, "total_steps": 1955, "loss": 1.034, "lr": 0.00013843082301489247, "epoch": 1.578005115089514, "percentage": 31.56, "elapsed_time": "3:10:40", "remaining_time": "6:53:28"} +{"current_steps": 618, "total_steps": 1955, "loss": 1.0292, "lr": 0.00013833313719980358, "epoch": 1.5805626598465472, "percentage": 31.61, "elapsed_time": "3:10:58", "remaining_time": "6:53:08"} +{"current_steps": 619, "total_steps": 1955, "loss": 1.0142, "lr": 0.00013823526531174675, "epoch": 1.5831202046035806, "percentage": 31.66, "elapsed_time": "3:11:16", "remaining_time": "6:52:50"} +{"current_steps": 620, "total_steps": 1955, "loss": 1.0019, "lr": 0.000138137207662917, "epoch": 1.5856777493606138, "percentage": 31.71, "elapsed_time": "3:11:35", "remaining_time": 
"6:52:32"} +{"current_steps": 621, "total_steps": 1955, "loss": 1.0308, "lr": 0.00013803896456610187, "epoch": 1.5882352941176472, "percentage": 31.76, "elapsed_time": "3:11:53", "remaining_time": "6:52:13"} +{"current_steps": 622, "total_steps": 1955, "loss": 1.0455, "lr": 0.0001379405363346804, "epoch": 1.5907928388746804, "percentage": 31.82, "elapsed_time": "3:12:12", "remaining_time": "6:51:55"} +{"current_steps": 623, "total_steps": 1955, "loss": 1.018, "lr": 0.00013784192328262227, "epoch": 1.5933503836317136, "percentage": 31.87, "elapsed_time": "3:12:30", "remaining_time": "6:51:35"} +{"current_steps": 624, "total_steps": 1955, "loss": 1.0566, "lr": 0.00013774312572448658, "epoch": 1.5959079283887467, "percentage": 31.92, "elapsed_time": "3:12:49", "remaining_time": "6:51:16"} +{"current_steps": 625, "total_steps": 1955, "loss": 1.0759, "lr": 0.00013764414397542113, "epoch": 1.59846547314578, "percentage": 31.97, "elapsed_time": "3:13:07", "remaining_time": "6:50:58"} +{"current_steps": 626, "total_steps": 1955, "loss": 1.0041, "lr": 0.0001375449783511611, "epoch": 1.6010230179028133, "percentage": 32.02, "elapsed_time": "3:13:26", "remaining_time": "6:50:40"} +{"current_steps": 627, "total_steps": 1955, "loss": 1.0141, "lr": 0.0001374456291680283, "epoch": 1.6035805626598465, "percentage": 32.07, "elapsed_time": "3:13:45", "remaining_time": "6:50:22"} +{"current_steps": 628, "total_steps": 1955, "loss": 1.0532, "lr": 0.00013734609674293001, "epoch": 1.60613810741688, "percentage": 32.12, "elapsed_time": "3:14:03", "remaining_time": "6:50:03"} +{"current_steps": 629, "total_steps": 1955, "loss": 1.0079, "lr": 0.00013724638139335808, "epoch": 1.608695652173913, "percentage": 32.17, "elapsed_time": "3:14:22", "remaining_time": "6:49:45"} +{"current_steps": 630, "total_steps": 1955, "loss": 1.014, "lr": 0.00013714648343738785, "epoch": 1.6112531969309463, "percentage": 32.23, "elapsed_time": "3:14:40", "remaining_time": "6:49:26"} +{"current_steps": 631, 
"total_steps": 1955, "loss": 1.0217, "lr": 0.00013704640319367706, "epoch": 1.6138107416879794, "percentage": 32.28, "elapsed_time": "3:14:59", "remaining_time": "6:49:07"} +{"current_steps": 632, "total_steps": 1955, "loss": 1.0151, "lr": 0.000136946140981465, "epoch": 1.6163682864450126, "percentage": 32.33, "elapsed_time": "3:15:17", "remaining_time": "6:48:49"} +{"current_steps": 633, "total_steps": 1955, "loss": 0.9972, "lr": 0.00013684569712057141, "epoch": 1.618925831202046, "percentage": 32.38, "elapsed_time": "3:15:36", "remaining_time": "6:48:31"} +{"current_steps": 634, "total_steps": 1955, "loss": 1.0438, "lr": 0.0001367450719313954, "epoch": 1.6214833759590794, "percentage": 32.43, "elapsed_time": "3:15:55", "remaining_time": "6:48:12"} +{"current_steps": 635, "total_steps": 1955, "loss": 0.9964, "lr": 0.00013664426573491454, "epoch": 1.6240409207161126, "percentage": 32.48, "elapsed_time": "3:16:13", "remaining_time": "6:47:54"} +{"current_steps": 636, "total_steps": 1955, "loss": 1.0428, "lr": 0.0001365432788526838, "epoch": 1.6265984654731458, "percentage": 32.53, "elapsed_time": "3:16:32", "remaining_time": "6:47:36"} +{"current_steps": 637, "total_steps": 1955, "loss": 1.0374, "lr": 0.0001364421116068344, "epoch": 1.629156010230179, "percentage": 32.58, "elapsed_time": "3:16:50", "remaining_time": "6:47:17"} +{"current_steps": 638, "total_steps": 1955, "loss": 1.022, "lr": 0.00013634076432007298, "epoch": 1.6317135549872122, "percentage": 32.63, "elapsed_time": "3:17:09", "remaining_time": "6:46:58"} +{"current_steps": 639, "total_steps": 1955, "loss": 1.0555, "lr": 0.00013623923731568053, "epoch": 1.6342710997442456, "percentage": 32.69, "elapsed_time": "3:17:27", "remaining_time": "6:46:38"} +{"current_steps": 640, "total_steps": 1955, "loss": 0.9896, "lr": 0.00013613753091751117, "epoch": 1.6368286445012787, "percentage": 32.74, "elapsed_time": "3:17:45", "remaining_time": "6:46:19"} +{"current_steps": 641, "total_steps": 1955, "loss": 1.0104, 
"lr": 0.00013603564544999134, "epoch": 1.6393861892583121, "percentage": 32.79, "elapsed_time": "3:18:03", "remaining_time": "6:46:00"} +{"current_steps": 642, "total_steps": 1955, "loss": 1.013, "lr": 0.00013593358123811873, "epoch": 1.6419437340153453, "percentage": 32.84, "elapsed_time": "3:18:21", "remaining_time": "6:45:41"} +{"current_steps": 643, "total_steps": 1955, "loss": 1.0285, "lr": 0.00013583133860746102, "epoch": 1.6445012787723785, "percentage": 32.89, "elapsed_time": "3:18:40", "remaining_time": "6:45:22"} +{"current_steps": 644, "total_steps": 1955, "loss": 1.0735, "lr": 0.00013572891788415526, "epoch": 1.6470588235294117, "percentage": 32.94, "elapsed_time": "3:18:59", "remaining_time": "6:45:04"} +{"current_steps": 645, "total_steps": 1955, "loss": 0.9838, "lr": 0.00013562631939490638, "epoch": 1.6496163682864449, "percentage": 32.99, "elapsed_time": "3:19:16", "remaining_time": "6:44:43"} +{"current_steps": 646, "total_steps": 1955, "loss": 1.0407, "lr": 0.00013552354346698644, "epoch": 1.6521739130434783, "percentage": 33.04, "elapsed_time": "3:19:35", "remaining_time": "6:44:25"} +{"current_steps": 647, "total_steps": 1955, "loss": 0.9994, "lr": 0.0001354205904282335, "epoch": 1.6547314578005117, "percentage": 33.09, "elapsed_time": "3:19:53", "remaining_time": "6:44:07"} +{"current_steps": 648, "total_steps": 1955, "loss": 1.0158, "lr": 0.0001353174606070505, "epoch": 1.6572890025575449, "percentage": 33.15, "elapsed_time": "3:20:11", "remaining_time": "6:43:47"} +{"current_steps": 649, "total_steps": 1955, "loss": 1.0223, "lr": 0.00013521415433240448, "epoch": 1.659846547314578, "percentage": 33.2, "elapsed_time": "3:20:30", "remaining_time": "6:43:29"} +{"current_steps": 650, "total_steps": 1955, "loss": 1.0048, "lr": 0.0001351106719338251, "epoch": 1.6624040920716112, "percentage": 33.25, "elapsed_time": "3:20:49", "remaining_time": "6:43:10"} +{"current_steps": 651, "total_steps": 1955, "loss": 1.031, "lr": 0.000135007013741404, "epoch": 
1.6649616368286444, "percentage": 33.3, "elapsed_time": "3:21:07", "remaining_time": "6:42:52"} +{"current_steps": 652, "total_steps": 1955, "loss": 1.0551, "lr": 0.0001349031800857934, "epoch": 1.6675191815856778, "percentage": 33.35, "elapsed_time": "3:21:26", "remaining_time": "6:42:34"} +{"current_steps": 653, "total_steps": 1955, "loss": 1.0296, "lr": 0.00013479917129820547, "epoch": 1.670076726342711, "percentage": 33.4, "elapsed_time": "3:21:44", "remaining_time": "6:42:14"} +{"current_steps": 654, "total_steps": 1955, "loss": 1.0355, "lr": 0.00013469498771041078, "epoch": 1.6726342710997444, "percentage": 33.45, "elapsed_time": "3:22:02", "remaining_time": "6:41:55"} +{"current_steps": 655, "total_steps": 1955, "loss": 1.0239, "lr": 0.0001345906296547376, "epoch": 1.6751918158567776, "percentage": 33.5, "elapsed_time": "3:22:21", "remaining_time": "6:41:36"} +{"current_steps": 656, "total_steps": 1955, "loss": 1.0107, "lr": 0.00013448609746407076, "epoch": 1.6777493606138107, "percentage": 33.55, "elapsed_time": "3:22:39", "remaining_time": "6:41:17"} +{"current_steps": 657, "total_steps": 1955, "loss": 1.0132, "lr": 0.0001343813914718504, "epoch": 1.680306905370844, "percentage": 33.61, "elapsed_time": "3:22:57", "remaining_time": "6:40:58"} +{"current_steps": 658, "total_steps": 1955, "loss": 1.034, "lr": 0.0001342765120120712, "epoch": 1.682864450127877, "percentage": 33.66, "elapsed_time": "3:23:16", "remaining_time": "6:40:40"} +{"current_steps": 659, "total_steps": 1955, "loss": 1.0359, "lr": 0.0001341714594192811, "epoch": 1.6854219948849105, "percentage": 33.71, "elapsed_time": "3:23:34", "remaining_time": "6:40:21"} +{"current_steps": 660, "total_steps": 1955, "loss": 1.0515, "lr": 0.00013406623402858038, "epoch": 1.6879795396419437, "percentage": 33.76, "elapsed_time": "3:23:52", "remaining_time": "6:40:02"} +{"current_steps": 661, "total_steps": 1955, "loss": 1.0295, "lr": 0.00013396083617562041, "epoch": 1.690537084398977, "percentage": 33.81, 
"elapsed_time": "3:24:10", "remaining_time": "6:39:42"} +{"current_steps": 662, "total_steps": 1955, "loss": 1.0218, "lr": 0.0001338552661966028, "epoch": 1.6930946291560103, "percentage": 33.86, "elapsed_time": "3:24:29", "remaining_time": "6:39:24"} +{"current_steps": 663, "total_steps": 1955, "loss": 1.0438, "lr": 0.00013374952442827813, "epoch": 1.6956521739130435, "percentage": 33.91, "elapsed_time": "3:24:48", "remaining_time": "6:39:06"} +{"current_steps": 664, "total_steps": 1955, "loss": 1.0293, "lr": 0.00013364361120794495, "epoch": 1.6982097186700766, "percentage": 33.96, "elapsed_time": "3:25:06", "remaining_time": "6:38:47"} +{"current_steps": 665, "total_steps": 1955, "loss": 1.0332, "lr": 0.00013353752687344882, "epoch": 1.7007672634271098, "percentage": 34.02, "elapsed_time": "3:25:25", "remaining_time": "6:38:29"} +{"current_steps": 666, "total_steps": 1955, "loss": 1.0053, "lr": 0.000133431271763181, "epoch": 1.7033248081841432, "percentage": 34.07, "elapsed_time": "3:25:44", "remaining_time": "6:38:11"} +{"current_steps": 667, "total_steps": 1955, "loss": 1.0262, "lr": 0.00013332484621607758, "epoch": 1.7058823529411766, "percentage": 34.12, "elapsed_time": "3:26:02", "remaining_time": "6:37:52"} +{"current_steps": 668, "total_steps": 1955, "loss": 1.0156, "lr": 0.00013321825057161825, "epoch": 1.7084398976982098, "percentage": 34.17, "elapsed_time": "3:26:21", "remaining_time": "6:37:35"} +{"current_steps": 669, "total_steps": 1955, "loss": 1.0413, "lr": 0.00013311148516982534, "epoch": 1.710997442455243, "percentage": 34.22, "elapsed_time": "3:26:40", "remaining_time": "6:37:16"} +{"current_steps": 670, "total_steps": 1955, "loss": 1.0199, "lr": 0.00013300455035126268, "epoch": 1.7135549872122762, "percentage": 34.27, "elapsed_time": "3:26:58", "remaining_time": "6:36:58"} +{"current_steps": 671, "total_steps": 1955, "loss": 1.0361, "lr": 0.00013289744645703444, "epoch": 1.7161125319693094, "percentage": 34.32, "elapsed_time": "3:27:17", 
"remaining_time": "6:36:40"} +{"current_steps": 672, "total_steps": 1955, "loss": 1.0385, "lr": 0.0001327901738287842, "epoch": 1.7186700767263428, "percentage": 34.37, "elapsed_time": "3:27:36", "remaining_time": "6:36:22"} +{"current_steps": 673, "total_steps": 1955, "loss": 1.0163, "lr": 0.0001326827328086937, "epoch": 1.721227621483376, "percentage": 34.42, "elapsed_time": "3:27:55", "remaining_time": "6:36:03"} +{"current_steps": 674, "total_steps": 1955, "loss": 1.0592, "lr": 0.00013257512373948186, "epoch": 1.7237851662404093, "percentage": 34.48, "elapsed_time": "3:28:13", "remaining_time": "6:35:45"} +{"current_steps": 675, "total_steps": 1955, "loss": 1.0303, "lr": 0.00013246734696440368, "epoch": 1.7263427109974425, "percentage": 34.53, "elapsed_time": "3:28:32", "remaining_time": "6:35:26"} +{"current_steps": 676, "total_steps": 1955, "loss": 0.9963, "lr": 0.000132359402827249, "epoch": 1.7289002557544757, "percentage": 34.58, "elapsed_time": "3:28:50", "remaining_time": "6:35:07"} +{"current_steps": 677, "total_steps": 1955, "loss": 1.0133, "lr": 0.0001322512916723417, "epoch": 1.7314578005115089, "percentage": 34.63, "elapsed_time": "3:29:08", "remaining_time": "6:34:47"} +{"current_steps": 678, "total_steps": 1955, "loss": 1.0143, "lr": 0.00013214301384453824, "epoch": 1.734015345268542, "percentage": 34.68, "elapsed_time": "3:29:26", "remaining_time": "6:34:28"} +{"current_steps": 679, "total_steps": 1955, "loss": 1.0164, "lr": 0.00013203456968922684, "epoch": 1.7365728900255755, "percentage": 34.73, "elapsed_time": "3:29:44", "remaining_time": "6:34:09"} +{"current_steps": 680, "total_steps": 1955, "loss": 1.0172, "lr": 0.0001319259595523262, "epoch": 1.7391304347826086, "percentage": 34.78, "elapsed_time": "3:30:03", "remaining_time": "6:33:51"} +{"current_steps": 681, "total_steps": 1955, "loss": 1.0048, "lr": 0.0001318171837802846, "epoch": 1.741687979539642, "percentage": 34.83, "elapsed_time": "3:30:22", "remaining_time": "6:33:33"} 
+{"current_steps": 682, "total_steps": 1955, "loss": 1.0508, "lr": 0.00013170824272007854, "epoch": 1.7442455242966752, "percentage": 34.88, "elapsed_time": "3:30:40", "remaining_time": "6:33:13"} +{"current_steps": 683, "total_steps": 1955, "loss": 1.0433, "lr": 0.00013159913671921184, "epoch": 1.7468030690537084, "percentage": 34.94, "elapsed_time": "3:30:58", "remaining_time": "6:32:55"} +{"current_steps": 684, "total_steps": 1955, "loss": 1.0281, "lr": 0.00013148986612571438, "epoch": 1.7493606138107416, "percentage": 34.99, "elapsed_time": "3:31:17", "remaining_time": "6:32:37"} +{"current_steps": 685, "total_steps": 1955, "loss": 1.0207, "lr": 0.00013138043128814114, "epoch": 1.7519181585677748, "percentage": 35.04, "elapsed_time": "3:31:36", "remaining_time": "6:32:18"} +{"current_steps": 686, "total_steps": 1955, "loss": 1.0509, "lr": 0.000131270832555571, "epoch": 1.7544757033248082, "percentage": 35.09, "elapsed_time": "3:31:54", "remaining_time": "6:32:00"} +{"current_steps": 687, "total_steps": 1955, "loss": 1.0263, "lr": 0.00013116107027760557, "epoch": 1.7570332480818416, "percentage": 35.14, "elapsed_time": "3:32:12", "remaining_time": "6:31:40"} +{"current_steps": 688, "total_steps": 1955, "loss": 1.015, "lr": 0.00013105114480436823, "epoch": 1.7595907928388748, "percentage": 35.19, "elapsed_time": "3:32:31", "remaining_time": "6:31:22"} +{"current_steps": 689, "total_steps": 1955, "loss": 0.9964, "lr": 0.00013094105648650285, "epoch": 1.762148337595908, "percentage": 35.24, "elapsed_time": "3:32:49", "remaining_time": "6:31:02"} +{"current_steps": 690, "total_steps": 1955, "loss": 1.0221, "lr": 0.00013083080567517284, "epoch": 1.7647058823529411, "percentage": 35.29, "elapsed_time": "3:33:07", "remaining_time": "6:30:44"} +{"current_steps": 691, "total_steps": 1955, "loss": 1.0333, "lr": 0.0001307203927220598, "epoch": 1.7672634271099743, "percentage": 35.35, "elapsed_time": "3:33:26", "remaining_time": "6:30:26"} +{"current_steps": 692, 
"total_steps": 1955, "loss": 1.0281, "lr": 0.0001306098179793627, "epoch": 1.7698209718670077, "percentage": 35.4, "elapsed_time": "3:33:45", "remaining_time": "6:30:07"} +{"current_steps": 693, "total_steps": 1955, "loss": 1.0414, "lr": 0.00013049908179979644, "epoch": 1.772378516624041, "percentage": 35.45, "elapsed_time": "3:34:03", "remaining_time": "6:29:48"} +{"current_steps": 694, "total_steps": 1955, "loss": 0.9934, "lr": 0.00013038818453659098, "epoch": 1.7749360613810743, "percentage": 35.5, "elapsed_time": "3:34:21", "remaining_time": "6:29:28"} +{"current_steps": 695, "total_steps": 1955, "loss": 1.0077, "lr": 0.00013027712654349003, "epoch": 1.7774936061381075, "percentage": 35.55, "elapsed_time": "3:34:39", "remaining_time": "6:29:10"} +{"current_steps": 696, "total_steps": 1955, "loss": 1.0408, "lr": 0.0001301659081747501, "epoch": 1.7800511508951407, "percentage": 35.6, "elapsed_time": "3:34:58", "remaining_time": "6:28:52"} +{"current_steps": 697, "total_steps": 1955, "loss": 1.0186, "lr": 0.0001300545297851392, "epoch": 1.7826086956521738, "percentage": 35.65, "elapsed_time": "3:35:17", "remaining_time": "6:28:33"} +{"current_steps": 698, "total_steps": 1955, "loss": 1.0329, "lr": 0.0001299429917299358, "epoch": 1.785166240409207, "percentage": 35.7, "elapsed_time": "3:35:35", "remaining_time": "6:28:15"} +{"current_steps": 699, "total_steps": 1955, "loss": 1.0233, "lr": 0.00012983129436492763, "epoch": 1.7877237851662404, "percentage": 35.75, "elapsed_time": "3:35:53", "remaining_time": "6:27:56"} +{"current_steps": 700, "total_steps": 1955, "loss": 1.0409, "lr": 0.00012971943804641068, "epoch": 1.7902813299232738, "percentage": 35.81, "elapsed_time": "3:36:12", "remaining_time": "6:27:37"} +{"current_steps": 701, "total_steps": 1955, "loss": 1.0066, "lr": 0.0001296074231311879, "epoch": 1.792838874680307, "percentage": 35.86, "elapsed_time": "3:36:30", "remaining_time": "6:27:19"} +{"current_steps": 702, "total_steps": 1955, "loss": 1.0254, 
"lr": 0.0001294952499765682, "epoch": 1.7953964194373402, "percentage": 35.91, "elapsed_time": "3:36:49", "remaining_time": "6:26:59"} +{"current_steps": 703, "total_steps": 1955, "loss": 1.0285, "lr": 0.00012938291894036522, "epoch": 1.7979539641943734, "percentage": 35.96, "elapsed_time": "3:37:07", "remaining_time": "6:26:40"} +{"current_steps": 704, "total_steps": 1955, "loss": 1.0091, "lr": 0.00012927043038089616, "epoch": 1.8005115089514065, "percentage": 36.01, "elapsed_time": "3:37:24", "remaining_time": "6:26:20"} +{"current_steps": 705, "total_steps": 1955, "loss": 1.0397, "lr": 0.00012915778465698077, "epoch": 1.80306905370844, "percentage": 36.06, "elapsed_time": "3:37:43", "remaining_time": "6:26:02"} +{"current_steps": 706, "total_steps": 1955, "loss": 0.991, "lr": 0.00012904498212794007, "epoch": 1.8056265984654731, "percentage": 36.11, "elapsed_time": "3:38:02", "remaining_time": "6:25:43"} +{"current_steps": 707, "total_steps": 1955, "loss": 0.9944, "lr": 0.00012893202315359537, "epoch": 1.8081841432225065, "percentage": 36.16, "elapsed_time": "3:38:20", "remaining_time": "6:25:25"} +{"current_steps": 708, "total_steps": 1955, "loss": 1.0212, "lr": 0.00012881890809426688, "epoch": 1.8107416879795397, "percentage": 36.21, "elapsed_time": "3:38:38", "remaining_time": "6:25:05"} +{"current_steps": 709, "total_steps": 1955, "loss": 0.9717, "lr": 0.00012870563731077277, "epoch": 1.813299232736573, "percentage": 36.27, "elapsed_time": "3:38:57", "remaining_time": "6:24:47"} +{"current_steps": 710, "total_steps": 1955, "loss": 1.0162, "lr": 0.0001285922111644279, "epoch": 1.815856777493606, "percentage": 36.32, "elapsed_time": "3:39:15", "remaining_time": "6:24:28"} +{"current_steps": 711, "total_steps": 1955, "loss": 1.0685, "lr": 0.00012847863001704278, "epoch": 1.8184143222506393, "percentage": 36.37, "elapsed_time": "3:39:33", "remaining_time": "6:24:09"} +{"current_steps": 712, "total_steps": 1955, "loss": 1.0166, "lr": 0.00012836489423092225, 
"epoch": 1.8209718670076727, "percentage": 36.42, "elapsed_time": "3:39:51", "remaining_time": "6:23:50"} +{"current_steps": 713, "total_steps": 1955, "loss": 1.0255, "lr": 0.00012825100416886454, "epoch": 1.8235294117647058, "percentage": 36.47, "elapsed_time": "3:40:10", "remaining_time": "6:23:32"} +{"current_steps": 714, "total_steps": 1955, "loss": 1.0135, "lr": 0.0001281369601941599, "epoch": 1.8260869565217392, "percentage": 36.52, "elapsed_time": "3:40:28", "remaining_time": "6:23:12"} +{"current_steps": 715, "total_steps": 1955, "loss": 0.999, "lr": 0.00012802276267058957, "epoch": 1.8286445012787724, "percentage": 36.57, "elapsed_time": "3:40:47", "remaining_time": "6:22:54"} +{"current_steps": 716, "total_steps": 1955, "loss": 1.0153, "lr": 0.00012790841196242458, "epoch": 1.8312020460358056, "percentage": 36.62, "elapsed_time": "3:41:05", "remaining_time": "6:22:35"} +{"current_steps": 717, "total_steps": 1955, "loss": 0.9855, "lr": 0.00012779390843442462, "epoch": 1.8337595907928388, "percentage": 36.68, "elapsed_time": "3:41:24", "remaining_time": "6:22:17"} +{"current_steps": 718, "total_steps": 1955, "loss": 1.0351, "lr": 0.00012767925245183676, "epoch": 1.836317135549872, "percentage": 36.73, "elapsed_time": "3:41:43", "remaining_time": "6:21:58"} +{"current_steps": 719, "total_steps": 1955, "loss": 1.035, "lr": 0.00012756444438039453, "epoch": 1.8388746803069054, "percentage": 36.78, "elapsed_time": "3:42:01", "remaining_time": "6:21:40"} +{"current_steps": 720, "total_steps": 1955, "loss": 1.0412, "lr": 0.00012744948458631646, "epoch": 1.8414322250639388, "percentage": 36.83, "elapsed_time": "3:42:19", "remaining_time": "6:21:21"} +{"current_steps": 721, "total_steps": 1955, "loss": 1.0419, "lr": 0.0001273343734363051, "epoch": 1.843989769820972, "percentage": 36.88, "elapsed_time": "3:42:38", "remaining_time": "6:21:02"} +{"current_steps": 722, "total_steps": 1955, "loss": 0.9993, "lr": 0.00012721911129754578, "epoch": 1.8465473145780051, 
"percentage": 36.93, "elapsed_time": "3:42:56", "remaining_time": "6:20:44"} +{"current_steps": 723, "total_steps": 1955, "loss": 1.0255, "lr": 0.0001271036985377055, "epoch": 1.8491048593350383, "percentage": 36.98, "elapsed_time": "3:43:14", "remaining_time": "6:20:24"} +{"current_steps": 724, "total_steps": 1955, "loss": 1.0159, "lr": 0.00012698813552493174, "epoch": 1.8516624040920715, "percentage": 37.03, "elapsed_time": "3:43:33", "remaining_time": "6:20:06"} +{"current_steps": 725, "total_steps": 1955, "loss": 1.0468, "lr": 0.00012687242262785116, "epoch": 1.854219948849105, "percentage": 37.08, "elapsed_time": "3:43:52", "remaining_time": "6:19:48"} +{"current_steps": 726, "total_steps": 1955, "loss": 0.9702, "lr": 0.00012675656021556855, "epoch": 1.856777493606138, "percentage": 37.14, "elapsed_time": "3:44:10", "remaining_time": "6:19:30"} +{"current_steps": 727, "total_steps": 1955, "loss": 0.9959, "lr": 0.00012664054865766573, "epoch": 1.8593350383631715, "percentage": 37.19, "elapsed_time": "3:44:29", "remaining_time": "6:19:11"} +{"current_steps": 728, "total_steps": 1955, "loss": 1.0009, "lr": 0.00012652438832420017, "epoch": 1.8618925831202047, "percentage": 37.24, "elapsed_time": "3:44:48", "remaining_time": "6:18:53"} +{"current_steps": 729, "total_steps": 1955, "loss": 1.0572, "lr": 0.00012640807958570394, "epoch": 1.8644501278772379, "percentage": 37.29, "elapsed_time": "3:45:06", "remaining_time": "6:18:34"} +{"current_steps": 730, "total_steps": 1955, "loss": 1.0123, "lr": 0.00012629162281318248, "epoch": 1.867007672634271, "percentage": 37.34, "elapsed_time": "3:45:25", "remaining_time": "6:18:16"} +{"current_steps": 731, "total_steps": 1955, "loss": 0.9835, "lr": 0.00012617501837811347, "epoch": 1.8695652173913042, "percentage": 37.39, "elapsed_time": "3:45:43", "remaining_time": "6:17:57"} +{"current_steps": 732, "total_steps": 1955, "loss": 1.0206, "lr": 0.00012605826665244559, "epoch": 1.8721227621483376, "percentage": 37.44, 
"elapsed_time": "3:46:01", "remaining_time": "6:17:37"} +{"current_steps": 733, "total_steps": 1955, "loss": 1.0312, "lr": 0.00012594136800859733, "epoch": 1.8746803069053708, "percentage": 37.49, "elapsed_time": "3:46:19", "remaining_time": "6:17:18"} +{"current_steps": 734, "total_steps": 1955, "loss": 0.9929, "lr": 0.00012582432281945587, "epoch": 1.8772378516624042, "percentage": 37.54, "elapsed_time": "3:46:37", "remaining_time": "6:17:00"} +{"current_steps": 735, "total_steps": 1955, "loss": 1.0232, "lr": 0.0001257071314583758, "epoch": 1.8797953964194374, "percentage": 37.6, "elapsed_time": "3:46:56", "remaining_time": "6:16:41"} +{"current_steps": 736, "total_steps": 1955, "loss": 1.0528, "lr": 0.00012558979429917803, "epoch": 1.8823529411764706, "percentage": 37.65, "elapsed_time": "3:47:15", "remaining_time": "6:16:23"} +{"current_steps": 737, "total_steps": 1955, "loss": 1.0262, "lr": 0.00012547231171614845, "epoch": 1.8849104859335037, "percentage": 37.7, "elapsed_time": "3:47:34", "remaining_time": "6:16:05"} +{"current_steps": 738, "total_steps": 1955, "loss": 1.0333, "lr": 0.00012535468408403697, "epoch": 1.887468030690537, "percentage": 37.75, "elapsed_time": "3:47:52", "remaining_time": "6:15:45"} +{"current_steps": 739, "total_steps": 1955, "loss": 1.0168, "lr": 0.00012523691177805597, "epoch": 1.8900255754475703, "percentage": 37.8, "elapsed_time": "3:48:10", "remaining_time": "6:15:27"} +{"current_steps": 740, "total_steps": 1955, "loss": 0.9883, "lr": 0.00012511899517387955, "epoch": 1.8925831202046037, "percentage": 37.85, "elapsed_time": "3:48:29", "remaining_time": "6:15:09"} +{"current_steps": 741, "total_steps": 1955, "loss": 0.9977, "lr": 0.00012500093464764197, "epoch": 1.895140664961637, "percentage": 37.9, "elapsed_time": "3:48:47", "remaining_time": "6:14:50"} +{"current_steps": 742, "total_steps": 1955, "loss": 1.0044, "lr": 0.00012488273057593654, "epoch": 1.89769820971867, "percentage": 37.95, "elapsed_time": "3:49:06", 
"remaining_time": "6:14:32"} +{"current_steps": 743, "total_steps": 1955, "loss": 1.0412, "lr": 0.00012476438333581456, "epoch": 1.9002557544757033, "percentage": 38.01, "elapsed_time": "3:49:25", "remaining_time": "6:14:13"} +{"current_steps": 744, "total_steps": 1955, "loss": 0.9978, "lr": 0.00012464589330478398, "epoch": 1.9028132992327365, "percentage": 38.06, "elapsed_time": "3:49:43", "remaining_time": "6:13:54"} +{"current_steps": 745, "total_steps": 1955, "loss": 0.9944, "lr": 0.0001245272608608082, "epoch": 1.9053708439897699, "percentage": 38.11, "elapsed_time": "3:50:01", "remaining_time": "6:13:36"} +{"current_steps": 746, "total_steps": 1955, "loss": 1.0184, "lr": 0.00012440848638230485, "epoch": 1.907928388746803, "percentage": 38.16, "elapsed_time": "3:50:20", "remaining_time": "6:13:17"} +{"current_steps": 747, "total_steps": 1955, "loss": 1.0105, "lr": 0.00012428957024814477, "epoch": 1.9104859335038364, "percentage": 38.21, "elapsed_time": "3:50:39", "remaining_time": "6:12:59"} +{"current_steps": 748, "total_steps": 1955, "loss": 1.0256, "lr": 0.00012417051283765055, "epoch": 1.9130434782608696, "percentage": 38.26, "elapsed_time": "3:50:57", "remaining_time": "6:12:41"} +{"current_steps": 749, "total_steps": 1955, "loss": 1.0479, "lr": 0.0001240513145305954, "epoch": 1.9156010230179028, "percentage": 38.31, "elapsed_time": "3:51:16", "remaining_time": "6:12:22"} +{"current_steps": 750, "total_steps": 1955, "loss": 1.0006, "lr": 0.00012393197570720208, "epoch": 1.918158567774936, "percentage": 38.36, "elapsed_time": "3:51:34", "remaining_time": "6:12:04"} +{"current_steps": 751, "total_steps": 1955, "loss": 1.0527, "lr": 0.0001238124967481415, "epoch": 1.9207161125319692, "percentage": 38.41, "elapsed_time": "3:51:53", "remaining_time": "6:11:45"} +{"current_steps": 752, "total_steps": 1955, "loss": 1.0039, "lr": 0.00012369287803453156, "epoch": 1.9232736572890026, "percentage": 38.47, "elapsed_time": "3:52:11", "remaining_time": "6:11:27"} 
+{"current_steps": 753, "total_steps": 1955, "loss": 1.0191, "lr": 0.00012357311994793603, "epoch": 1.9258312020460358, "percentage": 38.52, "elapsed_time": "3:52:30", "remaining_time": "6:11:09"} +{"current_steps": 754, "total_steps": 1955, "loss": 1.014, "lr": 0.00012345322287036315, "epoch": 1.9283887468030692, "percentage": 38.57, "elapsed_time": "3:52:48", "remaining_time": "6:10:49"} +{"current_steps": 755, "total_steps": 1955, "loss": 0.9853, "lr": 0.0001233331871842646, "epoch": 1.9309462915601023, "percentage": 38.62, "elapsed_time": "3:53:07", "remaining_time": "6:10:31"} +{"current_steps": 756, "total_steps": 1955, "loss": 1.022, "lr": 0.0001232130132725342, "epoch": 1.9335038363171355, "percentage": 38.67, "elapsed_time": "3:53:25", "remaining_time": "6:10:12"} +{"current_steps": 757, "total_steps": 1955, "loss": 1.0199, "lr": 0.00012309270151850666, "epoch": 1.9360613810741687, "percentage": 38.72, "elapsed_time": "3:53:44", "remaining_time": "6:09:54"} +{"current_steps": 758, "total_steps": 1955, "loss": 1.0008, "lr": 0.00012297225230595637, "epoch": 1.938618925831202, "percentage": 38.77, "elapsed_time": "3:54:03", "remaining_time": "6:09:36"} +{"current_steps": 759, "total_steps": 1955, "loss": 1.0464, "lr": 0.0001228516660190962, "epoch": 1.9411764705882353, "percentage": 38.82, "elapsed_time": "3:54:21", "remaining_time": "6:09:17"} +{"current_steps": 760, "total_steps": 1955, "loss": 1.0486, "lr": 0.00012273094304257633, "epoch": 1.9437340153452687, "percentage": 38.87, "elapsed_time": "3:54:39", "remaining_time": "6:08:58"} +{"current_steps": 761, "total_steps": 1955, "loss": 1.0483, "lr": 0.00012261008376148282, "epoch": 1.9462915601023019, "percentage": 38.93, "elapsed_time": "3:54:58", "remaining_time": "6:08:40"} +{"current_steps": 762, "total_steps": 1955, "loss": 1.026, "lr": 0.0001224890885613366, "epoch": 1.948849104859335, "percentage": 38.98, "elapsed_time": "3:55:16", "remaining_time": "6:08:21"} +{"current_steps": 763, "total_steps": 
1955, "loss": 1.017, "lr": 0.00012236795782809225, "epoch": 1.9514066496163682, "percentage": 39.03, "elapsed_time": "3:55:35", "remaining_time": "6:08:03"} +{"current_steps": 764, "total_steps": 1955, "loss": 1.031, "lr": 0.00012224669194813647, "epoch": 1.9539641943734014, "percentage": 39.08, "elapsed_time": "3:55:53", "remaining_time": "6:07:43"} +{"current_steps": 765, "total_steps": 1955, "loss": 0.9639, "lr": 0.00012212529130828725, "epoch": 1.9565217391304348, "percentage": 39.13, "elapsed_time": "3:56:11", "remaining_time": "6:07:25"} +{"current_steps": 766, "total_steps": 1955, "loss": 1.0298, "lr": 0.00012200375629579234, "epoch": 1.959079283887468, "percentage": 39.18, "elapsed_time": "3:56:30", "remaining_time": "6:07:06"} +{"current_steps": 767, "total_steps": 1955, "loss": 0.9979, "lr": 0.0001218820872983281, "epoch": 1.9616368286445014, "percentage": 39.23, "elapsed_time": "3:56:48", "remaining_time": "6:06:47"} +{"current_steps": 768, "total_steps": 1955, "loss": 1.0219, "lr": 0.00012176028470399836, "epoch": 1.9641943734015346, "percentage": 39.28, "elapsed_time": "3:57:06", "remaining_time": "6:06:28"} +{"current_steps": 769, "total_steps": 1955, "loss": 1.0321, "lr": 0.00012163834890133303, "epoch": 1.9667519181585678, "percentage": 39.34, "elapsed_time": "3:57:25", "remaining_time": "6:06:09"} +{"current_steps": 770, "total_steps": 1955, "loss": 1.0152, "lr": 0.000121516280279287, "epoch": 1.969309462915601, "percentage": 39.39, "elapsed_time": "3:57:43", "remaining_time": "6:05:51"} +{"current_steps": 771, "total_steps": 1955, "loss": 1.0056, "lr": 0.00012139407922723875, "epoch": 1.9718670076726341, "percentage": 39.44, "elapsed_time": "3:58:01", "remaining_time": "6:05:32"} +{"current_steps": 772, "total_steps": 1955, "loss": 1.0211, "lr": 0.00012127174613498925, "epoch": 1.9744245524296675, "percentage": 39.49, "elapsed_time": "3:58:19", "remaining_time": "6:05:12"} +{"current_steps": 773, "total_steps": 1955, "loss": 1.0298, "lr": 
0.00012114928139276064, "epoch": 1.976982097186701, "percentage": 39.54, "elapsed_time": "3:58:38", "remaining_time": "6:04:55"} +{"current_steps": 774, "total_steps": 1955, "loss": 0.997, "lr": 0.00012102668539119501, "epoch": 1.979539641943734, "percentage": 39.59, "elapsed_time": "3:58:57", "remaining_time": "6:04:36"} +{"current_steps": 775, "total_steps": 1955, "loss": 1.008, "lr": 0.00012090395852135314, "epoch": 1.9820971867007673, "percentage": 39.64, "elapsed_time": "3:59:15", "remaining_time": "6:04:17"} +{"current_steps": 776, "total_steps": 1955, "loss": 1.0247, "lr": 0.0001207811011747132, "epoch": 1.9846547314578005, "percentage": 39.69, "elapsed_time": "3:59:33", "remaining_time": "6:03:58"} +{"current_steps": 777, "total_steps": 1955, "loss": 1.0049, "lr": 0.00012065811374316966, "epoch": 1.9872122762148337, "percentage": 39.74, "elapsed_time": "3:59:52", "remaining_time": "6:03:40"} +{"current_steps": 778, "total_steps": 1955, "loss": 1.0454, "lr": 0.0001205349966190319, "epoch": 1.989769820971867, "percentage": 39.8, "elapsed_time": "4:00:10", "remaining_time": "6:03:21"} +{"current_steps": 779, "total_steps": 1955, "loss": 1.0269, "lr": 0.00012041175019502295, "epoch": 1.9923273657289002, "percentage": 39.85, "elapsed_time": "4:00:28", "remaining_time": "6:03:01"} +{"current_steps": 780, "total_steps": 1955, "loss": 1.0085, "lr": 0.00012028837486427837, "epoch": 1.9948849104859336, "percentage": 39.9, "elapsed_time": "4:00:46", "remaining_time": "6:02:42"} +{"current_steps": 781, "total_steps": 1955, "loss": 1.0151, "lr": 0.00012016487102034482, "epoch": 1.9974424552429668, "percentage": 39.95, "elapsed_time": "4:01:05", "remaining_time": "6:02:24"} +{"current_steps": 782, "total_steps": 1955, "loss": 0.9888, "lr": 0.00012004123905717898, "epoch": 2.0, "percentage": 40.0, "elapsed_time": "4:01:23", "remaining_time": "6:02:05"} +{"current_steps": 783, "total_steps": 1955, "loss": 0.98, "lr": 0.00011991747936914614, "epoch": 2.002557544757033, 
"percentage": 40.05, "elapsed_time": "4:01:53", "remaining_time": "6:02:03"} +{"current_steps": 784, "total_steps": 1955, "loss": 0.966, "lr": 0.00011979359235101906, "epoch": 2.0051150895140664, "percentage": 40.1, "elapsed_time": "4:02:11", "remaining_time": "6:01:44"} +{"current_steps": 785, "total_steps": 1955, "loss": 0.9695, "lr": 0.00011966957839797664, "epoch": 2.0076726342710995, "percentage": 40.15, "elapsed_time": "4:02:29", "remaining_time": "6:01:25"} +{"current_steps": 786, "total_steps": 1955, "loss": 1.0493, "lr": 0.00011954543790560267, "epoch": 2.010230179028133, "percentage": 40.2, "elapsed_time": "4:02:48", "remaining_time": "6:01:07"} +{"current_steps": 787, "total_steps": 1955, "loss": 0.9883, "lr": 0.00011942117126988461, "epoch": 2.0127877237851663, "percentage": 40.26, "elapsed_time": "4:03:07", "remaining_time": "6:00:48"} +{"current_steps": 788, "total_steps": 1955, "loss": 0.9984, "lr": 0.00011929677888721227, "epoch": 2.0153452685421995, "percentage": 40.31, "elapsed_time": "4:03:25", "remaining_time": "6:00:30"} +{"current_steps": 789, "total_steps": 1955, "loss": 0.9833, "lr": 0.00011917226115437656, "epoch": 2.0179028132992327, "percentage": 40.36, "elapsed_time": "4:03:44", "remaining_time": "6:00:12"} +{"current_steps": 790, "total_steps": 1955, "loss": 0.9724, "lr": 0.00011904761846856831, "epoch": 2.020460358056266, "percentage": 40.41, "elapsed_time": "4:04:03", "remaining_time": "5:59:53"} +{"current_steps": 791, "total_steps": 1955, "loss": 0.9699, "lr": 0.00011892285122737683, "epoch": 2.023017902813299, "percentage": 40.46, "elapsed_time": "4:04:21", "remaining_time": "5:59:34"} +{"current_steps": 792, "total_steps": 1955, "loss": 0.9741, "lr": 0.00011879795982878883, "epoch": 2.0255754475703327, "percentage": 40.51, "elapsed_time": "4:04:39", "remaining_time": "5:59:16"} +{"current_steps": 793, "total_steps": 1955, "loss": 0.9682, "lr": 0.00011867294467118698, "epoch": 2.028132992327366, "percentage": 40.56, "elapsed_time": 
"4:04:58", "remaining_time": "5:58:58"} +{"current_steps": 794, "total_steps": 1955, "loss": 0.9683, "lr": 0.00011854780615334875, "epoch": 2.030690537084399, "percentage": 40.61, "elapsed_time": "4:05:17", "remaining_time": "5:58:39"} +{"current_steps": 795, "total_steps": 1955, "loss": 0.9756, "lr": 0.00011842254467444517, "epoch": 2.0332480818414322, "percentage": 40.66, "elapsed_time": "4:05:35", "remaining_time": "5:58:21"} +{"current_steps": 796, "total_steps": 1955, "loss": 0.9853, "lr": 0.0001182971606340394, "epoch": 2.0358056265984654, "percentage": 40.72, "elapsed_time": "4:05:53", "remaining_time": "5:58:02"} +{"current_steps": 797, "total_steps": 1955, "loss": 1.0054, "lr": 0.00011817165443208562, "epoch": 2.0383631713554986, "percentage": 40.77, "elapsed_time": "4:06:12", "remaining_time": "5:57:43"} +{"current_steps": 798, "total_steps": 1955, "loss": 0.9792, "lr": 0.00011804602646892762, "epoch": 2.040920716112532, "percentage": 40.82, "elapsed_time": "4:06:30", "remaining_time": "5:57:24"} +{"current_steps": 799, "total_steps": 1955, "loss": 0.9788, "lr": 0.00011792027714529767, "epoch": 2.0434782608695654, "percentage": 40.87, "elapsed_time": "4:06:49", "remaining_time": "5:57:06"} +{"current_steps": 800, "total_steps": 1955, "loss": 1.023, "lr": 0.0001177944068623151, "epoch": 2.0460358056265986, "percentage": 40.92, "elapsed_time": "4:07:07", "remaining_time": "5:56:47"} +{"current_steps": 801, "total_steps": 1955, "loss": 0.9758, "lr": 0.00011766841602148507, "epoch": 2.0485933503836318, "percentage": 40.97, "elapsed_time": "4:07:25", "remaining_time": "5:56:28"} +{"current_steps": 802, "total_steps": 1955, "loss": 0.9828, "lr": 0.00011754230502469739, "epoch": 2.051150895140665, "percentage": 41.02, "elapsed_time": "4:07:44", "remaining_time": "5:56:09"} +{"current_steps": 803, "total_steps": 1955, "loss": 0.9891, "lr": 0.00011741607427422502, "epoch": 2.053708439897698, "percentage": 41.07, "elapsed_time": "4:08:03", "remaining_time": 
"5:55:51"} +{"current_steps": 804, "total_steps": 1955, "loss": 1.0182, "lr": 0.000117289724172723, "epoch": 2.0562659846547313, "percentage": 41.13, "elapsed_time": "4:08:21", "remaining_time": "5:55:33"} +{"current_steps": 805, "total_steps": 1955, "loss": 0.9653, "lr": 0.00011716325512322707, "epoch": 2.0588235294117645, "percentage": 41.18, "elapsed_time": "4:08:40", "remaining_time": "5:55:14"} +{"current_steps": 806, "total_steps": 1955, "loss": 0.9779, "lr": 0.00011703666752915235, "epoch": 2.061381074168798, "percentage": 41.23, "elapsed_time": "4:08:58", "remaining_time": "5:54:56"} +{"current_steps": 807, "total_steps": 1955, "loss": 1.0192, "lr": 0.00011690996179429219, "epoch": 2.0639386189258313, "percentage": 41.28, "elapsed_time": "4:09:16", "remaining_time": "5:54:37"} +{"current_steps": 808, "total_steps": 1955, "loss": 0.9929, "lr": 0.00011678313832281664, "epoch": 2.0664961636828645, "percentage": 41.33, "elapsed_time": "4:09:35", "remaining_time": "5:54:17"} +{"current_steps": 809, "total_steps": 1955, "loss": 0.9711, "lr": 0.00011665619751927146, "epoch": 2.0690537084398977, "percentage": 41.38, "elapsed_time": "4:09:53", "remaining_time": "5:53:59"} +{"current_steps": 810, "total_steps": 1955, "loss": 0.9732, "lr": 0.00011652913978857664, "epoch": 2.071611253196931, "percentage": 41.43, "elapsed_time": "4:10:11", "remaining_time": "5:53:40"} +{"current_steps": 811, "total_steps": 1955, "loss": 0.9955, "lr": 0.00011640196553602505, "epoch": 2.074168797953964, "percentage": 41.48, "elapsed_time": "4:10:30", "remaining_time": "5:53:21"} +{"current_steps": 812, "total_steps": 1955, "loss": 0.9706, "lr": 0.00011627467516728138, "epoch": 2.0767263427109977, "percentage": 41.53, "elapsed_time": "4:10:48", "remaining_time": "5:53:03"} +{"current_steps": 813, "total_steps": 1955, "loss": 0.9712, "lr": 0.00011614726908838063, "epoch": 2.079283887468031, "percentage": 41.59, "elapsed_time": "4:11:07", "remaining_time": "5:52:44"} +{"current_steps": 814, 
"total_steps": 1955, "loss": 1.0314, "lr": 0.00011601974770572692, "epoch": 2.081841432225064, "percentage": 41.64, "elapsed_time": "4:11:25", "remaining_time": "5:52:26"} +{"current_steps": 815, "total_steps": 1955, "loss": 0.961, "lr": 0.0001158921114260922, "epoch": 2.084398976982097, "percentage": 41.69, "elapsed_time": "4:11:44", "remaining_time": "5:52:07"} +{"current_steps": 816, "total_steps": 1955, "loss": 0.9732, "lr": 0.00011576436065661484, "epoch": 2.0869565217391304, "percentage": 41.74, "elapsed_time": "4:12:02", "remaining_time": "5:51:49"} +{"current_steps": 817, "total_steps": 1955, "loss": 0.9827, "lr": 0.00011563649580479848, "epoch": 2.0895140664961636, "percentage": 41.79, "elapsed_time": "4:12:21", "remaining_time": "5:51:30"} +{"current_steps": 818, "total_steps": 1955, "loss": 0.9634, "lr": 0.00011550851727851067, "epoch": 2.0920716112531967, "percentage": 41.84, "elapsed_time": "4:12:39", "remaining_time": "5:51:11"} +{"current_steps": 819, "total_steps": 1955, "loss": 0.9674, "lr": 0.00011538042548598154, "epoch": 2.0946291560102304, "percentage": 41.89, "elapsed_time": "4:12:58", "remaining_time": "5:50:53"} +{"current_steps": 820, "total_steps": 1955, "loss": 0.9682, "lr": 0.00011525222083580247, "epoch": 2.0971867007672635, "percentage": 41.94, "elapsed_time": "4:13:16", "remaining_time": "5:50:33"} +{"current_steps": 821, "total_steps": 1955, "loss": 0.98, "lr": 0.00011512390373692495, "epoch": 2.0997442455242967, "percentage": 41.99, "elapsed_time": "4:13:35", "remaining_time": "5:50:15"} +{"current_steps": 822, "total_steps": 1955, "loss": 0.9658, "lr": 0.00011499547459865908, "epoch": 2.10230179028133, "percentage": 42.05, "elapsed_time": "4:13:53", "remaining_time": "5:49:57"} +{"current_steps": 823, "total_steps": 1955, "loss": 0.9961, "lr": 0.00011486693383067234, "epoch": 2.104859335038363, "percentage": 42.1, "elapsed_time": "4:14:12", "remaining_time": "5:49:38"} +{"current_steps": 824, "total_steps": 1955, "loss": 0.9886, 
"lr": 0.0001147382818429884, "epoch": 2.1074168797953963, "percentage": 42.15, "elapsed_time": "4:14:30", "remaining_time": "5:49:19"} +{"current_steps": 825, "total_steps": 1955, "loss": 0.99, "lr": 0.0001146095190459855, "epoch": 2.10997442455243, "percentage": 42.2, "elapsed_time": "4:14:48", "remaining_time": "5:49:00"} +{"current_steps": 826, "total_steps": 1955, "loss": 0.9855, "lr": 0.00011448064585039555, "epoch": 2.112531969309463, "percentage": 42.25, "elapsed_time": "4:15:07", "remaining_time": "5:48:42"} +{"current_steps": 827, "total_steps": 1955, "loss": 0.9784, "lr": 0.0001143516626673025, "epoch": 2.1150895140664963, "percentage": 42.3, "elapsed_time": "4:15:25", "remaining_time": "5:48:23"} +{"current_steps": 828, "total_steps": 1955, "loss": 0.9884, "lr": 0.00011422256990814115, "epoch": 2.1176470588235294, "percentage": 42.35, "elapsed_time": "4:15:43", "remaining_time": "5:48:04"} +{"current_steps": 829, "total_steps": 1955, "loss": 0.9926, "lr": 0.0001140933679846959, "epoch": 2.1202046035805626, "percentage": 42.4, "elapsed_time": "4:16:02", "remaining_time": "5:47:46"} +{"current_steps": 830, "total_steps": 1955, "loss": 1.0183, "lr": 0.00011396405730909925, "epoch": 2.122762148337596, "percentage": 42.46, "elapsed_time": "4:16:20", "remaining_time": "5:47:27"} +{"current_steps": 831, "total_steps": 1955, "loss": 1.0098, "lr": 0.00011383463829383071, "epoch": 2.125319693094629, "percentage": 42.51, "elapsed_time": "4:16:39", "remaining_time": "5:47:08"} +{"current_steps": 832, "total_steps": 1955, "loss": 1.0071, "lr": 0.00011370511135171532, "epoch": 2.1278772378516626, "percentage": 42.56, "elapsed_time": "4:16:57", "remaining_time": "5:46:50"} +{"current_steps": 833, "total_steps": 1955, "loss": 1.0049, "lr": 0.00011357547689592237, "epoch": 2.130434782608696, "percentage": 42.61, "elapsed_time": "4:17:16", "remaining_time": "5:46:31"} +{"current_steps": 834, "total_steps": 1955, "loss": 0.9656, "lr": 0.00011344573533996417, "epoch": 
2.132992327365729, "percentage": 42.66, "elapsed_time": "4:17:35", "remaining_time": "5:46:13"} +{"current_steps": 835, "total_steps": 1955, "loss": 0.9968, "lr": 0.0001133158870976946, "epoch": 2.135549872122762, "percentage": 42.71, "elapsed_time": "4:17:54", "remaining_time": "5:45:55"} +{"current_steps": 836, "total_steps": 1955, "loss": 1.0134, "lr": 0.00011318593258330785, "epoch": 2.1381074168797953, "percentage": 42.76, "elapsed_time": "4:18:12", "remaining_time": "5:45:37"} +{"current_steps": 837, "total_steps": 1955, "loss": 0.9522, "lr": 0.00011305587221133718, "epoch": 2.1406649616368285, "percentage": 42.81, "elapsed_time": "4:18:31", "remaining_time": "5:45:18"} +{"current_steps": 838, "total_steps": 1955, "loss": 1.0104, "lr": 0.00011292570639665342, "epoch": 2.1432225063938617, "percentage": 42.86, "elapsed_time": "4:18:49", "remaining_time": "5:45:00"} +{"current_steps": 839, "total_steps": 1955, "loss": 0.988, "lr": 0.00011279543555446379, "epoch": 2.1457800511508953, "percentage": 42.92, "elapsed_time": "4:19:07", "remaining_time": "5:44:41"} +{"current_steps": 840, "total_steps": 1955, "loss": 1.0119, "lr": 0.00011266506010031052, "epoch": 2.1483375959079285, "percentage": 42.97, "elapsed_time": "4:19:26", "remaining_time": "5:44:22"} +{"current_steps": 841, "total_steps": 1955, "loss": 0.9895, "lr": 0.00011253458045006955, "epoch": 2.1508951406649617, "percentage": 43.02, "elapsed_time": "4:19:44", "remaining_time": "5:44:03"} +{"current_steps": 842, "total_steps": 1955, "loss": 1.001, "lr": 0.00011240399701994919, "epoch": 2.153452685421995, "percentage": 43.07, "elapsed_time": "4:20:03", "remaining_time": "5:43:44"} +{"current_steps": 843, "total_steps": 1955, "loss": 0.9773, "lr": 0.00011227331022648877, "epoch": 2.156010230179028, "percentage": 43.12, "elapsed_time": "4:20:21", "remaining_time": "5:43:25"} +{"current_steps": 844, "total_steps": 1955, "loss": 1.024, "lr": 0.00011214252048655733, "epoch": 2.1585677749360612, "percentage": 
43.17, "elapsed_time": "4:20:39", "remaining_time": "5:43:07"} +{"current_steps": 845, "total_steps": 1955, "loss": 0.9843, "lr": 0.00011201162821735228, "epoch": 2.1611253196930944, "percentage": 43.22, "elapsed_time": "4:20:58", "remaining_time": "5:42:48"} +{"current_steps": 846, "total_steps": 1955, "loss": 0.9809, "lr": 0.00011188063383639817, "epoch": 2.163682864450128, "percentage": 43.27, "elapsed_time": "4:21:16", "remaining_time": "5:42:30"} +{"current_steps": 847, "total_steps": 1955, "loss": 0.942, "lr": 0.00011174953776154516, "epoch": 2.166240409207161, "percentage": 43.32, "elapsed_time": "4:21:34", "remaining_time": "5:42:10"} +{"current_steps": 848, "total_steps": 1955, "loss": 1.0337, "lr": 0.00011161834041096782, "epoch": 2.1687979539641944, "percentage": 43.38, "elapsed_time": "4:21:53", "remaining_time": "5:41:52"} +{"current_steps": 849, "total_steps": 1955, "loss": 0.9913, "lr": 0.00011148704220316387, "epoch": 2.1713554987212276, "percentage": 43.43, "elapsed_time": "4:22:11", "remaining_time": "5:41:33"} +{"current_steps": 850, "total_steps": 1955, "loss": 0.9928, "lr": 0.0001113556435569526, "epoch": 2.1739130434782608, "percentage": 43.48, "elapsed_time": "4:22:30", "remaining_time": "5:41:15"} +{"current_steps": 851, "total_steps": 1955, "loss": 0.9972, "lr": 0.00011122414489147376, "epoch": 2.176470588235294, "percentage": 43.53, "elapsed_time": "4:22:48", "remaining_time": "5:40:56"} +{"current_steps": 852, "total_steps": 1955, "loss": 1.0105, "lr": 0.00011109254662618616, "epoch": 2.1790281329923276, "percentage": 43.58, "elapsed_time": "4:23:07", "remaining_time": "5:40:37"} +{"current_steps": 853, "total_steps": 1955, "loss": 0.9508, "lr": 0.00011096084918086626, "epoch": 2.1815856777493607, "percentage": 43.63, "elapsed_time": "4:23:25", "remaining_time": "5:40:19"} +{"current_steps": 854, "total_steps": 1955, "loss": 0.9354, "lr": 0.00011082905297560697, "epoch": 2.184143222506394, "percentage": 43.68, "elapsed_time": "4:23:43", 
"remaining_time": "5:40:00"} +{"current_steps": 855, "total_steps": 1955, "loss": 0.986, "lr": 0.00011069715843081613, "epoch": 2.186700767263427, "percentage": 43.73, "elapsed_time": "4:24:01", "remaining_time": "5:39:41"} +{"current_steps": 856, "total_steps": 1955, "loss": 1.0047, "lr": 0.00011056516596721534, "epoch": 2.1892583120204603, "percentage": 43.79, "elapsed_time": "4:24:20", "remaining_time": "5:39:22"} +{"current_steps": 857, "total_steps": 1955, "loss": 1.0204, "lr": 0.00011043307600583854, "epoch": 2.1918158567774935, "percentage": 43.84, "elapsed_time": "4:24:38", "remaining_time": "5:39:03"} +{"current_steps": 858, "total_steps": 1955, "loss": 1.0137, "lr": 0.0001103008889680306, "epoch": 2.1943734015345266, "percentage": 43.89, "elapsed_time": "4:24:56", "remaining_time": "5:38:44"} +{"current_steps": 859, "total_steps": 1955, "loss": 1.0085, "lr": 0.00011016860527544616, "epoch": 2.1969309462915603, "percentage": 43.94, "elapsed_time": "4:25:15", "remaining_time": "5:38:26"} +{"current_steps": 860, "total_steps": 1955, "loss": 1.0058, "lr": 0.00011003622535004806, "epoch": 2.1994884910485935, "percentage": 43.99, "elapsed_time": "4:25:33", "remaining_time": "5:38:07"} +{"current_steps": 861, "total_steps": 1955, "loss": 0.9986, "lr": 0.0001099037496141062, "epoch": 2.2020460358056266, "percentage": 44.04, "elapsed_time": "4:25:52", "remaining_time": "5:37:48"} +{"current_steps": 862, "total_steps": 1955, "loss": 0.9707, "lr": 0.00010977117849019604, "epoch": 2.20460358056266, "percentage": 44.09, "elapsed_time": "4:26:10", "remaining_time": "5:37:30"} +{"current_steps": 863, "total_steps": 1955, "loss": 0.9957, "lr": 0.00010963851240119731, "epoch": 2.207161125319693, "percentage": 44.14, "elapsed_time": "4:26:29", "remaining_time": "5:37:12"} +{"current_steps": 864, "total_steps": 1955, "loss": 0.9971, "lr": 0.00010950575177029271, "epoch": 2.209718670076726, "percentage": 44.19, "elapsed_time": "4:26:48", "remaining_time": "5:36:54"} 
+{"current_steps": 865, "total_steps": 1955, "loss": 0.955, "lr": 0.00010937289702096648, "epoch": 2.21227621483376, "percentage": 44.25, "elapsed_time": "4:27:07", "remaining_time": "5:36:36"} +{"current_steps": 866, "total_steps": 1955, "loss": 0.9858, "lr": 0.00010923994857700308, "epoch": 2.214833759590793, "percentage": 44.3, "elapsed_time": "4:27:25", "remaining_time": "5:36:17"} +{"current_steps": 867, "total_steps": 1955, "loss": 1.0272, "lr": 0.00010910690686248587, "epoch": 2.217391304347826, "percentage": 44.35, "elapsed_time": "4:27:44", "remaining_time": "5:35:59"} +{"current_steps": 868, "total_steps": 1955, "loss": 0.9689, "lr": 0.00010897377230179568, "epoch": 2.2199488491048593, "percentage": 44.4, "elapsed_time": "4:28:03", "remaining_time": "5:35:40"} +{"current_steps": 869, "total_steps": 1955, "loss": 1.005, "lr": 0.00010884054531960956, "epoch": 2.2225063938618925, "percentage": 44.45, "elapsed_time": "4:28:21", "remaining_time": "5:35:22"} +{"current_steps": 870, "total_steps": 1955, "loss": 0.9904, "lr": 0.00010870722634089927, "epoch": 2.2250639386189257, "percentage": 44.5, "elapsed_time": "4:28:40", "remaining_time": "5:35:04"} +{"current_steps": 871, "total_steps": 1955, "loss": 0.9716, "lr": 0.0001085738157909302, "epoch": 2.227621483375959, "percentage": 44.55, "elapsed_time": "4:28:58", "remaining_time": "5:34:45"} +{"current_steps": 872, "total_steps": 1955, "loss": 0.9921, "lr": 0.00010844031409525962, "epoch": 2.2301790281329925, "percentage": 44.6, "elapsed_time": "4:29:17", "remaining_time": "5:34:27"} +{"current_steps": 873, "total_steps": 1955, "loss": 1.0081, "lr": 0.00010830672167973572, "epoch": 2.2327365728900257, "percentage": 44.65, "elapsed_time": "4:29:36", "remaining_time": "5:34:08"} +{"current_steps": 874, "total_steps": 1955, "loss": 0.9961, "lr": 0.00010817303897049597, "epoch": 2.235294117647059, "percentage": 44.71, "elapsed_time": "4:29:54", "remaining_time": "5:33:49"} +{"current_steps": 875, "total_steps": 
1955, "loss": 0.9648, "lr": 0.0001080392663939659, "epoch": 2.237851662404092, "percentage": 44.76, "elapsed_time": "4:30:12", "remaining_time": "5:33:31"} +{"current_steps": 876, "total_steps": 1955, "loss": 0.9835, "lr": 0.00010790540437685771, "epoch": 2.2404092071611252, "percentage": 44.81, "elapsed_time": "4:30:30", "remaining_time": "5:33:11"} +{"current_steps": 877, "total_steps": 1955, "loss": 0.9732, "lr": 0.00010777145334616884, "epoch": 2.2429667519181584, "percentage": 44.86, "elapsed_time": "4:30:49", "remaining_time": "5:32:53"} +{"current_steps": 878, "total_steps": 1955, "loss": 0.9799, "lr": 0.00010763741372918076, "epoch": 2.2455242966751916, "percentage": 44.91, "elapsed_time": "4:31:07", "remaining_time": "5:32:34"} +{"current_steps": 879, "total_steps": 1955, "loss": 0.9798, "lr": 0.00010750328595345744, "epoch": 2.2480818414322252, "percentage": 44.96, "elapsed_time": "4:31:26", "remaining_time": "5:32:16"} +{"current_steps": 880, "total_steps": 1955, "loss": 0.956, "lr": 0.00010736907044684409, "epoch": 2.2506393861892584, "percentage": 45.01, "elapsed_time": "4:31:45", "remaining_time": "5:31:58"} +{"current_steps": 881, "total_steps": 1955, "loss": 0.9766, "lr": 0.00010723476763746578, "epoch": 2.2531969309462916, "percentage": 45.06, "elapsed_time": "4:32:03", "remaining_time": "5:31:39"} +{"current_steps": 882, "total_steps": 1955, "loss": 0.9436, "lr": 0.00010710037795372604, "epoch": 2.2557544757033248, "percentage": 45.12, "elapsed_time": "4:32:21", "remaining_time": "5:31:20"} +{"current_steps": 883, "total_steps": 1955, "loss": 0.9829, "lr": 0.00010696590182430552, "epoch": 2.258312020460358, "percentage": 45.17, "elapsed_time": "4:32:40", "remaining_time": "5:31:02"} +{"current_steps": 884, "total_steps": 1955, "loss": 0.9776, "lr": 0.00010683133967816062, "epoch": 2.260869565217391, "percentage": 45.22, "elapsed_time": "4:32:58", "remaining_time": "5:30:42"} +{"current_steps": 885, "total_steps": 1955, "loss": 0.9966, "lr": 
0.00010669669194452213, "epoch": 2.2634271099744243, "percentage": 45.27, "elapsed_time": "4:33:16", "remaining_time": "5:30:24"} +{"current_steps": 886, "total_steps": 1955, "loss": 1.0042, "lr": 0.00010656195905289382, "epoch": 2.265984654731458, "percentage": 45.32, "elapsed_time": "4:33:35", "remaining_time": "5:30:05"} +{"current_steps": 887, "total_steps": 1955, "loss": 0.9591, "lr": 0.00010642714143305115, "epoch": 2.268542199488491, "percentage": 45.37, "elapsed_time": "4:33:53", "remaining_time": "5:29:47"} +{"current_steps": 888, "total_steps": 1955, "loss": 0.9657, "lr": 0.00010629223951503975, "epoch": 2.2710997442455243, "percentage": 45.42, "elapsed_time": "4:34:12", "remaining_time": "5:29:28"} +{"current_steps": 889, "total_steps": 1955, "loss": 0.9902, "lr": 0.00010615725372917429, "epoch": 2.2736572890025575, "percentage": 45.47, "elapsed_time": "4:34:31", "remaining_time": "5:29:10"} +{"current_steps": 890, "total_steps": 1955, "loss": 1.0222, "lr": 0.00010602218450603687, "epoch": 2.2762148337595907, "percentage": 45.52, "elapsed_time": "4:34:49", "remaining_time": "5:28:51"} +{"current_steps": 891, "total_steps": 1955, "loss": 1.0003, "lr": 0.00010588703227647573, "epoch": 2.2787723785166243, "percentage": 45.58, "elapsed_time": "4:35:08", "remaining_time": "5:28:33"} +{"current_steps": 892, "total_steps": 1955, "loss": 0.9834, "lr": 0.00010575179747160391, "epoch": 2.2813299232736575, "percentage": 45.63, "elapsed_time": "4:35:26", "remaining_time": "5:28:14"} +{"current_steps": 893, "total_steps": 1955, "loss": 0.9893, "lr": 0.00010561648052279792, "epoch": 2.2838874680306906, "percentage": 45.68, "elapsed_time": "4:35:45", "remaining_time": "5:27:56"} +{"current_steps": 894, "total_steps": 1955, "loss": 1.0097, "lr": 0.00010548108186169619, "epoch": 2.286445012787724, "percentage": 45.73, "elapsed_time": "4:36:03", "remaining_time": "5:27:37"} +{"current_steps": 895, "total_steps": 1955, "loss": 0.9987, "lr": 0.00010534560192019784, "epoch": 
2.289002557544757, "percentage": 45.78, "elapsed_time": "4:36:22", "remaining_time": "5:27:19"} +{"current_steps": 896, "total_steps": 1955, "loss": 0.9863, "lr": 0.00010521004113046126, "epoch": 2.29156010230179, "percentage": 45.83, "elapsed_time": "4:36:40", "remaining_time": "5:27:00"} +{"current_steps": 897, "total_steps": 1955, "loss": 0.9854, "lr": 0.00010507439992490274, "epoch": 2.2941176470588234, "percentage": 45.88, "elapsed_time": "4:36:59", "remaining_time": "5:26:42"} +{"current_steps": 898, "total_steps": 1955, "loss": 0.962, "lr": 0.00010493867873619509, "epoch": 2.296675191815857, "percentage": 45.93, "elapsed_time": "4:37:17", "remaining_time": "5:26:23"} +{"current_steps": 899, "total_steps": 1955, "loss": 0.9951, "lr": 0.00010480287799726624, "epoch": 2.29923273657289, "percentage": 45.98, "elapsed_time": "4:37:36", "remaining_time": "5:26:05"} +{"current_steps": 900, "total_steps": 1955, "loss": 0.9808, "lr": 0.00010466699814129784, "epoch": 2.3017902813299234, "percentage": 46.04, "elapsed_time": "4:37:54", "remaining_time": "5:25:46"} +{"current_steps": 901, "total_steps": 1955, "loss": 0.9722, "lr": 0.00010453103960172399, "epoch": 2.3043478260869565, "percentage": 46.09, "elapsed_time": "4:38:13", "remaining_time": "5:25:27"} +{"current_steps": 902, "total_steps": 1955, "loss": 0.9778, "lr": 0.0001043950028122297, "epoch": 2.3069053708439897, "percentage": 46.14, "elapsed_time": "4:38:31", "remaining_time": "5:25:08"} +{"current_steps": 903, "total_steps": 1955, "loss": 0.9999, "lr": 0.00010425888820674964, "epoch": 2.309462915601023, "percentage": 46.19, "elapsed_time": "4:38:50", "remaining_time": "5:24:50"} +{"current_steps": 904, "total_steps": 1955, "loss": 0.9277, "lr": 0.00010412269621946664, "epoch": 2.312020460358056, "percentage": 46.24, "elapsed_time": "4:39:09", "remaining_time": "5:24:32"} +{"current_steps": 905, "total_steps": 1955, "loss": 0.9623, "lr": 0.0001039864272848104, "epoch": 2.3145780051150897, "percentage": 46.29, 
"elapsed_time": "4:39:27", "remaining_time": "5:24:13"} +{"current_steps": 906, "total_steps": 1955, "loss": 0.9709, "lr": 0.00010385008183745614, "epoch": 2.317135549872123, "percentage": 46.34, "elapsed_time": "4:39:44", "remaining_time": "5:23:54"} +{"current_steps": 907, "total_steps": 1955, "loss": 0.9752, "lr": 0.00010371366031232298, "epoch": 2.319693094629156, "percentage": 46.39, "elapsed_time": "4:40:03", "remaining_time": "5:23:35"} +{"current_steps": 908, "total_steps": 1955, "loss": 1.0151, "lr": 0.00010357716314457286, "epoch": 2.3222506393861893, "percentage": 46.45, "elapsed_time": "4:40:22", "remaining_time": "5:23:17"} +{"current_steps": 909, "total_steps": 1955, "loss": 0.9525, "lr": 0.00010344059076960893, "epoch": 2.3248081841432224, "percentage": 46.5, "elapsed_time": "4:40:40", "remaining_time": "5:22:58"} +{"current_steps": 910, "total_steps": 1955, "loss": 1.0263, "lr": 0.00010330394362307426, "epoch": 2.3273657289002556, "percentage": 46.55, "elapsed_time": "4:40:59", "remaining_time": "5:22:40"} +{"current_steps": 911, "total_steps": 1955, "loss": 1.0032, "lr": 0.00010316722214085048, "epoch": 2.329923273657289, "percentage": 46.6, "elapsed_time": "4:41:17", "remaining_time": "5:22:21"} +{"current_steps": 912, "total_steps": 1955, "loss": 0.9655, "lr": 0.00010303042675905623, "epoch": 2.3324808184143224, "percentage": 46.65, "elapsed_time": "4:41:35", "remaining_time": "5:22:02"} +{"current_steps": 913, "total_steps": 1955, "loss": 0.9963, "lr": 0.00010289355791404597, "epoch": 2.3350383631713556, "percentage": 46.7, "elapsed_time": "4:41:53", "remaining_time": "5:21:43"} +{"current_steps": 914, "total_steps": 1955, "loss": 0.9959, "lr": 0.00010275661604240844, "epoch": 2.337595907928389, "percentage": 46.75, "elapsed_time": "4:42:12", "remaining_time": "5:21:25"} +{"current_steps": 915, "total_steps": 1955, "loss": 0.9923, "lr": 0.00010261960158096538, "epoch": 2.340153452685422, "percentage": 46.8, "elapsed_time": "4:42:30", 
"remaining_time": "5:21:06"} +{"current_steps": 916, "total_steps": 1955, "loss": 1.0133, "lr": 0.00010248251496677002, "epoch": 2.342710997442455, "percentage": 46.85, "elapsed_time": "4:42:48", "remaining_time": "5:20:46"} +{"current_steps": 917, "total_steps": 1955, "loss": 0.9559, "lr": 0.00010234535663710578, "epoch": 2.3452685421994883, "percentage": 46.91, "elapsed_time": "4:43:06", "remaining_time": "5:20:28"} +{"current_steps": 918, "total_steps": 1955, "loss": 0.9839, "lr": 0.00010220812702948483, "epoch": 2.3478260869565215, "percentage": 46.96, "elapsed_time": "4:43:25", "remaining_time": "5:20:09"} +{"current_steps": 919, "total_steps": 1955, "loss": 0.9695, "lr": 0.00010207082658164668, "epoch": 2.350383631713555, "percentage": 47.01, "elapsed_time": "4:43:44", "remaining_time": "5:19:51"} +{"current_steps": 920, "total_steps": 1955, "loss": 0.9699, "lr": 0.00010193345573155686, "epoch": 2.3529411764705883, "percentage": 47.06, "elapsed_time": "4:44:02", "remaining_time": "5:19:33"} +{"current_steps": 921, "total_steps": 1955, "loss": 0.9737, "lr": 0.00010179601491740546, "epoch": 2.3554987212276215, "percentage": 47.11, "elapsed_time": "4:44:21", "remaining_time": "5:19:14"} +{"current_steps": 922, "total_steps": 1955, "loss": 0.9734, "lr": 0.00010165850457760569, "epoch": 2.3580562659846547, "percentage": 47.16, "elapsed_time": "4:44:39", "remaining_time": "5:18:55"} +{"current_steps": 923, "total_steps": 1955, "loss": 0.9758, "lr": 0.00010152092515079263, "epoch": 2.360613810741688, "percentage": 47.21, "elapsed_time": "4:44:58", "remaining_time": "5:18:37"} +{"current_steps": 924, "total_steps": 1955, "loss": 0.9843, "lr": 0.00010138327707582161, "epoch": 2.363171355498721, "percentage": 47.26, "elapsed_time": "4:45:16", "remaining_time": "5:18:18"} +{"current_steps": 925, "total_steps": 1955, "loss": 0.9718, "lr": 0.00010124556079176705, "epoch": 2.3657289002557547, "percentage": 47.31, "elapsed_time": "4:45:35", "remaining_time": "5:18:00"} 
+{"current_steps": 926, "total_steps": 1955, "loss": 0.9485, "lr": 0.0001011077767379209, "epoch": 2.368286445012788, "percentage": 47.37, "elapsed_time": "4:45:53", "remaining_time": "5:17:42"} +{"current_steps": 927, "total_steps": 1955, "loss": 1.0041, "lr": 0.00010096992535379125, "epoch": 2.370843989769821, "percentage": 47.42, "elapsed_time": "4:46:11", "remaining_time": "5:17:22"} +{"current_steps": 928, "total_steps": 1955, "loss": 1.0095, "lr": 0.00010083200707910109, "epoch": 2.373401534526854, "percentage": 47.47, "elapsed_time": "4:46:30", "remaining_time": "5:17:04"} +{"current_steps": 929, "total_steps": 1955, "loss": 0.9793, "lr": 0.00010069402235378657, "epoch": 2.3759590792838874, "percentage": 47.52, "elapsed_time": "4:46:49", "remaining_time": "5:16:45"} +{"current_steps": 930, "total_steps": 1955, "loss": 1.003, "lr": 0.000100555971617996, "epoch": 2.3785166240409206, "percentage": 47.57, "elapsed_time": "4:47:07", "remaining_time": "5:16:27"} +{"current_steps": 931, "total_steps": 1955, "loss": 0.9707, "lr": 0.00010041785531208813, "epoch": 2.381074168797954, "percentage": 47.62, "elapsed_time": "4:47:26", "remaining_time": "5:16:09"} +{"current_steps": 932, "total_steps": 1955, "loss": 0.9943, "lr": 0.00010027967387663098, "epoch": 2.3836317135549874, "percentage": 47.67, "elapsed_time": "4:47:44", "remaining_time": "5:15:50"} +{"current_steps": 933, "total_steps": 1955, "loss": 0.978, "lr": 0.00010014142775240018, "epoch": 2.3861892583120206, "percentage": 47.72, "elapsed_time": "4:48:03", "remaining_time": "5:15:32"} +{"current_steps": 934, "total_steps": 1955, "loss": 0.9654, "lr": 0.00010000311738037786, "epoch": 2.3887468030690537, "percentage": 47.77, "elapsed_time": "4:48:22", "remaining_time": "5:15:13"} +{"current_steps": 935, "total_steps": 1955, "loss": 0.964, "lr": 9.986474320175097e-05, "epoch": 2.391304347826087, "percentage": 47.83, "elapsed_time": "4:48:40", "remaining_time": "5:14:55"} +{"current_steps": 936, "total_steps": 
1955, "loss": 0.9825, "lr": 9.972630565791003e-05, "epoch": 2.39386189258312, "percentage": 47.88, "elapsed_time": "4:48:59", "remaining_time": "5:14:36"} +{"current_steps": 937, "total_steps": 1955, "loss": 0.9851, "lr": 9.958780519044772e-05, "epoch": 2.3964194373401533, "percentage": 47.93, "elapsed_time": "4:49:18", "remaining_time": "5:14:18"} +{"current_steps": 938, "total_steps": 1955, "loss": 0.9939, "lr": 9.944924224115737e-05, "epoch": 2.398976982097187, "percentage": 47.98, "elapsed_time": "4:49:36", "remaining_time": "5:14:00"} +{"current_steps": 939, "total_steps": 1955, "loss": 0.9781, "lr": 9.931061725203167e-05, "epoch": 2.40153452685422, "percentage": 48.03, "elapsed_time": "4:49:55", "remaining_time": "5:13:41"} +{"current_steps": 940, "total_steps": 1955, "loss": 0.9868, "lr": 9.917193066526122e-05, "epoch": 2.4040920716112533, "percentage": 48.08, "elapsed_time": "4:50:13", "remaining_time": "5:13:23"} +{"current_steps": 941, "total_steps": 1955, "loss": 0.9754, "lr": 9.903318292323301e-05, "epoch": 2.4066496163682864, "percentage": 48.13, "elapsed_time": "4:50:32", "remaining_time": "5:13:04"} +{"current_steps": 942, "total_steps": 1955, "loss": 0.9859, "lr": 9.889437446852923e-05, "epoch": 2.4092071611253196, "percentage": 48.18, "elapsed_time": "4:50:51", "remaining_time": "5:12:46"} +{"current_steps": 943, "total_steps": 1955, "loss": 0.9896, "lr": 9.875550574392565e-05, "epoch": 2.411764705882353, "percentage": 48.24, "elapsed_time": "4:51:09", "remaining_time": "5:12:27"} +{"current_steps": 944, "total_steps": 1955, "loss": 0.9881, "lr": 9.86165771923903e-05, "epoch": 2.414322250639386, "percentage": 48.29, "elapsed_time": "4:51:28", "remaining_time": "5:12:09"} +{"current_steps": 945, "total_steps": 1955, "loss": 0.9428, "lr": 9.84775892570821e-05, "epoch": 2.4168797953964196, "percentage": 48.34, "elapsed_time": "4:51:47", "remaining_time": "5:11:51"} +{"current_steps": 946, "total_steps": 1955, "loss": 0.9622, "lr": 
9.833854238134931e-05, "epoch": 2.419437340153453, "percentage": 48.39, "elapsed_time": "4:52:05", "remaining_time": "5:11:32"} +{"current_steps": 947, "total_steps": 1955, "loss": 1.0125, "lr": 9.819943700872828e-05, "epoch": 2.421994884910486, "percentage": 48.44, "elapsed_time": "4:52:24", "remaining_time": "5:11:14"} +{"current_steps": 948, "total_steps": 1955, "loss": 0.9712, "lr": 9.806027358294195e-05, "epoch": 2.424552429667519, "percentage": 48.49, "elapsed_time": "4:52:42", "remaining_time": "5:10:56"} +{"current_steps": 949, "total_steps": 1955, "loss": 0.9851, "lr": 9.792105254789834e-05, "epoch": 2.4271099744245523, "percentage": 48.54, "elapsed_time": "4:53:01", "remaining_time": "5:10:37"} +{"current_steps": 950, "total_steps": 1955, "loss": 0.9683, "lr": 9.778177434768935e-05, "epoch": 2.4296675191815855, "percentage": 48.59, "elapsed_time": "4:53:19", "remaining_time": "5:10:18"} +{"current_steps": 951, "total_steps": 1955, "loss": 0.9841, "lr": 9.764243942658919e-05, "epoch": 2.4322250639386187, "percentage": 48.64, "elapsed_time": "4:53:38", "remaining_time": "5:10:00"} +{"current_steps": 952, "total_steps": 1955, "loss": 0.9492, "lr": 9.750304822905297e-05, "epoch": 2.4347826086956523, "percentage": 48.7, "elapsed_time": "4:53:57", "remaining_time": "5:09:41"} +{"current_steps": 953, "total_steps": 1955, "loss": 0.9996, "lr": 9.736360119971537e-05, "epoch": 2.4373401534526855, "percentage": 48.75, "elapsed_time": "4:54:15", "remaining_time": "5:09:22"} +{"current_steps": 954, "total_steps": 1955, "loss": 1.0015, "lr": 9.722409878338908e-05, "epoch": 2.4398976982097187, "percentage": 48.8, "elapsed_time": "4:54:33", "remaining_time": "5:09:04"} +{"current_steps": 955, "total_steps": 1955, "loss": 0.9774, "lr": 9.708454142506354e-05, "epoch": 2.442455242966752, "percentage": 48.85, "elapsed_time": "4:54:51", "remaining_time": "5:08:45"} +{"current_steps": 956, "total_steps": 1955, "loss": 0.9847, "lr": 9.694492956990345e-05, "epoch": 
2.445012787723785, "percentage": 48.9, "elapsed_time": "4:55:10", "remaining_time": "5:08:26"} +{"current_steps": 957, "total_steps": 1955, "loss": 0.9565, "lr": 9.680526366324726e-05, "epoch": 2.4475703324808182, "percentage": 48.95, "elapsed_time": "4:55:29", "remaining_time": "5:08:08"} +{"current_steps": 958, "total_steps": 1955, "loss": 0.9517, "lr": 9.666554415060596e-05, "epoch": 2.4501278772378514, "percentage": 49.0, "elapsed_time": "4:55:47", "remaining_time": "5:07:49"} +{"current_steps": 959, "total_steps": 1955, "loss": 0.9743, "lr": 9.652577147766142e-05, "epoch": 2.452685421994885, "percentage": 49.05, "elapsed_time": "4:56:05", "remaining_time": "5:07:30"} +{"current_steps": 960, "total_steps": 1955, "loss": 0.9506, "lr": 9.638594609026515e-05, "epoch": 2.455242966751918, "percentage": 49.1, "elapsed_time": "4:56:23", "remaining_time": "5:07:12"} +{"current_steps": 961, "total_steps": 1955, "loss": 1.0158, "lr": 9.624606843443675e-05, "epoch": 2.4578005115089514, "percentage": 49.16, "elapsed_time": "4:56:42", "remaining_time": "5:06:53"} +{"current_steps": 962, "total_steps": 1955, "loss": 0.992, "lr": 9.610613895636263e-05, "epoch": 2.4603580562659846, "percentage": 49.21, "elapsed_time": "4:57:01", "remaining_time": "5:06:35"} +{"current_steps": 963, "total_steps": 1955, "loss": 0.999, "lr": 9.596615810239445e-05, "epoch": 2.4629156010230178, "percentage": 49.26, "elapsed_time": "4:57:19", "remaining_time": "5:06:16"} +{"current_steps": 964, "total_steps": 1955, "loss": 1.0055, "lr": 9.582612631904779e-05, "epoch": 2.4654731457800514, "percentage": 49.31, "elapsed_time": "4:57:38", "remaining_time": "5:05:58"} +{"current_steps": 965, "total_steps": 1955, "loss": 0.9579, "lr": 9.568604405300062e-05, "epoch": 2.4680306905370846, "percentage": 49.36, "elapsed_time": "4:57:56", "remaining_time": "5:05:39"} +{"current_steps": 966, "total_steps": 1955, "loss": 0.9731, "lr": 9.554591175109194e-05, "epoch": 2.4705882352941178, "percentage": 49.41, 
"elapsed_time": "4:58:14", "remaining_time": "5:05:20"} +{"current_steps": 967, "total_steps": 1955, "loss": 0.9817, "lr": 9.54057298603205e-05, "epoch": 2.473145780051151, "percentage": 49.46, "elapsed_time": "4:58:32", "remaining_time": "5:05:02"} +{"current_steps": 968, "total_steps": 1955, "loss": 0.9874, "lr": 9.526549882784305e-05, "epoch": 2.475703324808184, "percentage": 49.51, "elapsed_time": "4:58:51", "remaining_time": "5:04:43"} +{"current_steps": 969, "total_steps": 1955, "loss": 1.0348, "lr": 9.512521910097316e-05, "epoch": 2.4782608695652173, "percentage": 49.57, "elapsed_time": "4:59:10", "remaining_time": "5:04:25"} +{"current_steps": 970, "total_steps": 1955, "loss": 0.9565, "lr": 9.49848911271798e-05, "epoch": 2.4808184143222505, "percentage": 49.62, "elapsed_time": "4:59:28", "remaining_time": "5:04:06"} +{"current_steps": 971, "total_steps": 1955, "loss": 0.9784, "lr": 9.484451535408572e-05, "epoch": 2.483375959079284, "percentage": 49.67, "elapsed_time": "4:59:47", "remaining_time": "5:03:48"} +{"current_steps": 972, "total_steps": 1955, "loss": 0.9983, "lr": 9.470409222946623e-05, "epoch": 2.4859335038363173, "percentage": 49.72, "elapsed_time": "5:00:05", "remaining_time": "5:03:29"} +{"current_steps": 973, "total_steps": 1955, "loss": 0.98, "lr": 9.456362220124766e-05, "epoch": 2.4884910485933505, "percentage": 49.77, "elapsed_time": "5:00:23", "remaining_time": "5:03:10"} +{"current_steps": 974, "total_steps": 1955, "loss": 0.9779, "lr": 9.442310571750588e-05, "epoch": 2.4910485933503836, "percentage": 49.82, "elapsed_time": "5:00:41", "remaining_time": "5:02:51"} +{"current_steps": 975, "total_steps": 1955, "loss": 0.9581, "lr": 9.42825432264651e-05, "epoch": 2.493606138107417, "percentage": 49.87, "elapsed_time": "5:01:00", "remaining_time": "5:02:32"} +{"current_steps": 976, "total_steps": 1955, "loss": 0.9855, "lr": 9.414193517649614e-05, "epoch": 2.49616368286445, "percentage": 49.92, "elapsed_time": "5:01:18", "remaining_time": 
"5:02:14"} +{"current_steps": 977, "total_steps": 1955, "loss": 0.9754, "lr": 9.400128201611521e-05, "epoch": 2.498721227621483, "percentage": 49.97, "elapsed_time": "5:01:36", "remaining_time": "5:01:55"} +{"current_steps": 978, "total_steps": 1955, "loss": 0.9909, "lr": 9.386058419398243e-05, "epoch": 2.501278772378517, "percentage": 50.03, "elapsed_time": "5:01:55", "remaining_time": "5:01:36"} +{"current_steps": 979, "total_steps": 1955, "loss": 0.9946, "lr": 9.371984215890032e-05, "epoch": 2.50383631713555, "percentage": 50.08, "elapsed_time": "5:02:13", "remaining_time": "5:01:18"} +{"current_steps": 980, "total_steps": 1955, "loss": 0.9543, "lr": 9.357905635981251e-05, "epoch": 2.506393861892583, "percentage": 50.13, "elapsed_time": "5:02:32", "remaining_time": "5:00:59"} +{"current_steps": 981, "total_steps": 1955, "loss": 0.9638, "lr": 9.34382272458022e-05, "epoch": 2.5089514066496164, "percentage": 50.18, "elapsed_time": "5:02:51", "remaining_time": "5:00:41"} +{"current_steps": 982, "total_steps": 1955, "loss": 0.9464, "lr": 9.329735526609071e-05, "epoch": 2.5115089514066495, "percentage": 50.23, "elapsed_time": "5:03:09", "remaining_time": "5:00:23"} +{"current_steps": 983, "total_steps": 1955, "loss": 0.9565, "lr": 9.315644087003614e-05, "epoch": 2.5140664961636827, "percentage": 50.28, "elapsed_time": "5:03:28", "remaining_time": "5:00:04"} +{"current_steps": 984, "total_steps": 1955, "loss": 0.987, "lr": 9.301548450713193e-05, "epoch": 2.516624040920716, "percentage": 50.33, "elapsed_time": "5:03:46", "remaining_time": "4:59:45"} +{"current_steps": 985, "total_steps": 1955, "loss": 0.985, "lr": 9.28744866270053e-05, "epoch": 2.5191815856777495, "percentage": 50.38, "elapsed_time": "5:04:05", "remaining_time": "4:59:28"} +{"current_steps": 986, "total_steps": 1955, "loss": 0.958, "lr": 9.273344767941595e-05, "epoch": 2.5217391304347827, "percentage": 50.43, "elapsed_time": "5:04:24", "remaining_time": "4:59:09"} +{"current_steps": 987, "total_steps": 
1955, "loss": 0.9693, "lr": 9.259236811425458e-05, "epoch": 2.524296675191816, "percentage": 50.49, "elapsed_time": "5:04:42", "remaining_time": "4:58:50"} +{"current_steps": 988, "total_steps": 1955, "loss": 0.9938, "lr": 9.245124838154145e-05, "epoch": 2.526854219948849, "percentage": 50.54, "elapsed_time": "5:05:01", "remaining_time": "4:58:32"} +{"current_steps": 989, "total_steps": 1955, "loss": 0.9934, "lr": 9.231008893142496e-05, "epoch": 2.5294117647058822, "percentage": 50.59, "elapsed_time": "5:05:19", "remaining_time": "4:58:13"} +{"current_steps": 990, "total_steps": 1955, "loss": 1.0013, "lr": 9.216889021418015e-05, "epoch": 2.531969309462916, "percentage": 50.64, "elapsed_time": "5:05:38", "remaining_time": "4:57:54"} +{"current_steps": 991, "total_steps": 1955, "loss": 0.9831, "lr": 9.202765268020734e-05, "epoch": 2.5345268542199486, "percentage": 50.69, "elapsed_time": "5:05:56", "remaining_time": "4:57:36"} +{"current_steps": 992, "total_steps": 1955, "loss": 0.9997, "lr": 9.188637678003078e-05, "epoch": 2.5370843989769822, "percentage": 50.74, "elapsed_time": "5:06:15", "remaining_time": "4:57:18"} +{"current_steps": 993, "total_steps": 1955, "loss": 0.9828, "lr": 9.17450629642969e-05, "epoch": 2.5396419437340154, "percentage": 50.79, "elapsed_time": "5:06:33", "remaining_time": "4:56:59"} +{"current_steps": 994, "total_steps": 1955, "loss": 0.9643, "lr": 9.160371168377322e-05, "epoch": 2.5421994884910486, "percentage": 50.84, "elapsed_time": "5:06:51", "remaining_time": "4:56:40"} +{"current_steps": 995, "total_steps": 1955, "loss": 0.9582, "lr": 9.146232338934671e-05, "epoch": 2.544757033248082, "percentage": 50.9, "elapsed_time": "5:07:10", "remaining_time": "4:56:21"} +{"current_steps": 996, "total_steps": 1955, "loss": 0.9744, "lr": 9.132089853202243e-05, "epoch": 2.547314578005115, "percentage": 50.95, "elapsed_time": "5:07:28", "remaining_time": "4:56:03"} +{"current_steps": 997, "total_steps": 1955, "loss": 0.9792, "lr": 
9.117943756292208e-05, "epoch": 2.5498721227621486, "percentage": 51.0, "elapsed_time": "5:07:47", "remaining_time": "4:55:45"} +{"current_steps": 998, "total_steps": 1955, "loss": 0.9755, "lr": 9.103794093328248e-05, "epoch": 2.5524296675191813, "percentage": 51.05, "elapsed_time": "5:08:06", "remaining_time": "4:55:26"} +{"current_steps": 999, "total_steps": 1955, "loss": 0.9716, "lr": 9.089640909445431e-05, "epoch": 2.554987212276215, "percentage": 51.1, "elapsed_time": "5:08:25", "remaining_time": "4:55:08"} +{"current_steps": 1000, "total_steps": 1955, "loss": 0.9747, "lr": 9.075484249790048e-05, "epoch": 2.557544757033248, "percentage": 51.15, "elapsed_time": "5:08:43", "remaining_time": "4:54:50"} +{"current_steps": 1001, "total_steps": 1955, "loss": 0.9762, "lr": 9.061324159519476e-05, "epoch": 2.5601023017902813, "percentage": 51.2, "elapsed_time": "5:09:02", "remaining_time": "4:54:31"} +{"current_steps": 1002, "total_steps": 1955, "loss": 0.9674, "lr": 9.047160683802046e-05, "epoch": 2.5626598465473145, "percentage": 51.25, "elapsed_time": "5:09:20", "remaining_time": "4:54:12"} +{"current_steps": 1003, "total_steps": 1955, "loss": 0.9942, "lr": 9.032993867816876e-05, "epoch": 2.5652173913043477, "percentage": 51.3, "elapsed_time": "5:09:38", "remaining_time": "4:53:54"} +{"current_steps": 1004, "total_steps": 1955, "loss": 1.0001, "lr": 9.018823756753746e-05, "epoch": 2.5677749360613813, "percentage": 51.36, "elapsed_time": "5:09:57", "remaining_time": "4:53:35"} +{"current_steps": 1005, "total_steps": 1955, "loss": 0.9929, "lr": 9.00465039581294e-05, "epoch": 2.5703324808184145, "percentage": 51.41, "elapsed_time": "5:10:16", "remaining_time": "4:53:17"} +{"current_steps": 1006, "total_steps": 1955, "loss": 0.9318, "lr": 8.990473830205118e-05, "epoch": 2.5728900255754477, "percentage": 51.46, "elapsed_time": "5:10:35", "remaining_time": "4:52:59"} +{"current_steps": 1007, "total_steps": 1955, "loss": 1.0079, "lr": 8.976294105151154e-05, "epoch": 
2.575447570332481, "percentage": 51.51, "elapsed_time": "5:10:53", "remaining_time": "4:52:40"} +{"current_steps": 1008, "total_steps": 1955, "loss": 0.9952, "lr": 8.962111265882006e-05, "epoch": 2.578005115089514, "percentage": 51.56, "elapsed_time": "5:11:11", "remaining_time": "4:52:21"} +{"current_steps": 1009, "total_steps": 1955, "loss": 0.9941, "lr": 8.947925357638561e-05, "epoch": 2.580562659846547, "percentage": 51.61, "elapsed_time": "5:11:30", "remaining_time": "4:52:03"} +{"current_steps": 1010, "total_steps": 1955, "loss": 0.9816, "lr": 8.933736425671495e-05, "epoch": 2.5831202046035804, "percentage": 51.66, "elapsed_time": "5:11:48", "remaining_time": "4:51:44"} +{"current_steps": 1011, "total_steps": 1955, "loss": 0.9818, "lr": 8.91954451524114e-05, "epoch": 2.585677749360614, "percentage": 51.71, "elapsed_time": "5:12:06", "remaining_time": "4:51:25"} +{"current_steps": 1012, "total_steps": 1955, "loss": 0.9876, "lr": 8.905349671617313e-05, "epoch": 2.588235294117647, "percentage": 51.76, "elapsed_time": "5:12:25", "remaining_time": "4:51:07"} +{"current_steps": 1013, "total_steps": 1955, "loss": 0.9702, "lr": 8.891151940079198e-05, "epoch": 2.5907928388746804, "percentage": 51.82, "elapsed_time": "5:12:43", "remaining_time": "4:50:48"} +{"current_steps": 1014, "total_steps": 1955, "loss": 0.9877, "lr": 8.87695136591519e-05, "epoch": 2.5933503836317136, "percentage": 51.87, "elapsed_time": "5:13:02", "remaining_time": "4:50:29"} +{"current_steps": 1015, "total_steps": 1955, "loss": 0.9707, "lr": 8.862747994422744e-05, "epoch": 2.5959079283887467, "percentage": 51.92, "elapsed_time": "5:13:20", "remaining_time": "4:50:11"} +{"current_steps": 1016, "total_steps": 1955, "loss": 0.9703, "lr": 8.848541870908248e-05, "epoch": 2.59846547314578, "percentage": 51.97, "elapsed_time": "5:13:39", "remaining_time": "4:49:52"} +{"current_steps": 1017, "total_steps": 1955, "loss": 0.979, "lr": 8.834333040686867e-05, "epoch": 2.601023017902813, "percentage": 52.02, 
"elapsed_time": "5:13:57", "remaining_time": "4:49:34"} +{"current_steps": 1018, "total_steps": 1955, "loss": 0.9829, "lr": 8.820121549082389e-05, "epoch": 2.6035805626598467, "percentage": 52.07, "elapsed_time": "5:14:15", "remaining_time": "4:49:15"} +{"current_steps": 1019, "total_steps": 1955, "loss": 0.9558, "lr": 8.805907441427107e-05, "epoch": 2.60613810741688, "percentage": 52.12, "elapsed_time": "5:14:33", "remaining_time": "4:48:56"} +{"current_steps": 1020, "total_steps": 1955, "loss": 0.9644, "lr": 8.791690763061646e-05, "epoch": 2.608695652173913, "percentage": 52.17, "elapsed_time": "5:14:52", "remaining_time": "4:48:38"} +{"current_steps": 1021, "total_steps": 1955, "loss": 0.9769, "lr": 8.777471559334835e-05, "epoch": 2.6112531969309463, "percentage": 52.23, "elapsed_time": "5:15:11", "remaining_time": "4:48:19"} +{"current_steps": 1022, "total_steps": 1955, "loss": 0.9699, "lr": 8.763249875603568e-05, "epoch": 2.6138107416879794, "percentage": 52.28, "elapsed_time": "5:15:29", "remaining_time": "4:48:01"} +{"current_steps": 1023, "total_steps": 1955, "loss": 0.9913, "lr": 8.74902575723263e-05, "epoch": 2.6163682864450126, "percentage": 52.33, "elapsed_time": "5:15:47", "remaining_time": "4:47:42"} +{"current_steps": 1024, "total_steps": 1955, "loss": 0.9714, "lr": 8.734799249594593e-05, "epoch": 2.618925831202046, "percentage": 52.38, "elapsed_time": "5:16:06", "remaining_time": "4:47:24"} +{"current_steps": 1025, "total_steps": 1955, "loss": 0.9667, "lr": 8.720570398069639e-05, "epoch": 2.6214833759590794, "percentage": 52.43, "elapsed_time": "5:16:25", "remaining_time": "4:47:05"} +{"current_steps": 1026, "total_steps": 1955, "loss": 0.9748, "lr": 8.706339248045425e-05, "epoch": 2.6240409207161126, "percentage": 52.48, "elapsed_time": "5:16:43", "remaining_time": "4:46:47"} +{"current_steps": 1027, "total_steps": 1955, "loss": 0.9813, "lr": 8.692105844916946e-05, "epoch": 2.626598465473146, "percentage": 52.53, "elapsed_time": "5:17:02", 
"remaining_time": "4:46:28"} +{"current_steps": 1028, "total_steps": 1955, "loss": 0.9908, "lr": 8.677870234086383e-05, "epoch": 2.629156010230179, "percentage": 52.58, "elapsed_time": "5:17:20", "remaining_time": "4:46:09"} +{"current_steps": 1029, "total_steps": 1955, "loss": 0.9936, "lr": 8.663632460962956e-05, "epoch": 2.631713554987212, "percentage": 52.63, "elapsed_time": "5:17:38", "remaining_time": "4:45:51"} +{"current_steps": 1030, "total_steps": 1955, "loss": 0.9795, "lr": 8.649392570962781e-05, "epoch": 2.634271099744246, "percentage": 52.69, "elapsed_time": "5:17:56", "remaining_time": "4:45:32"} +{"current_steps": 1031, "total_steps": 1955, "loss": 0.984, "lr": 8.635150609508733e-05, "epoch": 2.6368286445012785, "percentage": 52.74, "elapsed_time": "5:18:15", "remaining_time": "4:45:13"} +{"current_steps": 1032, "total_steps": 1955, "loss": 0.9536, "lr": 8.620906622030292e-05, "epoch": 2.639386189258312, "percentage": 52.79, "elapsed_time": "5:18:34", "remaining_time": "4:44:55"} +{"current_steps": 1033, "total_steps": 1955, "loss": 1.0088, "lr": 8.6066606539634e-05, "epoch": 2.6419437340153453, "percentage": 52.84, "elapsed_time": "5:18:52", "remaining_time": "4:44:36"} +{"current_steps": 1034, "total_steps": 1955, "loss": 0.9876, "lr": 8.592412750750312e-05, "epoch": 2.6445012787723785, "percentage": 52.89, "elapsed_time": "5:19:11", "remaining_time": "4:44:18"} +{"current_steps": 1035, "total_steps": 1955, "loss": 0.9915, "lr": 8.578162957839462e-05, "epoch": 2.6470588235294117, "percentage": 52.94, "elapsed_time": "5:19:29", "remaining_time": "4:43:59"} +{"current_steps": 1036, "total_steps": 1955, "loss": 0.9638, "lr": 8.563911320685312e-05, "epoch": 2.649616368286445, "percentage": 52.99, "elapsed_time": "5:19:48", "remaining_time": "4:43:41"} +{"current_steps": 1037, "total_steps": 1955, "loss": 0.9713, "lr": 8.549657884748205e-05, "epoch": 2.6521739130434785, "percentage": 53.04, "elapsed_time": "5:20:06", "remaining_time": "4:43:22"} 
+{"current_steps": 1038, "total_steps": 1955, "loss": 0.9772, "lr": 8.535402695494221e-05, "epoch": 2.6547314578005117, "percentage": 53.09, "elapsed_time": "5:20:25", "remaining_time": "4:43:04"} +{"current_steps": 1039, "total_steps": 1955, "loss": 0.9841, "lr": 8.521145798395035e-05, "epoch": 2.657289002557545, "percentage": 53.15, "elapsed_time": "5:20:43", "remaining_time": "4:42:45"} +{"current_steps": 1040, "total_steps": 1955, "loss": 0.9955, "lr": 8.506887238927764e-05, "epoch": 2.659846547314578, "percentage": 53.2, "elapsed_time": "5:21:01", "remaining_time": "4:42:26"} +{"current_steps": 1041, "total_steps": 1955, "loss": 0.9729, "lr": 8.492627062574837e-05, "epoch": 2.662404092071611, "percentage": 53.25, "elapsed_time": "5:21:20", "remaining_time": "4:42:08"} +{"current_steps": 1042, "total_steps": 1955, "loss": 1.0041, "lr": 8.478365314823831e-05, "epoch": 2.6649616368286444, "percentage": 53.3, "elapsed_time": "5:21:38", "remaining_time": "4:41:49"} +{"current_steps": 1043, "total_steps": 1955, "loss": 0.9385, "lr": 8.464102041167343e-05, "epoch": 2.6675191815856776, "percentage": 53.35, "elapsed_time": "5:21:57", "remaining_time": "4:41:31"} +{"current_steps": 1044, "total_steps": 1955, "loss": 0.9798, "lr": 8.449837287102837e-05, "epoch": 2.670076726342711, "percentage": 53.4, "elapsed_time": "5:22:16", "remaining_time": "4:41:12"} +{"current_steps": 1045, "total_steps": 1955, "loss": 0.9664, "lr": 8.43557109813249e-05, "epoch": 2.6726342710997444, "percentage": 53.45, "elapsed_time": "5:22:34", "remaining_time": "4:40:54"} +{"current_steps": 1046, "total_steps": 1955, "loss": 0.9512, "lr": 8.421303519763067e-05, "epoch": 2.6751918158567776, "percentage": 53.5, "elapsed_time": "5:22:52", "remaining_time": "4:40:35"} +{"current_steps": 1047, "total_steps": 1955, "loss": 0.9847, "lr": 8.407034597505762e-05, "epoch": 2.6777493606138107, "percentage": 53.55, "elapsed_time": "5:23:11", "remaining_time": "4:40:16"} +{"current_steps": 1048, 
"total_steps": 1955, "loss": 0.9847, "lr": 8.392764376876049e-05, "epoch": 2.680306905370844, "percentage": 53.61, "elapsed_time": "5:23:29", "remaining_time": "4:39:58"} +{"current_steps": 1049, "total_steps": 1955, "loss": 0.9592, "lr": 8.378492903393555e-05, "epoch": 2.682864450127877, "percentage": 53.66, "elapsed_time": "5:23:47", "remaining_time": "4:39:39"} +{"current_steps": 1050, "total_steps": 1955, "loss": 0.9846, "lr": 8.364220222581896e-05, "epoch": 2.6854219948849103, "percentage": 53.71, "elapsed_time": "5:24:06", "remaining_time": "4:39:20"} +{"current_steps": 1051, "total_steps": 1955, "loss": 0.9811, "lr": 8.34994637996854e-05, "epoch": 2.687979539641944, "percentage": 53.76, "elapsed_time": "5:24:24", "remaining_time": "4:39:02"} +{"current_steps": 1052, "total_steps": 1955, "loss": 0.9744, "lr": 8.335671421084661e-05, "epoch": 2.690537084398977, "percentage": 53.81, "elapsed_time": "5:24:43", "remaining_time": "4:38:43"} +{"current_steps": 1053, "total_steps": 1955, "loss": 0.9868, "lr": 8.321395391464995e-05, "epoch": 2.6930946291560103, "percentage": 53.86, "elapsed_time": "5:25:01", "remaining_time": "4:38:25"} +{"current_steps": 1054, "total_steps": 1955, "loss": 0.951, "lr": 8.307118336647694e-05, "epoch": 2.6956521739130435, "percentage": 53.91, "elapsed_time": "5:25:20", "remaining_time": "4:38:06"} +{"current_steps": 1055, "total_steps": 1955, "loss": 0.9643, "lr": 8.292840302174178e-05, "epoch": 2.6982097186700766, "percentage": 53.96, "elapsed_time": "5:25:38", "remaining_time": "4:37:48"} +{"current_steps": 1056, "total_steps": 1955, "loss": 0.9841, "lr": 8.278561333588993e-05, "epoch": 2.70076726342711, "percentage": 54.02, "elapsed_time": "5:25:57", "remaining_time": "4:37:29"} +{"current_steps": 1057, "total_steps": 1955, "loss": 0.984, "lr": 8.264281476439662e-05, "epoch": 2.703324808184143, "percentage": 54.07, "elapsed_time": "5:26:16", "remaining_time": "4:37:11"} +{"current_steps": 1058, "total_steps": 1955, "loss": 0.9731, 
"lr": 8.250000776276551e-05, "epoch": 2.7058823529411766, "percentage": 54.12, "elapsed_time": "5:26:34", "remaining_time": "4:36:52"} +{"current_steps": 1059, "total_steps": 1955, "loss": 1.0008, "lr": 8.235719278652704e-05, "epoch": 2.70843989769821, "percentage": 54.17, "elapsed_time": "5:26:53", "remaining_time": "4:36:34"} +{"current_steps": 1060, "total_steps": 1955, "loss": 0.96, "lr": 8.221437029123715e-05, "epoch": 2.710997442455243, "percentage": 54.22, "elapsed_time": "5:27:11", "remaining_time": "4:36:15"} +{"current_steps": 1061, "total_steps": 1955, "loss": 1.0134, "lr": 8.20715407324758e-05, "epoch": 2.713554987212276, "percentage": 54.27, "elapsed_time": "5:27:29", "remaining_time": "4:35:56"} +{"current_steps": 1062, "total_steps": 1955, "loss": 0.9869, "lr": 8.192870456584536e-05, "epoch": 2.7161125319693094, "percentage": 54.32, "elapsed_time": "5:27:48", "remaining_time": "4:35:38"} +{"current_steps": 1063, "total_steps": 1955, "loss": 1.0191, "lr": 8.178586224696938e-05, "epoch": 2.718670076726343, "percentage": 54.37, "elapsed_time": "5:28:06", "remaining_time": "4:35:19"} +{"current_steps": 1064, "total_steps": 1955, "loss": 0.9847, "lr": 8.164301423149104e-05, "epoch": 2.7212276214833757, "percentage": 54.42, "elapsed_time": "5:28:25", "remaining_time": "4:35:01"} +{"current_steps": 1065, "total_steps": 1955, "loss": 0.9537, "lr": 8.150016097507161e-05, "epoch": 2.7237851662404093, "percentage": 54.48, "elapsed_time": "5:28:43", "remaining_time": "4:34:42"} +{"current_steps": 1066, "total_steps": 1955, "loss": 0.9715, "lr": 8.135730293338918e-05, "epoch": 2.7263427109974425, "percentage": 54.53, "elapsed_time": "5:29:01", "remaining_time": "4:34:23"} +{"current_steps": 1067, "total_steps": 1955, "loss": 0.9778, "lr": 8.121444056213698e-05, "epoch": 2.7289002557544757, "percentage": 54.58, "elapsed_time": "5:29:20", "remaining_time": "4:34:05"} +{"current_steps": 1068, "total_steps": 1955, "loss": 0.9979, "lr": 8.107157431702219e-05, "epoch": 
2.731457800511509, "percentage": 54.63, "elapsed_time": "5:29:38", "remaining_time": "4:33:46"} +{"current_steps": 1069, "total_steps": 1955, "loss": 0.972, "lr": 8.092870465376422e-05, "epoch": 2.734015345268542, "percentage": 54.68, "elapsed_time": "5:29:57", "remaining_time": "4:33:28"} +{"current_steps": 1070, "total_steps": 1955, "loss": 1.0173, "lr": 8.078583202809347e-05, "epoch": 2.7365728900255757, "percentage": 54.73, "elapsed_time": "5:30:15", "remaining_time": "4:33:09"} +{"current_steps": 1071, "total_steps": 1955, "loss": 0.9681, "lr": 8.064295689574979e-05, "epoch": 2.7391304347826084, "percentage": 54.78, "elapsed_time": "5:30:34", "remaining_time": "4:32:51"} +{"current_steps": 1072, "total_steps": 1955, "loss": 0.9977, "lr": 8.050007971248095e-05, "epoch": 2.741687979539642, "percentage": 54.83, "elapsed_time": "5:30:53", "remaining_time": "4:32:32"} +{"current_steps": 1073, "total_steps": 1955, "loss": 0.9817, "lr": 8.035720093404133e-05, "epoch": 2.7442455242966752, "percentage": 54.88, "elapsed_time": "5:31:11", "remaining_time": "4:32:14"} +{"current_steps": 1074, "total_steps": 1955, "loss": 0.9677, "lr": 8.021432101619034e-05, "epoch": 2.7468030690537084, "percentage": 54.94, "elapsed_time": "5:31:30", "remaining_time": "4:31:56"} +{"current_steps": 1075, "total_steps": 1955, "loss": 1.0198, "lr": 8.007144041469111e-05, "epoch": 2.7493606138107416, "percentage": 54.99, "elapsed_time": "5:31:48", "remaining_time": "4:31:37"} +{"current_steps": 1076, "total_steps": 1955, "loss": 0.9774, "lr": 7.992855958530893e-05, "epoch": 2.7519181585677748, "percentage": 55.04, "elapsed_time": "5:32:07", "remaining_time": "4:31:18"} +{"current_steps": 1077, "total_steps": 1955, "loss": 0.9975, "lr": 7.978567898380968e-05, "epoch": 2.7544757033248084, "percentage": 55.09, "elapsed_time": "5:32:25", "remaining_time": "4:31:00"} +{"current_steps": 1078, "total_steps": 1955, "loss": 0.9601, "lr": 7.96427990659587e-05, "epoch": 2.7570332480818416, "percentage": 
55.14, "elapsed_time": "5:32:44", "remaining_time": "4:30:42"} +{"current_steps": 1079, "total_steps": 1955, "loss": 0.94, "lr": 7.949992028751908e-05, "epoch": 2.7595907928388748, "percentage": 55.19, "elapsed_time": "5:33:03", "remaining_time": "4:30:23"} +{"current_steps": 1080, "total_steps": 1955, "loss": 0.9856, "lr": 7.935704310425022e-05, "epoch": 2.762148337595908, "percentage": 55.24, "elapsed_time": "5:33:21", "remaining_time": "4:30:05"} +{"current_steps": 1081, "total_steps": 1955, "loss": 0.9485, "lr": 7.921416797190653e-05, "epoch": 2.764705882352941, "percentage": 55.29, "elapsed_time": "5:33:39", "remaining_time": "4:29:46"} +{"current_steps": 1082, "total_steps": 1955, "loss": 0.9852, "lr": 7.90712953462358e-05, "epoch": 2.7672634271099743, "percentage": 55.35, "elapsed_time": "5:33:57", "remaining_time": "4:29:27"} +{"current_steps": 1083, "total_steps": 1955, "loss": 0.9843, "lr": 7.892842568297784e-05, "epoch": 2.7698209718670075, "percentage": 55.4, "elapsed_time": "5:34:15", "remaining_time": "4:29:08"} +{"current_steps": 1084, "total_steps": 1955, "loss": 0.9866, "lr": 7.878555943786304e-05, "epoch": 2.772378516624041, "percentage": 55.45, "elapsed_time": "5:34:34", "remaining_time": "4:28:50"} +{"current_steps": 1085, "total_steps": 1955, "loss": 0.9617, "lr": 7.864269706661084e-05, "epoch": 2.7749360613810743, "percentage": 55.5, "elapsed_time": "5:34:53", "remaining_time": "4:28:31"} +{"current_steps": 1086, "total_steps": 1955, "loss": 1.0151, "lr": 7.84998390249284e-05, "epoch": 2.7774936061381075, "percentage": 55.55, "elapsed_time": "5:35:11", "remaining_time": "4:28:13"} +{"current_steps": 1087, "total_steps": 1955, "loss": 0.9416, "lr": 7.8356985768509e-05, "epoch": 2.7800511508951407, "percentage": 55.6, "elapsed_time": "5:35:30", "remaining_time": "4:27:54"} +{"current_steps": 1088, "total_steps": 1955, "loss": 0.9677, "lr": 7.821413775303063e-05, "epoch": 2.782608695652174, "percentage": 55.65, "elapsed_time": "5:35:48", 
"remaining_time": "4:27:36"} +{"current_steps": 1089, "total_steps": 1955, "loss": 0.9878, "lr": 7.807129543415467e-05, "epoch": 2.785166240409207, "percentage": 55.7, "elapsed_time": "5:36:07", "remaining_time": "4:27:17"} +{"current_steps": 1090, "total_steps": 1955, "loss": 0.9559, "lr": 7.792845926752422e-05, "epoch": 2.78772378516624, "percentage": 55.75, "elapsed_time": "5:36:26", "remaining_time": "4:26:59"} +{"current_steps": 1091, "total_steps": 1955, "loss": 0.9315, "lr": 7.778562970876285e-05, "epoch": 2.790281329923274, "percentage": 55.81, "elapsed_time": "5:36:44", "remaining_time": "4:26:40"} +{"current_steps": 1092, "total_steps": 1955, "loss": 0.9905, "lr": 7.764280721347296e-05, "epoch": 2.792838874680307, "percentage": 55.86, "elapsed_time": "5:37:03", "remaining_time": "4:26:22"} +{"current_steps": 1093, "total_steps": 1955, "loss": 0.992, "lr": 7.749999223723451e-05, "epoch": 2.79539641943734, "percentage": 55.91, "elapsed_time": "5:37:21", "remaining_time": "4:26:03"} +{"current_steps": 1094, "total_steps": 1955, "loss": 0.976, "lr": 7.73571852356034e-05, "epoch": 2.7979539641943734, "percentage": 55.96, "elapsed_time": "5:37:40", "remaining_time": "4:25:45"} +{"current_steps": 1095, "total_steps": 1955, "loss": 0.9717, "lr": 7.72143866641101e-05, "epoch": 2.8005115089514065, "percentage": 56.01, "elapsed_time": "5:37:58", "remaining_time": "4:25:26"} +{"current_steps": 1096, "total_steps": 1955, "loss": 1.019, "lr": 7.707159697825824e-05, "epoch": 2.80306905370844, "percentage": 56.06, "elapsed_time": "5:38:17", "remaining_time": "4:25:08"} +{"current_steps": 1097, "total_steps": 1955, "loss": 0.9877, "lr": 7.692881663352306e-05, "epoch": 2.805626598465473, "percentage": 56.11, "elapsed_time": "5:38:35", "remaining_time": "4:24:49"} +{"current_steps": 1098, "total_steps": 1955, "loss": 1.0, "lr": 7.678604608535007e-05, "epoch": 2.8081841432225065, "percentage": 56.16, "elapsed_time": "5:38:54", "remaining_time": "4:24:30"} +{"current_steps": 
1099, "total_steps": 1955, "loss": 0.9913, "lr": 7.664328578915341e-05, "epoch": 2.8107416879795397, "percentage": 56.21, "elapsed_time": "5:39:12", "remaining_time": "4:24:12"} +{"current_steps": 1100, "total_steps": 1955, "loss": 0.9667, "lr": 7.650053620031461e-05, "epoch": 2.813299232736573, "percentage": 56.27, "elapsed_time": "5:39:31", "remaining_time": "4:23:54"} +{"current_steps": 1101, "total_steps": 1955, "loss": 0.9941, "lr": 7.635779777418105e-05, "epoch": 2.815856777493606, "percentage": 56.32, "elapsed_time": "5:39:49", "remaining_time": "4:23:35"} +{"current_steps": 1102, "total_steps": 1955, "loss": 0.9755, "lr": 7.621507096606445e-05, "epoch": 2.8184143222506393, "percentage": 56.37, "elapsed_time": "5:40:08", "remaining_time": "4:23:17"} +{"current_steps": 1103, "total_steps": 1955, "loss": 0.9896, "lr": 7.607235623123952e-05, "epoch": 2.820971867007673, "percentage": 56.42, "elapsed_time": "5:40:27", "remaining_time": "4:22:58"} +{"current_steps": 1104, "total_steps": 1955, "loss": 0.9671, "lr": 7.592965402494242e-05, "epoch": 2.8235294117647056, "percentage": 56.47, "elapsed_time": "5:40:45", "remaining_time": "4:22:40"} +{"current_steps": 1105, "total_steps": 1955, "loss": 0.9572, "lr": 7.578696480236935e-05, "epoch": 2.8260869565217392, "percentage": 56.52, "elapsed_time": "5:41:04", "remaining_time": "4:22:21"} +{"current_steps": 1106, "total_steps": 1955, "loss": 0.9874, "lr": 7.564428901867512e-05, "epoch": 2.8286445012787724, "percentage": 56.57, "elapsed_time": "5:41:22", "remaining_time": "4:22:03"} +{"current_steps": 1107, "total_steps": 1955, "loss": 0.9834, "lr": 7.550162712897166e-05, "epoch": 2.8312020460358056, "percentage": 56.62, "elapsed_time": "5:41:41", "remaining_time": "4:21:44"} +{"current_steps": 1108, "total_steps": 1955, "loss": 0.9932, "lr": 7.535897958832657e-05, "epoch": 2.833759590792839, "percentage": 56.68, "elapsed_time": "5:41:59", "remaining_time": "4:21:26"} +{"current_steps": 1109, "total_steps": 1955, 
"loss": 0.9976, "lr": 7.521634685176171e-05, "epoch": 2.836317135549872, "percentage": 56.73, "elapsed_time": "5:42:18", "remaining_time": "4:21:07"} +{"current_steps": 1110, "total_steps": 1955, "loss": 0.979, "lr": 7.507372937425166e-05, "epoch": 2.8388746803069056, "percentage": 56.78, "elapsed_time": "5:42:36", "remaining_time": "4:20:49"} +{"current_steps": 1111, "total_steps": 1955, "loss": 0.9784, "lr": 7.493112761072238e-05, "epoch": 2.8414322250639388, "percentage": 56.83, "elapsed_time": "5:42:55", "remaining_time": "4:20:30"} +{"current_steps": 1112, "total_steps": 1955, "loss": 0.9861, "lr": 7.478854201604967e-05, "epoch": 2.843989769820972, "percentage": 56.88, "elapsed_time": "5:43:13", "remaining_time": "4:20:11"} +{"current_steps": 1113, "total_steps": 1955, "loss": 0.9767, "lr": 7.464597304505779e-05, "epoch": 2.846547314578005, "percentage": 56.93, "elapsed_time": "5:43:31", "remaining_time": "4:19:53"} +{"current_steps": 1114, "total_steps": 1955, "loss": 0.9763, "lr": 7.450342115251793e-05, "epoch": 2.8491048593350383, "percentage": 56.98, "elapsed_time": "5:43:50", "remaining_time": "4:19:34"} +{"current_steps": 1115, "total_steps": 1955, "loss": 0.9814, "lr": 7.436088679314689e-05, "epoch": 2.8516624040920715, "percentage": 57.03, "elapsed_time": "5:44:09", "remaining_time": "4:19:16"} +{"current_steps": 1116, "total_steps": 1955, "loss": 0.9737, "lr": 7.42183704216054e-05, "epoch": 2.8542199488491047, "percentage": 57.08, "elapsed_time": "5:44:27", "remaining_time": "4:18:57"} +{"current_steps": 1117, "total_steps": 1955, "loss": 0.9593, "lr": 7.407587249249691e-05, "epoch": 2.8567774936061383, "percentage": 57.14, "elapsed_time": "5:44:46", "remaining_time": "4:18:39"} +{"current_steps": 1118, "total_steps": 1955, "loss": 0.9912, "lr": 7.393339346036604e-05, "epoch": 2.8593350383631715, "percentage": 57.19, "elapsed_time": "5:45:04", "remaining_time": "4:18:20"} +{"current_steps": 1119, "total_steps": 1955, "loss": 0.9636, "lr": 
7.379093377969708e-05, "epoch": 2.8618925831202047, "percentage": 57.24, "elapsed_time": "5:45:23", "remaining_time": "4:18:02"} +{"current_steps": 1120, "total_steps": 1955, "loss": 1.0179, "lr": 7.364849390491269e-05, "epoch": 2.864450127877238, "percentage": 57.29, "elapsed_time": "5:45:41", "remaining_time": "4:17:43"} +{"current_steps": 1121, "total_steps": 1955, "loss": 0.9865, "lr": 7.350607429037222e-05, "epoch": 2.867007672634271, "percentage": 57.34, "elapsed_time": "5:46:00", "remaining_time": "4:17:25"} +{"current_steps": 1122, "total_steps": 1955, "loss": 0.9697, "lr": 7.336367539037047e-05, "epoch": 2.869565217391304, "percentage": 57.39, "elapsed_time": "5:46:18", "remaining_time": "4:17:06"} +{"current_steps": 1123, "total_steps": 1955, "loss": 0.9394, "lr": 7.32212976591362e-05, "epoch": 2.8721227621483374, "percentage": 57.44, "elapsed_time": "5:46:37", "remaining_time": "4:16:48"} +{"current_steps": 1124, "total_steps": 1955, "loss": 1.0193, "lr": 7.307894155083054e-05, "epoch": 2.874680306905371, "percentage": 57.49, "elapsed_time": "5:46:56", "remaining_time": "4:16:29"} +{"current_steps": 1125, "total_steps": 1955, "loss": 0.9959, "lr": 7.293660751954576e-05, "epoch": 2.877237851662404, "percentage": 57.54, "elapsed_time": "5:47:14", "remaining_time": "4:16:11"} +{"current_steps": 1126, "total_steps": 1955, "loss": 0.9886, "lr": 7.279429601930365e-05, "epoch": 2.8797953964194374, "percentage": 57.6, "elapsed_time": "5:47:33", "remaining_time": "4:15:52"} +{"current_steps": 1127, "total_steps": 1955, "loss": 0.9552, "lr": 7.265200750405408e-05, "epoch": 2.8823529411764706, "percentage": 57.65, "elapsed_time": "5:47:51", "remaining_time": "4:15:34"} +{"current_steps": 1128, "total_steps": 1955, "loss": 0.9613, "lr": 7.250974242767372e-05, "epoch": 2.8849104859335037, "percentage": 57.7, "elapsed_time": "5:48:10", "remaining_time": "4:15:15"} +{"current_steps": 1129, "total_steps": 1955, "loss": 0.9668, "lr": 7.236750124396435e-05, "epoch": 
2.887468030690537, "percentage": 57.75, "elapsed_time": "5:48:28", "remaining_time": "4:14:57"} +{"current_steps": 1130, "total_steps": 1955, "loss": 0.9925, "lr": 7.222528440665167e-05, "epoch": 2.89002557544757, "percentage": 57.8, "elapsed_time": "5:48:47", "remaining_time": "4:14:38"} +{"current_steps": 1131, "total_steps": 1955, "loss": 1.0041, "lr": 7.20830923693836e-05, "epoch": 2.8925831202046037, "percentage": 57.85, "elapsed_time": "5:49:05", "remaining_time": "4:14:20"} +{"current_steps": 1132, "total_steps": 1955, "loss": 0.9425, "lr": 7.194092558572897e-05, "epoch": 2.895140664961637, "percentage": 57.9, "elapsed_time": "5:49:24", "remaining_time": "4:14:01"} +{"current_steps": 1133, "total_steps": 1955, "loss": 0.9618, "lr": 7.179878450917613e-05, "epoch": 2.89769820971867, "percentage": 57.95, "elapsed_time": "5:49:42", "remaining_time": "4:13:43"} +{"current_steps": 1134, "total_steps": 1955, "loss": 0.9625, "lr": 7.165666959313135e-05, "epoch": 2.9002557544757033, "percentage": 58.01, "elapsed_time": "5:50:00", "remaining_time": "4:13:24"} +{"current_steps": 1135, "total_steps": 1955, "loss": 0.9868, "lr": 7.151458129091752e-05, "epoch": 2.9028132992327365, "percentage": 58.06, "elapsed_time": "5:50:19", "remaining_time": "4:13:05"} +{"current_steps": 1136, "total_steps": 1955, "loss": 0.9579, "lr": 7.137252005577256e-05, "epoch": 2.90537084398977, "percentage": 58.11, "elapsed_time": "5:50:38", "remaining_time": "4:12:47"} +{"current_steps": 1137, "total_steps": 1955, "loss": 1.0193, "lr": 7.123048634084815e-05, "epoch": 2.907928388746803, "percentage": 58.16, "elapsed_time": "5:50:56", "remaining_time": "4:12:29"} +{"current_steps": 1138, "total_steps": 1955, "loss": 0.9594, "lr": 7.108848059920805e-05, "epoch": 2.9104859335038364, "percentage": 58.21, "elapsed_time": "5:51:15", "remaining_time": "4:12:10"} +{"current_steps": 1139, "total_steps": 1955, "loss": 0.9746, "lr": 7.09465032838269e-05, "epoch": 2.9130434782608696, "percentage": 58.26, 
"elapsed_time": "5:51:34", "remaining_time": "4:11:52"} +{"current_steps": 1140, "total_steps": 1955, "loss": 0.9659, "lr": 7.080455484758863e-05, "epoch": 2.915601023017903, "percentage": 58.31, "elapsed_time": "5:51:52", "remaining_time": "4:11:33"} +{"current_steps": 1141, "total_steps": 1955, "loss": 0.9818, "lr": 7.066263574328505e-05, "epoch": 2.918158567774936, "percentage": 58.36, "elapsed_time": "5:52:11", "remaining_time": "4:11:15"} +{"current_steps": 1142, "total_steps": 1955, "loss": 0.9915, "lr": 7.052074642361444e-05, "epoch": 2.920716112531969, "percentage": 58.41, "elapsed_time": "5:52:29", "remaining_time": "4:10:56"} +{"current_steps": 1143, "total_steps": 1955, "loss": 0.9882, "lr": 7.037888734117998e-05, "epoch": 2.923273657289003, "percentage": 58.47, "elapsed_time": "5:52:48", "remaining_time": "4:10:38"} +{"current_steps": 1144, "total_steps": 1955, "loss": 0.9666, "lr": 7.023705894848848e-05, "epoch": 2.9258312020460355, "percentage": 58.52, "elapsed_time": "5:53:07", "remaining_time": "4:10:19"} +{"current_steps": 1145, "total_steps": 1955, "loss": 0.9746, "lr": 7.009526169794885e-05, "epoch": 2.928388746803069, "percentage": 58.57, "elapsed_time": "5:53:25", "remaining_time": "4:10:01"} +{"current_steps": 1146, "total_steps": 1955, "loss": 0.9624, "lr": 6.995349604187061e-05, "epoch": 2.9309462915601023, "percentage": 58.62, "elapsed_time": "5:53:43", "remaining_time": "4:09:42"} +{"current_steps": 1147, "total_steps": 1955, "loss": 0.9795, "lr": 6.981176243246257e-05, "epoch": 2.9335038363171355, "percentage": 58.67, "elapsed_time": "5:54:01", "remaining_time": "4:09:23"} +{"current_steps": 1148, "total_steps": 1955, "loss": 0.977, "lr": 6.967006132183127e-05, "epoch": 2.9360613810741687, "percentage": 58.72, "elapsed_time": "5:54:20", "remaining_time": "4:09:05"} +{"current_steps": 1149, "total_steps": 1955, "loss": 0.9928, "lr": 6.952839316197956e-05, "epoch": 2.938618925831202, "percentage": 58.77, "elapsed_time": "5:54:39", 
"remaining_time": "4:08:47"} +{"current_steps": 1150, "total_steps": 1955, "loss": 0.9822, "lr": 6.938675840480525e-05, "epoch": 2.9411764705882355, "percentage": 58.82, "elapsed_time": "5:54:58", "remaining_time": "4:08:28"} +{"current_steps": 1151, "total_steps": 1955, "loss": 0.9973, "lr": 6.924515750209954e-05, "epoch": 2.9437340153452687, "percentage": 58.87, "elapsed_time": "5:55:16", "remaining_time": "4:08:10"} +{"current_steps": 1152, "total_steps": 1955, "loss": 0.9685, "lr": 6.910359090554572e-05, "epoch": 2.946291560102302, "percentage": 58.93, "elapsed_time": "5:55:34", "remaining_time": "4:07:51"} +{"current_steps": 1153, "total_steps": 1955, "loss": 0.9896, "lr": 6.896205906671755e-05, "epoch": 2.948849104859335, "percentage": 58.98, "elapsed_time": "5:55:53", "remaining_time": "4:07:32"} +{"current_steps": 1154, "total_steps": 1955, "loss": 0.9948, "lr": 6.882056243707796e-05, "epoch": 2.9514066496163682, "percentage": 59.03, "elapsed_time": "5:56:12", "remaining_time": "4:07:14"} +{"current_steps": 1155, "total_steps": 1955, "loss": 1.0107, "lr": 6.86791014679776e-05, "epoch": 2.9539641943734014, "percentage": 59.08, "elapsed_time": "5:56:30", "remaining_time": "4:06:56"} +{"current_steps": 1156, "total_steps": 1955, "loss": 0.9844, "lr": 6.85376766106533e-05, "epoch": 2.9565217391304346, "percentage": 59.13, "elapsed_time": "5:56:49", "remaining_time": "4:06:37"} +{"current_steps": 1157, "total_steps": 1955, "loss": 0.9748, "lr": 6.839628831622681e-05, "epoch": 2.959079283887468, "percentage": 59.18, "elapsed_time": "5:57:08", "remaining_time": "4:06:19"} +{"current_steps": 1158, "total_steps": 1955, "loss": 0.9699, "lr": 6.825493703570311e-05, "epoch": 2.9616368286445014, "percentage": 59.23, "elapsed_time": "5:57:26", "remaining_time": "4:06:00"} +{"current_steps": 1159, "total_steps": 1955, "loss": 0.9694, "lr": 6.811362321996926e-05, "epoch": 2.9641943734015346, "percentage": 59.28, "elapsed_time": "5:57:45", "remaining_time": "4:05:42"} 
+{"current_steps": 1160, "total_steps": 1955, "loss": 0.991, "lr": 6.797234731979267e-05, "epoch": 2.9667519181585678, "percentage": 59.34, "elapsed_time": "5:58:04", "remaining_time": "4:05:24"} +{"current_steps": 1161, "total_steps": 1955, "loss": 0.9614, "lr": 6.783110978581989e-05, "epoch": 2.969309462915601, "percentage": 59.39, "elapsed_time": "5:58:22", "remaining_time": "4:05:05"} +{"current_steps": 1162, "total_steps": 1955, "loss": 0.9656, "lr": 6.768991106857508e-05, "epoch": 2.971867007672634, "percentage": 59.44, "elapsed_time": "5:58:41", "remaining_time": "4:04:47"} +{"current_steps": 1163, "total_steps": 1955, "loss": 1.0069, "lr": 6.754875161845855e-05, "epoch": 2.9744245524296673, "percentage": 59.49, "elapsed_time": "5:59:00", "remaining_time": "4:04:28"} +{"current_steps": 1164, "total_steps": 1955, "loss": 0.9612, "lr": 6.740763188574546e-05, "epoch": 2.976982097186701, "percentage": 59.54, "elapsed_time": "5:59:18", "remaining_time": "4:04:10"} +{"current_steps": 1165, "total_steps": 1955, "loss": 0.9696, "lr": 6.726655232058409e-05, "epoch": 2.979539641943734, "percentage": 59.59, "elapsed_time": "5:59:37", "remaining_time": "4:03:51"} +{"current_steps": 1166, "total_steps": 1955, "loss": 1.0014, "lr": 6.712551337299473e-05, "epoch": 2.9820971867007673, "percentage": 59.64, "elapsed_time": "5:59:55", "remaining_time": "4:03:33"} +{"current_steps": 1167, "total_steps": 1955, "loss": 0.9773, "lr": 6.69845154928681e-05, "epoch": 2.9846547314578005, "percentage": 59.69, "elapsed_time": "6:00:14", "remaining_time": "4:03:14"} +{"current_steps": 1168, "total_steps": 1955, "loss": 0.9911, "lr": 6.684355912996386e-05, "epoch": 2.9872122762148337, "percentage": 59.74, "elapsed_time": "6:00:32", "remaining_time": "4:02:56"} +{"current_steps": 1169, "total_steps": 1955, "loss": 0.9683, "lr": 6.670264473390931e-05, "epoch": 2.9897698209718673, "percentage": 59.8, "elapsed_time": "6:00:51", "remaining_time": "4:02:37"} +{"current_steps": 1170, 
"total_steps": 1955, "loss": 0.967, "lr": 6.656177275419785e-05, "epoch": 2.9923273657289, "percentage": 59.85, "elapsed_time": "6:01:09", "remaining_time": "4:02:19"} +{"current_steps": 1171, "total_steps": 1955, "loss": 0.9638, "lr": 6.64209436401875e-05, "epoch": 2.9948849104859336, "percentage": 59.9, "elapsed_time": "6:01:28", "remaining_time": "4:02:00"} +{"current_steps": 1172, "total_steps": 1955, "loss": 1.0056, "lr": 6.62801578410997e-05, "epoch": 2.997442455242967, "percentage": 59.95, "elapsed_time": "6:01:47", "remaining_time": "4:01:42"} +{"current_steps": 1173, "total_steps": 1955, "loss": 0.9933, "lr": 6.61394158060176e-05, "epoch": 3.0, "percentage": 60.0, "elapsed_time": "6:02:05", "remaining_time": "4:01:23"} +{"current_steps": 1174, "total_steps": 1955, "loss": 0.9712, "lr": 6.59987179838848e-05, "epoch": 3.002557544757033, "percentage": 60.05, "elapsed_time": "6:02:34", "remaining_time": "4:01:12"} +{"current_steps": 1175, "total_steps": 1955, "loss": 0.9701, "lr": 6.58580648235039e-05, "epoch": 3.0051150895140664, "percentage": 60.1, "elapsed_time": "6:02:52", "remaining_time": "4:00:53"} +{"current_steps": 1176, "total_steps": 1955, "loss": 0.9498, "lr": 6.571745677353492e-05, "epoch": 3.0076726342710995, "percentage": 60.15, "elapsed_time": "6:03:10", "remaining_time": "4:00:34"} +{"current_steps": 1177, "total_steps": 1955, "loss": 0.9841, "lr": 6.557689428249414e-05, "epoch": 3.010230179028133, "percentage": 60.2, "elapsed_time": "6:03:29", "remaining_time": "4:00:16"} +{"current_steps": 1178, "total_steps": 1955, "loss": 0.9728, "lr": 6.543637779875237e-05, "epoch": 3.0127877237851663, "percentage": 60.26, "elapsed_time": "6:03:48", "remaining_time": "3:59:57"} +{"current_steps": 1179, "total_steps": 1955, "loss": 0.9263, "lr": 6.529590777053378e-05, "epoch": 3.0153452685421995, "percentage": 60.31, "elapsed_time": "6:04:06", "remaining_time": "3:59:39"} +{"current_steps": 1180, "total_steps": 1955, "loss": 0.9353, "lr": 
6.515548464591428e-05, "epoch": 3.0179028132992327, "percentage": 60.36, "elapsed_time": "6:04:25", "remaining_time": "3:59:20"} +{"current_steps": 1181, "total_steps": 1955, "loss": 0.948, "lr": 6.501510887282024e-05, "epoch": 3.020460358056266, "percentage": 60.41, "elapsed_time": "6:04:43", "remaining_time": "3:59:02"} +{"current_steps": 1182, "total_steps": 1955, "loss": 0.9406, "lr": 6.487478089902685e-05, "epoch": 3.023017902813299, "percentage": 60.46, "elapsed_time": "6:05:02", "remaining_time": "3:58:43"} +{"current_steps": 1183, "total_steps": 1955, "loss": 0.9612, "lr": 6.473450117215699e-05, "epoch": 3.0255754475703327, "percentage": 60.51, "elapsed_time": "6:05:20", "remaining_time": "3:58:24"} +{"current_steps": 1184, "total_steps": 1955, "loss": 0.93, "lr": 6.459427013967953e-05, "epoch": 3.028132992327366, "percentage": 60.56, "elapsed_time": "6:05:39", "remaining_time": "3:58:06"} +{"current_steps": 1185, "total_steps": 1955, "loss": 0.943, "lr": 6.445408824890805e-05, "epoch": 3.030690537084399, "percentage": 60.61, "elapsed_time": "6:05:57", "remaining_time": "3:57:47"} +{"current_steps": 1186, "total_steps": 1955, "loss": 0.9264, "lr": 6.431395594699943e-05, "epoch": 3.0332480818414322, "percentage": 60.66, "elapsed_time": "6:06:15", "remaining_time": "3:57:29"} +{"current_steps": 1187, "total_steps": 1955, "loss": 0.9492, "lr": 6.417387368095225e-05, "epoch": 3.0358056265984654, "percentage": 60.72, "elapsed_time": "6:06:34", "remaining_time": "3:57:10"} +{"current_steps": 1188, "total_steps": 1955, "loss": 0.9173, "lr": 6.403384189760556e-05, "epoch": 3.0383631713554986, "percentage": 60.77, "elapsed_time": "6:06:53", "remaining_time": "3:56:52"} +{"current_steps": 1189, "total_steps": 1955, "loss": 0.9483, "lr": 6.389386104363738e-05, "epoch": 3.040920716112532, "percentage": 60.82, "elapsed_time": "6:07:11", "remaining_time": "3:56:33"} +{"current_steps": 1190, "total_steps": 1955, "loss": 0.938, "lr": 6.375393156556325e-05, "epoch": 
3.0434782608695654, "percentage": 60.87, "elapsed_time": "6:07:30", "remaining_time": "3:56:15"} +{"current_steps": 1191, "total_steps": 1955, "loss": 0.9174, "lr": 6.361405390973489e-05, "epoch": 3.0460358056265986, "percentage": 60.92, "elapsed_time": "6:07:48", "remaining_time": "3:55:56"} +{"current_steps": 1192, "total_steps": 1955, "loss": 0.9542, "lr": 6.347422852233862e-05, "epoch": 3.0485933503836318, "percentage": 60.97, "elapsed_time": "6:08:06", "remaining_time": "3:55:37"} +{"current_steps": 1193, "total_steps": 1955, "loss": 0.9617, "lr": 6.333445584939407e-05, "epoch": 3.051150895140665, "percentage": 61.02, "elapsed_time": "6:08:25", "remaining_time": "3:55:19"} +{"current_steps": 1194, "total_steps": 1955, "loss": 0.9349, "lr": 6.319473633675275e-05, "epoch": 3.053708439897698, "percentage": 61.07, "elapsed_time": "6:08:43", "remaining_time": "3:55:00"} +{"current_steps": 1195, "total_steps": 1955, "loss": 0.9414, "lr": 6.305507043009657e-05, "epoch": 3.0562659846547313, "percentage": 61.13, "elapsed_time": "6:09:01", "remaining_time": "3:54:41"} +{"current_steps": 1196, "total_steps": 1955, "loss": 0.9512, "lr": 6.291545857493645e-05, "epoch": 3.0588235294117645, "percentage": 61.18, "elapsed_time": "6:09:19", "remaining_time": "3:54:23"} +{"current_steps": 1197, "total_steps": 1955, "loss": 0.9522, "lr": 6.277590121661098e-05, "epoch": 3.061381074168798, "percentage": 61.23, "elapsed_time": "6:09:38", "remaining_time": "3:54:04"} +{"current_steps": 1198, "total_steps": 1955, "loss": 0.9493, "lr": 6.263639880028468e-05, "epoch": 3.0639386189258313, "percentage": 61.28, "elapsed_time": "6:09:57", "remaining_time": "3:53:46"} +{"current_steps": 1199, "total_steps": 1955, "loss": 0.9668, "lr": 6.249695177094707e-05, "epoch": 3.0664961636828645, "percentage": 61.33, "elapsed_time": "6:10:15", "remaining_time": "3:53:27"} +{"current_steps": 1200, "total_steps": 1955, "loss": 0.9279, "lr": 6.235756057341084e-05, "epoch": 3.0690537084398977, 
"percentage": 61.38, "elapsed_time": "6:10:34", "remaining_time": "3:53:09"} +{"current_steps": 1201, "total_steps": 1955, "loss": 0.9403, "lr": 6.221822565231066e-05, "epoch": 3.071611253196931, "percentage": 61.43, "elapsed_time": "6:10:52", "remaining_time": "3:52:50"} +{"current_steps": 1202, "total_steps": 1955, "loss": 0.9616, "lr": 6.207894745210168e-05, "epoch": 3.074168797953964, "percentage": 61.48, "elapsed_time": "6:11:10", "remaining_time": "3:52:31"} +{"current_steps": 1203, "total_steps": 1955, "loss": 0.9664, "lr": 6.193972641705809e-05, "epoch": 3.0767263427109977, "percentage": 61.53, "elapsed_time": "6:11:29", "remaining_time": "3:52:13"} +{"current_steps": 1204, "total_steps": 1955, "loss": 0.9663, "lr": 6.180056299127174e-05, "epoch": 3.079283887468031, "percentage": 61.59, "elapsed_time": "6:11:48", "remaining_time": "3:51:54"} +{"current_steps": 1205, "total_steps": 1955, "loss": 0.9676, "lr": 6.16614576186507e-05, "epoch": 3.081841432225064, "percentage": 61.64, "elapsed_time": "6:12:06", "remaining_time": "3:51:36"} +{"current_steps": 1206, "total_steps": 1955, "loss": 0.9385, "lr": 6.152241074291791e-05, "epoch": 3.084398976982097, "percentage": 61.69, "elapsed_time": "6:12:24", "remaining_time": "3:51:17"} +{"current_steps": 1207, "total_steps": 1955, "loss": 0.9593, "lr": 6.13834228076097e-05, "epoch": 3.0869565217391304, "percentage": 61.74, "elapsed_time": "6:12:42", "remaining_time": "3:50:58"} +{"current_steps": 1208, "total_steps": 1955, "loss": 0.9859, "lr": 6.12444942560744e-05, "epoch": 3.0895140664961636, "percentage": 61.79, "elapsed_time": "6:13:01", "remaining_time": "3:50:40"} +{"current_steps": 1209, "total_steps": 1955, "loss": 0.9343, "lr": 6.110562553147078e-05, "epoch": 3.0920716112531967, "percentage": 61.84, "elapsed_time": "6:13:20", "remaining_time": "3:50:21"} +{"current_steps": 1210, "total_steps": 1955, "loss": 0.9572, "lr": 6.0966817076767e-05, "epoch": 3.0946291560102304, "percentage": 61.89, "elapsed_time": 
"6:13:38", "remaining_time": "3:50:03"} +{"current_steps": 1211, "total_steps": 1955, "loss": 0.9577, "lr": 6.08280693347388e-05, "epoch": 3.0971867007672635, "percentage": 61.94, "elapsed_time": "6:13:57", "remaining_time": "3:49:44"} +{"current_steps": 1212, "total_steps": 1955, "loss": 0.9253, "lr": 6.068938274796834e-05, "epoch": 3.0997442455242967, "percentage": 61.99, "elapsed_time": "6:14:16", "remaining_time": "3:49:26"} +{"current_steps": 1213, "total_steps": 1955, "loss": 0.9896, "lr": 6.055075775884263e-05, "epoch": 3.10230179028133, "percentage": 62.05, "elapsed_time": "6:14:34", "remaining_time": "3:49:07"} +{"current_steps": 1214, "total_steps": 1955, "loss": 0.9387, "lr": 6.0412194809552316e-05, "epoch": 3.104859335038363, "percentage": 62.1, "elapsed_time": "6:14:53", "remaining_time": "3:48:49"} +{"current_steps": 1215, "total_steps": 1955, "loss": 0.9808, "lr": 6.027369434208999e-05, "epoch": 3.1074168797953963, "percentage": 62.15, "elapsed_time": "6:15:11", "remaining_time": "3:48:31"} +{"current_steps": 1216, "total_steps": 1955, "loss": 0.933, "lr": 6.0135256798249047e-05, "epoch": 3.10997442455243, "percentage": 62.2, "elapsed_time": "6:15:30", "remaining_time": "3:48:12"} +{"current_steps": 1217, "total_steps": 1955, "loss": 0.9684, "lr": 5.999688261962216e-05, "epoch": 3.112531969309463, "percentage": 62.25, "elapsed_time": "6:15:48", "remaining_time": "3:47:53"} +{"current_steps": 1218, "total_steps": 1955, "loss": 0.944, "lr": 5.985857224759981e-05, "epoch": 3.1150895140664963, "percentage": 62.3, "elapsed_time": "6:16:07", "remaining_time": "3:47:35"} +{"current_steps": 1219, "total_steps": 1955, "loss": 0.9598, "lr": 5.972032612336906e-05, "epoch": 3.1176470588235294, "percentage": 62.35, "elapsed_time": "6:16:26", "remaining_time": "3:47:16"} +{"current_steps": 1220, "total_steps": 1955, "loss": 0.9483, "lr": 5.958214468791189e-05, "epoch": 3.1202046035805626, "percentage": 62.4, "elapsed_time": "6:16:44", "remaining_time": "3:46:58"} 
+{"current_steps": 1221, "total_steps": 1955, "loss": 0.9455, "lr": 5.944402838200404e-05, "epoch": 3.122762148337596, "percentage": 62.46, "elapsed_time": "6:17:03", "remaining_time": "3:46:39"} +{"current_steps": 1222, "total_steps": 1955, "loss": 0.8963, "lr": 5.930597764621347e-05, "epoch": 3.125319693094629, "percentage": 62.51, "elapsed_time": "6:17:21", "remaining_time": "3:46:21"} +{"current_steps": 1223, "total_steps": 1955, "loss": 0.9564, "lr": 5.916799292089895e-05, "epoch": 3.1278772378516626, "percentage": 62.56, "elapsed_time": "6:17:39", "remaining_time": "3:46:02"} +{"current_steps": 1224, "total_steps": 1955, "loss": 0.9272, "lr": 5.9030074646208745e-05, "epoch": 3.130434782608696, "percentage": 62.61, "elapsed_time": "6:17:57", "remaining_time": "3:45:43"} +{"current_steps": 1225, "total_steps": 1955, "loss": 0.9316, "lr": 5.8892223262079144e-05, "epoch": 3.132992327365729, "percentage": 62.66, "elapsed_time": "6:18:16", "remaining_time": "3:45:25"} +{"current_steps": 1226, "total_steps": 1955, "loss": 0.9487, "lr": 5.875443920823297e-05, "epoch": 3.135549872122762, "percentage": 62.71, "elapsed_time": "6:18:34", "remaining_time": "3:45:06"} +{"current_steps": 1227, "total_steps": 1955, "loss": 0.9492, "lr": 5.861672292417842e-05, "epoch": 3.1381074168797953, "percentage": 62.76, "elapsed_time": "6:18:53", "remaining_time": "3:44:47"} +{"current_steps": 1228, "total_steps": 1955, "loss": 0.966, "lr": 5.84790748492074e-05, "epoch": 3.1406649616368285, "percentage": 62.81, "elapsed_time": "6:19:11", "remaining_time": "3:44:29"} +{"current_steps": 1229, "total_steps": 1955, "loss": 0.9708, "lr": 5.834149542239431e-05, "epoch": 3.1432225063938617, "percentage": 62.86, "elapsed_time": "6:19:30", "remaining_time": "3:44:10"} +{"current_steps": 1230, "total_steps": 1955, "loss": 0.9557, "lr": 5.8203985082594575e-05, "epoch": 3.1457800511508953, "percentage": 62.92, "elapsed_time": "6:19:48", "remaining_time": "3:43:52"} +{"current_steps": 1231, 
"total_steps": 1955, "loss": 0.9638, "lr": 5.806654426844315e-05, "epoch": 3.1483375959079285, "percentage": 62.97, "elapsed_time": "6:20:07", "remaining_time": "3:43:33"} +{"current_steps": 1232, "total_steps": 1955, "loss": 0.9434, "lr": 5.792917341835335e-05, "epoch": 3.1508951406649617, "percentage": 63.02, "elapsed_time": "6:20:25", "remaining_time": "3:43:15"} +{"current_steps": 1233, "total_steps": 1955, "loss": 0.9809, "lr": 5.77918729705152e-05, "epoch": 3.153452685421995, "percentage": 63.07, "elapsed_time": "6:20:44", "remaining_time": "3:42:56"} +{"current_steps": 1234, "total_steps": 1955, "loss": 0.9639, "lr": 5.765464336289424e-05, "epoch": 3.156010230179028, "percentage": 63.12, "elapsed_time": "6:21:02", "remaining_time": "3:42:38"} +{"current_steps": 1235, "total_steps": 1955, "loss": 0.954, "lr": 5.751748503322999e-05, "epoch": 3.1585677749360612, "percentage": 63.17, "elapsed_time": "6:21:21", "remaining_time": "3:42:19"} +{"current_steps": 1236, "total_steps": 1955, "loss": 0.9589, "lr": 5.7380398419034644e-05, "epoch": 3.1611253196930944, "percentage": 63.22, "elapsed_time": "6:21:39", "remaining_time": "3:42:01"} +{"current_steps": 1237, "total_steps": 1955, "loss": 0.9471, "lr": 5.7243383957591586e-05, "epoch": 3.163682864450128, "percentage": 63.27, "elapsed_time": "6:21:58", "remaining_time": "3:41:42"} +{"current_steps": 1238, "total_steps": 1955, "loss": 0.9518, "lr": 5.7106442085954045e-05, "epoch": 3.166240409207161, "percentage": 63.32, "elapsed_time": "6:22:17", "remaining_time": "3:41:24"} +{"current_steps": 1239, "total_steps": 1955, "loss": 0.9242, "lr": 5.69695732409438e-05, "epoch": 3.1687979539641944, "percentage": 63.38, "elapsed_time": "6:22:35", "remaining_time": "3:41:05"} +{"current_steps": 1240, "total_steps": 1955, "loss": 0.9423, "lr": 5.6832777859149536e-05, "epoch": 3.1713554987212276, "percentage": 63.43, "elapsed_time": "6:22:54", "remaining_time": "3:40:47"} +{"current_steps": 1241, "total_steps": 1955, "loss": 
0.932, "lr": 5.669605637692575e-05, "epoch": 3.1739130434782608, "percentage": 63.48, "elapsed_time": "6:23:13", "remaining_time": "3:40:28"} +{"current_steps": 1242, "total_steps": 1955, "loss": 0.9379, "lr": 5.655940923039111e-05, "epoch": 3.176470588235294, "percentage": 63.53, "elapsed_time": "6:23:31", "remaining_time": "3:40:10"} +{"current_steps": 1243, "total_steps": 1955, "loss": 0.9456, "lr": 5.642283685542717e-05, "epoch": 3.1790281329923276, "percentage": 63.58, "elapsed_time": "6:23:49", "remaining_time": "3:39:51"} +{"current_steps": 1244, "total_steps": 1955, "loss": 0.9328, "lr": 5.6286339687677044e-05, "epoch": 3.1815856777493607, "percentage": 63.63, "elapsed_time": "6:24:08", "remaining_time": "3:39:33"} +{"current_steps": 1245, "total_steps": 1955, "loss": 0.9109, "lr": 5.614991816254388e-05, "epoch": 3.184143222506394, "percentage": 63.68, "elapsed_time": "6:24:27", "remaining_time": "3:39:14"} +{"current_steps": 1246, "total_steps": 1955, "loss": 0.9584, "lr": 5.601357271518959e-05, "epoch": 3.186700767263427, "percentage": 63.73, "elapsed_time": "6:24:45", "remaining_time": "3:38:56"} +{"current_steps": 1247, "total_steps": 1955, "loss": 0.9656, "lr": 5.587730378053339e-05, "epoch": 3.1892583120204603, "percentage": 63.79, "elapsed_time": "6:25:04", "remaining_time": "3:38:37"} +{"current_steps": 1248, "total_steps": 1955, "loss": 0.9487, "lr": 5.574111179325039e-05, "epoch": 3.1918158567774935, "percentage": 63.84, "elapsed_time": "6:25:22", "remaining_time": "3:38:19"} +{"current_steps": 1249, "total_steps": 1955, "loss": 0.9372, "lr": 5.560499718777031e-05, "epoch": 3.1943734015345266, "percentage": 63.89, "elapsed_time": "6:25:41", "remaining_time": "3:38:00"} +{"current_steps": 1250, "total_steps": 1955, "loss": 0.9459, "lr": 5.5468960398276014e-05, "epoch": 3.1969309462915603, "percentage": 63.94, "elapsed_time": "6:26:00", "remaining_time": "3:37:42"} +{"current_steps": 1251, "total_steps": 1955, "loss": 0.9255, "lr": 
5.5333001858702164e-05, "epoch": 3.1994884910485935, "percentage": 63.99, "elapsed_time": "6:26:18", "remaining_time": "3:37:23"} +{"current_steps": 1252, "total_steps": 1955, "loss": 0.9615, "lr": 5.519712200273381e-05, "epoch": 3.2020460358056266, "percentage": 64.04, "elapsed_time": "6:26:36", "remaining_time": "3:37:04"} +{"current_steps": 1253, "total_steps": 1955, "loss": 0.9204, "lr": 5.5061321263804933e-05, "epoch": 3.20460358056266, "percentage": 64.09, "elapsed_time": "6:26:55", "remaining_time": "3:36:46"} +{"current_steps": 1254, "total_steps": 1955, "loss": 0.945, "lr": 5.4925600075097285e-05, "epoch": 3.207161125319693, "percentage": 64.14, "elapsed_time": "6:27:13", "remaining_time": "3:36:27"} +{"current_steps": 1255, "total_steps": 1955, "loss": 0.9435, "lr": 5.4789958869538756e-05, "epoch": 3.209718670076726, "percentage": 64.19, "elapsed_time": "6:27:32", "remaining_time": "3:36:09"} +{"current_steps": 1256, "total_steps": 1955, "loss": 0.9364, "lr": 5.4654398079802183e-05, "epoch": 3.21227621483376, "percentage": 64.25, "elapsed_time": "6:27:50", "remaining_time": "3:35:50"} +{"current_steps": 1257, "total_steps": 1955, "loss": 0.94, "lr": 5.451891813830382e-05, "epoch": 3.214833759590793, "percentage": 64.3, "elapsed_time": "6:28:09", "remaining_time": "3:35:32"} +{"current_steps": 1258, "total_steps": 1955, "loss": 0.9363, "lr": 5.4383519477202103e-05, "epoch": 3.217391304347826, "percentage": 64.35, "elapsed_time": "6:28:27", "remaining_time": "3:35:13"} +{"current_steps": 1259, "total_steps": 1955, "loss": 0.9815, "lr": 5.42482025283961e-05, "epoch": 3.2199488491048593, "percentage": 64.4, "elapsed_time": "6:28:45", "remaining_time": "3:34:54"} +{"current_steps": 1260, "total_steps": 1955, "loss": 0.9498, "lr": 5.41129677235243e-05, "epoch": 3.2225063938618925, "percentage": 64.45, "elapsed_time": "6:29:03", "remaining_time": "3:34:36"} +{"current_steps": 1261, "total_steps": 1955, "loss": 0.9337, "lr": 5.397781549396316e-05, "epoch": 
3.2250639386189257, "percentage": 64.5, "elapsed_time": "6:29:22", "remaining_time": "3:34:17"} +{"current_steps": 1262, "total_steps": 1955, "loss": 0.9171, "lr": 5.3842746270825705e-05, "epoch": 3.227621483375959, "percentage": 64.55, "elapsed_time": "6:29:41", "remaining_time": "3:33:59"} +{"current_steps": 1263, "total_steps": 1955, "loss": 0.9376, "lr": 5.370776048496026e-05, "epoch": 3.2301790281329925, "percentage": 64.6, "elapsed_time": "6:29:59", "remaining_time": "3:33:40"} +{"current_steps": 1264, "total_steps": 1955, "loss": 0.9429, "lr": 5.357285856694891e-05, "epoch": 3.2327365728900257, "percentage": 64.65, "elapsed_time": "6:30:18", "remaining_time": "3:33:22"} +{"current_steps": 1265, "total_steps": 1955, "loss": 0.9377, "lr": 5.34380409471062e-05, "epoch": 3.235294117647059, "percentage": 64.71, "elapsed_time": "6:30:36", "remaining_time": "3:33:03"} +{"current_steps": 1266, "total_steps": 1955, "loss": 0.945, "lr": 5.33033080554779e-05, "epoch": 3.237851662404092, "percentage": 64.76, "elapsed_time": "6:30:55", "remaining_time": "3:32:45"} +{"current_steps": 1267, "total_steps": 1955, "loss": 0.9379, "lr": 5.3168660321839386e-05, "epoch": 3.2404092071611252, "percentage": 64.81, "elapsed_time": "6:31:13", "remaining_time": "3:32:26"} +{"current_steps": 1268, "total_steps": 1955, "loss": 0.9021, "lr": 5.303409817569449e-05, "epoch": 3.2429667519181584, "percentage": 64.86, "elapsed_time": "6:31:32", "remaining_time": "3:32:08"} +{"current_steps": 1269, "total_steps": 1955, "loss": 0.9613, "lr": 5.2899622046274e-05, "epoch": 3.2455242966751916, "percentage": 64.91, "elapsed_time": "6:31:50", "remaining_time": "3:31:49"} +{"current_steps": 1270, "total_steps": 1955, "loss": 0.9387, "lr": 5.276523236253425e-05, "epoch": 3.2480818414322252, "percentage": 64.96, "elapsed_time": "6:32:08", "remaining_time": "3:31:30"} +{"current_steps": 1271, "total_steps": 1955, "loss": 0.9546, "lr": 5.263092955315595e-05, "epoch": 3.2506393861892584, "percentage": 
65.01, "elapsed_time": "6:32:27", "remaining_time": "3:31:12"} +{"current_steps": 1272, "total_steps": 1955, "loss": 0.9391, "lr": 5.2496714046542583e-05, "epoch": 3.2531969309462916, "percentage": 65.06, "elapsed_time": "6:32:46", "remaining_time": "3:30:53"} +{"current_steps": 1273, "total_steps": 1955, "loss": 0.9386, "lr": 5.2362586270819256e-05, "epoch": 3.2557544757033248, "percentage": 65.12, "elapsed_time": "6:33:04", "remaining_time": "3:30:35"} +{"current_steps": 1274, "total_steps": 1955, "loss": 0.9495, "lr": 5.222854665383116e-05, "epoch": 3.258312020460358, "percentage": 65.17, "elapsed_time": "6:33:23", "remaining_time": "3:30:16"} +{"current_steps": 1275, "total_steps": 1955, "loss": 0.9588, "lr": 5.2094595623142326e-05, "epoch": 3.260869565217391, "percentage": 65.22, "elapsed_time": "6:33:41", "remaining_time": "3:29:57"} +{"current_steps": 1276, "total_steps": 1955, "loss": 0.9151, "lr": 5.1960733606034126e-05, "epoch": 3.2634271099744243, "percentage": 65.27, "elapsed_time": "6:33:59", "remaining_time": "3:29:39"} +{"current_steps": 1277, "total_steps": 1955, "loss": 0.9686, "lr": 5.182696102950404e-05, "epoch": 3.265984654731458, "percentage": 65.32, "elapsed_time": "6:34:17", "remaining_time": "3:29:20"} +{"current_steps": 1278, "total_steps": 1955, "loss": 0.9384, "lr": 5.1693278320264304e-05, "epoch": 3.268542199488491, "percentage": 65.37, "elapsed_time": "6:34:35", "remaining_time": "3:29:01"} +{"current_steps": 1279, "total_steps": 1955, "loss": 0.9869, "lr": 5.1559685904740386e-05, "epoch": 3.2710997442455243, "percentage": 65.42, "elapsed_time": "6:34:54", "remaining_time": "3:28:43"} +{"current_steps": 1280, "total_steps": 1955, "loss": 0.9557, "lr": 5.142618420906985e-05, "epoch": 3.2736572890025575, "percentage": 65.47, "elapsed_time": "6:35:13", "remaining_time": "3:28:24"} +{"current_steps": 1281, "total_steps": 1955, "loss": 0.9642, "lr": 5.1292773659100755e-05, "epoch": 3.2762148337595907, "percentage": 65.52, "elapsed_time": 
"6:35:31", "remaining_time": "3:28:06"} +{"current_steps": 1282, "total_steps": 1955, "loss": 0.9509, "lr": 5.115945468039048e-05, "epoch": 3.2787723785166243, "percentage": 65.58, "elapsed_time": "6:35:50", "remaining_time": "3:27:47"} +{"current_steps": 1283, "total_steps": 1955, "loss": 0.9499, "lr": 5.1026227698204335e-05, "epoch": 3.2813299232736575, "percentage": 65.63, "elapsed_time": "6:36:08", "remaining_time": "3:27:29"} +{"current_steps": 1284, "total_steps": 1955, "loss": 0.9458, "lr": 5.089309313751415e-05, "epoch": 3.2838874680306906, "percentage": 65.68, "elapsed_time": "6:36:27", "remaining_time": "3:27:10"} +{"current_steps": 1285, "total_steps": 1955, "loss": 0.9499, "lr": 5.0760051422996925e-05, "epoch": 3.286445012787724, "percentage": 65.73, "elapsed_time": "6:36:45", "remaining_time": "3:26:52"} +{"current_steps": 1286, "total_steps": 1955, "loss": 0.9458, "lr": 5.0627102979033546e-05, "epoch": 3.289002557544757, "percentage": 65.78, "elapsed_time": "6:37:04", "remaining_time": "3:26:33"} +{"current_steps": 1287, "total_steps": 1955, "loss": 0.9379, "lr": 5.049424822970731e-05, "epoch": 3.29156010230179, "percentage": 65.83, "elapsed_time": "6:37:22", "remaining_time": "3:26:15"} +{"current_steps": 1288, "total_steps": 1955, "loss": 0.9249, "lr": 5.036148759880272e-05, "epoch": 3.2941176470588234, "percentage": 65.88, "elapsed_time": "6:37:41", "remaining_time": "3:25:56"} +{"current_steps": 1289, "total_steps": 1955, "loss": 0.9247, "lr": 5.0228821509803984e-05, "epoch": 3.296675191815857, "percentage": 65.93, "elapsed_time": "6:37:59", "remaining_time": "3:25:38"} +{"current_steps": 1290, "total_steps": 1955, "loss": 0.9236, "lr": 5.0096250385893825e-05, "epoch": 3.29923273657289, "percentage": 65.98, "elapsed_time": "6:38:17", "remaining_time": "3:25:19"} +{"current_steps": 1291, "total_steps": 1955, "loss": 0.9351, "lr": 4.9963774649951975e-05, "epoch": 3.3017902813299234, "percentage": 66.04, "elapsed_time": "6:38:36", "remaining_time": 
"3:25:00"} +{"current_steps": 1292, "total_steps": 1955, "loss": 0.9603, "lr": 4.983139472455387e-05, "epoch": 3.3043478260869565, "percentage": 66.09, "elapsed_time": "6:38:54", "remaining_time": "3:24:42"} +{"current_steps": 1293, "total_steps": 1955, "loss": 0.9067, "lr": 4.969911103196942e-05, "epoch": 3.3069053708439897, "percentage": 66.14, "elapsed_time": "6:39:12", "remaining_time": "3:24:23"} +{"current_steps": 1294, "total_steps": 1955, "loss": 0.9368, "lr": 4.956692399416149e-05, "epoch": 3.309462915601023, "percentage": 66.19, "elapsed_time": "6:39:31", "remaining_time": "3:24:05"} +{"current_steps": 1295, "total_steps": 1955, "loss": 0.947, "lr": 4.943483403278468e-05, "epoch": 3.312020460358056, "percentage": 66.24, "elapsed_time": "6:39:50", "remaining_time": "3:23:46"} +{"current_steps": 1296, "total_steps": 1955, "loss": 0.9575, "lr": 4.9302841569183884e-05, "epoch": 3.3145780051150897, "percentage": 66.29, "elapsed_time": "6:40:08", "remaining_time": "3:23:28"} +{"current_steps": 1297, "total_steps": 1955, "loss": 0.9156, "lr": 4.9170947024393074e-05, "epoch": 3.317135549872123, "percentage": 66.34, "elapsed_time": "6:40:27", "remaining_time": "3:23:09"} +{"current_steps": 1298, "total_steps": 1955, "loss": 0.9348, "lr": 4.9039150819133775e-05, "epoch": 3.319693094629156, "percentage": 66.39, "elapsed_time": "6:40:45", "remaining_time": "3:22:51"} +{"current_steps": 1299, "total_steps": 1955, "loss": 0.9587, "lr": 4.890745337381388e-05, "epoch": 3.3222506393861893, "percentage": 66.45, "elapsed_time": "6:41:04", "remaining_time": "3:22:32"} +{"current_steps": 1300, "total_steps": 1955, "loss": 0.9792, "lr": 4.877585510852627e-05, "epoch": 3.3248081841432224, "percentage": 66.5, "elapsed_time": "6:41:23", "remaining_time": "3:22:14"} +{"current_steps": 1301, "total_steps": 1955, "loss": 0.9253, "lr": 4.864435644304742e-05, "epoch": 3.3273657289002556, "percentage": 66.55, "elapsed_time": "6:41:42", "remaining_time": "3:21:55"} +{"current_steps": 
1302, "total_steps": 1955, "loss": 0.9535, "lr": 4.851295779683616e-05, "epoch": 3.329923273657289, "percentage": 66.6, "elapsed_time": "6:42:00", "remaining_time": "3:21:37"} +{"current_steps": 1303, "total_steps": 1955, "loss": 0.9338, "lr": 4.8381659589032186e-05, "epoch": 3.3324808184143224, "percentage": 66.65, "elapsed_time": "6:42:18", "remaining_time": "3:21:18"} +{"current_steps": 1304, "total_steps": 1955, "loss": 0.9499, "lr": 4.825046223845486e-05, "epoch": 3.3350383631713556, "percentage": 66.7, "elapsed_time": "6:42:37", "remaining_time": "3:21:00"} +{"current_steps": 1305, "total_steps": 1955, "loss": 0.9256, "lr": 4.811936616360186e-05, "epoch": 3.337595907928389, "percentage": 66.75, "elapsed_time": "6:42:55", "remaining_time": "3:20:41"} +{"current_steps": 1306, "total_steps": 1955, "loss": 0.9582, "lr": 4.798837178264772e-05, "epoch": 3.340153452685422, "percentage": 66.8, "elapsed_time": "6:43:14", "remaining_time": "3:20:22"} +{"current_steps": 1307, "total_steps": 1955, "loss": 0.9125, "lr": 4.78574795134427e-05, "epoch": 3.342710997442455, "percentage": 66.85, "elapsed_time": "6:43:32", "remaining_time": "3:20:04"} +{"current_steps": 1308, "total_steps": 1955, "loss": 0.9537, "lr": 4.772668977351128e-05, "epoch": 3.3452685421994883, "percentage": 66.91, "elapsed_time": "6:43:51", "remaining_time": "3:19:45"} +{"current_steps": 1309, "total_steps": 1955, "loss": 0.9401, "lr": 4.7596002980050834e-05, "epoch": 3.3478260869565215, "percentage": 66.96, "elapsed_time": "6:44:09", "remaining_time": "3:19:27"} +{"current_steps": 1310, "total_steps": 1955, "loss": 0.9782, "lr": 4.7465419549930476e-05, "epoch": 3.350383631713555, "percentage": 67.01, "elapsed_time": "6:44:28", "remaining_time": "3:19:08"} +{"current_steps": 1311, "total_steps": 1955, "loss": 0.9458, "lr": 4.733493989968949e-05, "epoch": 3.3529411764705883, "percentage": 67.06, "elapsed_time": "6:44:47", "remaining_time": "3:18:50"} +{"current_steps": 1312, "total_steps": 1955, "loss": 
0.9396, "lr": 4.7204564445536234e-05, "epoch": 3.3554987212276215, "percentage": 67.11, "elapsed_time": "6:45:05", "remaining_time": "3:18:32"} +{"current_steps": 1313, "total_steps": 1955, "loss": 0.942, "lr": 4.707429360334662e-05, "epoch": 3.3580562659846547, "percentage": 67.16, "elapsed_time": "6:45:24", "remaining_time": "3:18:13"} +{"current_steps": 1314, "total_steps": 1955, "loss": 0.9504, "lr": 4.694412778866285e-05, "epoch": 3.360613810741688, "percentage": 67.21, "elapsed_time": "6:45:43", "remaining_time": "3:17:55"} +{"current_steps": 1315, "total_steps": 1955, "loss": 0.9221, "lr": 4.681406741669216e-05, "epoch": 3.363171355498721, "percentage": 67.26, "elapsed_time": "6:46:01", "remaining_time": "3:17:36"} +{"current_steps": 1316, "total_steps": 1955, "loss": 0.944, "lr": 4.668411290230543e-05, "epoch": 3.3657289002557547, "percentage": 67.31, "elapsed_time": "6:46:19", "remaining_time": "3:17:17"} +{"current_steps": 1317, "total_steps": 1955, "loss": 0.9563, "lr": 4.655426466003586e-05, "epoch": 3.368286445012788, "percentage": 67.37, "elapsed_time": "6:46:38", "remaining_time": "3:16:59"} +{"current_steps": 1318, "total_steps": 1955, "loss": 0.9508, "lr": 4.6424523104077654e-05, "epoch": 3.370843989769821, "percentage": 67.42, "elapsed_time": "6:46:56", "remaining_time": "3:16:40"} +{"current_steps": 1319, "total_steps": 1955, "loss": 0.9502, "lr": 4.629488864828472e-05, "epoch": 3.373401534526854, "percentage": 67.47, "elapsed_time": "6:47:14", "remaining_time": "3:16:22"} +{"current_steps": 1320, "total_steps": 1955, "loss": 0.9268, "lr": 4.6165361706169325e-05, "epoch": 3.3759590792838874, "percentage": 67.52, "elapsed_time": "6:47:32", "remaining_time": "3:16:03"} +{"current_steps": 1321, "total_steps": 1955, "loss": 0.9268, "lr": 4.603594269090078e-05, "epoch": 3.3785166240409206, "percentage": 67.57, "elapsed_time": "6:47:51", "remaining_time": "3:15:44"} +{"current_steps": 1322, "total_steps": 1955, "loss": 0.9358, "lr": 
4.5906632015304116e-05, "epoch": 3.381074168797954, "percentage": 67.62, "elapsed_time": "6:48:10", "remaining_time": "3:15:26"} +{"current_steps": 1323, "total_steps": 1955, "loss": 0.9302, "lr": 4.5777430091858855e-05, "epoch": 3.3836317135549874, "percentage": 67.67, "elapsed_time": "6:48:28", "remaining_time": "3:15:07"} +{"current_steps": 1324, "total_steps": 1955, "loss": 0.9427, "lr": 4.564833733269755e-05, "epoch": 3.3861892583120206, "percentage": 67.72, "elapsed_time": "6:48:46", "remaining_time": "3:14:49"} +{"current_steps": 1325, "total_steps": 1955, "loss": 0.9437, "lr": 4.5519354149604474e-05, "epoch": 3.3887468030690537, "percentage": 67.77, "elapsed_time": "6:49:05", "remaining_time": "3:14:30"} +{"current_steps": 1326, "total_steps": 1955, "loss": 0.9466, "lr": 4.539048095401452e-05, "epoch": 3.391304347826087, "percentage": 67.83, "elapsed_time": "6:49:23", "remaining_time": "3:14:12"} +{"current_steps": 1327, "total_steps": 1955, "loss": 0.9336, "lr": 4.526171815701165e-05, "epoch": 3.39386189258312, "percentage": 67.88, "elapsed_time": "6:49:42", "remaining_time": "3:13:53"} +{"current_steps": 1328, "total_steps": 1955, "loss": 0.9215, "lr": 4.513306616932764e-05, "epoch": 3.3964194373401533, "percentage": 67.93, "elapsed_time": "6:50:00", "remaining_time": "3:13:34"} +{"current_steps": 1329, "total_steps": 1955, "loss": 0.9801, "lr": 4.5004525401340915e-05, "epoch": 3.398976982097187, "percentage": 67.98, "elapsed_time": "6:50:19", "remaining_time": "3:13:16"} +{"current_steps": 1330, "total_steps": 1955, "loss": 0.9655, "lr": 4.487609626307508e-05, "epoch": 3.40153452685422, "percentage": 68.03, "elapsed_time": "6:50:37", "remaining_time": "3:12:57"} +{"current_steps": 1331, "total_steps": 1955, "loss": 0.9382, "lr": 4.4747779164197535e-05, "epoch": 3.4040920716112533, "percentage": 68.08, "elapsed_time": "6:50:56", "remaining_time": "3:12:39"} +{"current_steps": 1332, "total_steps": 1955, "loss": 0.9557, "lr": 4.4619574514018486e-05, 
"epoch": 3.4066496163682864, "percentage": 68.13, "elapsed_time": "6:51:14", "remaining_time": "3:12:20"} +{"current_steps": 1333, "total_steps": 1955, "loss": 0.9345, "lr": 4.449148272148934e-05, "epoch": 3.4092071611253196, "percentage": 68.18, "elapsed_time": "6:51:33", "remaining_time": "3:12:02"} +{"current_steps": 1334, "total_steps": 1955, "loss": 0.9608, "lr": 4.436350419520154e-05, "epoch": 3.411764705882353, "percentage": 68.24, "elapsed_time": "6:51:52", "remaining_time": "3:11:44"} +{"current_steps": 1335, "total_steps": 1955, "loss": 0.9458, "lr": 4.423563934338519e-05, "epoch": 3.414322250639386, "percentage": 68.29, "elapsed_time": "6:52:10", "remaining_time": "3:11:25"} +{"current_steps": 1336, "total_steps": 1955, "loss": 0.9466, "lr": 4.410788857390785e-05, "epoch": 3.4168797953964196, "percentage": 68.34, "elapsed_time": "6:52:28", "remaining_time": "3:11:06"} +{"current_steps": 1337, "total_steps": 1955, "loss": 0.9408, "lr": 4.39802522942731e-05, "epoch": 3.419437340153453, "percentage": 68.39, "elapsed_time": "6:52:47", "remaining_time": "3:10:48"} +{"current_steps": 1338, "total_steps": 1955, "loss": 0.9305, "lr": 4.385273091161937e-05, "epoch": 3.421994884910486, "percentage": 68.44, "elapsed_time": "6:53:05", "remaining_time": "3:10:29"} +{"current_steps": 1339, "total_steps": 1955, "loss": 0.9375, "lr": 4.372532483271863e-05, "epoch": 3.424552429667519, "percentage": 68.49, "elapsed_time": "6:53:24", "remaining_time": "3:10:11"} +{"current_steps": 1340, "total_steps": 1955, "loss": 0.9869, "lr": 4.3598034463974966e-05, "epoch": 3.4271099744245523, "percentage": 68.54, "elapsed_time": "6:53:42", "remaining_time": "3:09:52"} +{"current_steps": 1341, "total_steps": 1955, "loss": 0.9765, "lr": 4.347086021142339e-05, "epoch": 3.4296675191815855, "percentage": 68.59, "elapsed_time": "6:54:01", "remaining_time": "3:09:34"} +{"current_steps": 1342, "total_steps": 1955, "loss": 0.9431, "lr": 4.3343802480728544e-05, "epoch": 3.4322250639386187, 
"percentage": 68.64, "elapsed_time": "6:54:20", "remaining_time": "3:09:15"} +{"current_steps": 1343, "total_steps": 1955, "loss": 0.9545, "lr": 4.321686167718337e-05, "epoch": 3.4347826086956523, "percentage": 68.7, "elapsed_time": "6:54:37", "remaining_time": "3:08:56"} +{"current_steps": 1344, "total_steps": 1955, "loss": 0.9377, "lr": 4.309003820570785e-05, "epoch": 3.4373401534526855, "percentage": 68.75, "elapsed_time": "6:54:56", "remaining_time": "3:08:38"} +{"current_steps": 1345, "total_steps": 1955, "loss": 0.9283, "lr": 4.296333247084764e-05, "epoch": 3.4398976982097187, "percentage": 68.8, "elapsed_time": "6:55:15", "remaining_time": "3:08:19"} +{"current_steps": 1346, "total_steps": 1955, "loss": 0.9663, "lr": 4.283674487677297e-05, "epoch": 3.442455242966752, "percentage": 68.85, "elapsed_time": "6:55:34", "remaining_time": "3:08:01"} +{"current_steps": 1347, "total_steps": 1955, "loss": 0.9425, "lr": 4.271027582727703e-05, "epoch": 3.445012787723785, "percentage": 68.9, "elapsed_time": "6:55:52", "remaining_time": "3:07:43"} +{"current_steps": 1348, "total_steps": 1955, "loss": 0.963, "lr": 4.2583925725774996e-05, "epoch": 3.4475703324808182, "percentage": 68.95, "elapsed_time": "6:56:11", "remaining_time": "3:07:24"} +{"current_steps": 1349, "total_steps": 1955, "loss": 0.969, "lr": 4.2457694975302625e-05, "epoch": 3.4501278772378514, "percentage": 69.0, "elapsed_time": "6:56:29", "remaining_time": "3:07:05"} +{"current_steps": 1350, "total_steps": 1955, "loss": 0.9578, "lr": 4.233158397851494e-05, "epoch": 3.452685421994885, "percentage": 69.05, "elapsed_time": "6:56:47", "remaining_time": "3:06:47"} +{"current_steps": 1351, "total_steps": 1955, "loss": 0.9517, "lr": 4.220559313768492e-05, "epoch": 3.455242966751918, "percentage": 69.1, "elapsed_time": "6:57:06", "remaining_time": "3:06:28"} +{"current_steps": 1352, "total_steps": 1955, "loss": 0.9593, "lr": 4.207972285470236e-05, "epoch": 3.4578005115089514, "percentage": 69.16, "elapsed_time": 
"6:57:24", "remaining_time": "3:06:10"} +{"current_steps": 1353, "total_steps": 1955, "loss": 0.9238, "lr": 4.1953973531072403e-05, "epoch": 3.4603580562659846, "percentage": 69.21, "elapsed_time": "6:57:43", "remaining_time": "3:05:51"} +{"current_steps": 1354, "total_steps": 1955, "loss": 0.9463, "lr": 4.1828345567914426e-05, "epoch": 3.4629156010230178, "percentage": 69.26, "elapsed_time": "6:58:01", "remaining_time": "3:05:33"} +{"current_steps": 1355, "total_steps": 1955, "loss": 0.9379, "lr": 4.17028393659606e-05, "epoch": 3.4654731457800514, "percentage": 69.31, "elapsed_time": "6:58:20", "remaining_time": "3:05:14"} +{"current_steps": 1356, "total_steps": 1955, "loss": 0.9445, "lr": 4.157745532555484e-05, "epoch": 3.4680306905370846, "percentage": 69.36, "elapsed_time": "6:58:39", "remaining_time": "3:04:56"} +{"current_steps": 1357, "total_steps": 1955, "loss": 0.9471, "lr": 4.145219384665128e-05, "epoch": 3.4705882352941178, "percentage": 69.41, "elapsed_time": "6:58:57", "remaining_time": "3:04:37"} +{"current_steps": 1358, "total_steps": 1955, "loss": 0.9492, "lr": 4.1327055328813036e-05, "epoch": 3.473145780051151, "percentage": 69.46, "elapsed_time": "6:59:16", "remaining_time": "3:04:19"} +{"current_steps": 1359, "total_steps": 1955, "loss": 0.9677, "lr": 4.1202040171211195e-05, "epoch": 3.475703324808184, "percentage": 69.51, "elapsed_time": "6:59:34", "remaining_time": "3:04:00"} +{"current_steps": 1360, "total_steps": 1955, "loss": 0.9574, "lr": 4.107714877262318e-05, "epoch": 3.4782608695652173, "percentage": 69.57, "elapsed_time": "6:59:53", "remaining_time": "3:03:42"} +{"current_steps": 1361, "total_steps": 1955, "loss": 0.9411, "lr": 4.0952381531431716e-05, "epoch": 3.4808184143222505, "percentage": 69.62, "elapsed_time": "7:00:11", "remaining_time": "3:03:23"} +{"current_steps": 1362, "total_steps": 1955, "loss": 0.9465, "lr": 4.082773884562342e-05, "epoch": 3.483375959079284, "percentage": 69.67, "elapsed_time": "7:00:30", "remaining_time": 
"3:03:05"} +{"current_steps": 1363, "total_steps": 1955, "loss": 0.9631, "lr": 4.0703221112787774e-05, "epoch": 3.4859335038363173, "percentage": 69.72, "elapsed_time": "7:00:49", "remaining_time": "3:02:46"} +{"current_steps": 1364, "total_steps": 1955, "loss": 0.9333, "lr": 4.057882873011543e-05, "epoch": 3.4884910485933505, "percentage": 69.77, "elapsed_time": "7:01:07", "remaining_time": "3:02:28"} +{"current_steps": 1365, "total_steps": 1955, "loss": 0.9683, "lr": 4.045456209439734e-05, "epoch": 3.4910485933503836, "percentage": 69.82, "elapsed_time": "7:01:26", "remaining_time": "3:02:09"} +{"current_steps": 1366, "total_steps": 1955, "loss": 0.9872, "lr": 4.033042160202337e-05, "epoch": 3.493606138107417, "percentage": 69.87, "elapsed_time": "7:01:44", "remaining_time": "3:01:51"} +{"current_steps": 1367, "total_steps": 1955, "loss": 0.9685, "lr": 4.020640764898096e-05, "epoch": 3.49616368286445, "percentage": 69.92, "elapsed_time": "7:02:02", "remaining_time": "3:01:32"} +{"current_steps": 1368, "total_steps": 1955, "loss": 0.9112, "lr": 4.0082520630853865e-05, "epoch": 3.498721227621483, "percentage": 69.97, "elapsed_time": "7:02:21", "remaining_time": "3:01:13"} +{"current_steps": 1369, "total_steps": 1955, "loss": 0.9585, "lr": 3.995876094282104e-05, "epoch": 3.501278772378517, "percentage": 70.03, "elapsed_time": "7:02:40", "remaining_time": "3:00:55"} +{"current_steps": 1370, "total_steps": 1955, "loss": 0.959, "lr": 3.983512897965519e-05, "epoch": 3.50383631713555, "percentage": 70.08, "elapsed_time": "7:02:58", "remaining_time": "3:00:36"} +{"current_steps": 1371, "total_steps": 1955, "loss": 0.9555, "lr": 3.9711625135721664e-05, "epoch": 3.506393861892583, "percentage": 70.13, "elapsed_time": "7:03:17", "remaining_time": "3:00:18"} +{"current_steps": 1372, "total_steps": 1955, "loss": 0.9744, "lr": 3.958824980497704e-05, "epoch": 3.5089514066496164, "percentage": 70.18, "elapsed_time": "7:03:36", "remaining_time": "2:59:59"} +{"current_steps": 1373, 
"total_steps": 1955, "loss": 0.9353, "lr": 3.946500338096811e-05, "epoch": 3.5115089514066495, "percentage": 70.23, "elapsed_time": "7:03:54", "remaining_time": "2:59:41"} +{"current_steps": 1374, "total_steps": 1955, "loss": 0.9568, "lr": 3.934188625683037e-05, "epoch": 3.5140664961636827, "percentage": 70.28, "elapsed_time": "7:04:12", "remaining_time": "2:59:22"} +{"current_steps": 1375, "total_steps": 1955, "loss": 0.9279, "lr": 3.9218898825286806e-05, "epoch": 3.516624040920716, "percentage": 70.33, "elapsed_time": "7:04:31", "remaining_time": "2:59:04"} +{"current_steps": 1376, "total_steps": 1955, "loss": 0.9342, "lr": 3.9096041478646885e-05, "epoch": 3.5191815856777495, "percentage": 70.38, "elapsed_time": "7:04:50", "remaining_time": "2:58:45"} +{"current_steps": 1377, "total_steps": 1955, "loss": 0.962, "lr": 3.8973314608805e-05, "epoch": 3.5217391304347827, "percentage": 70.43, "elapsed_time": "7:05:08", "remaining_time": "2:58:27"} +{"current_steps": 1378, "total_steps": 1955, "loss": 0.9293, "lr": 3.885071860723937e-05, "epoch": 3.524296675191816, "percentage": 70.49, "elapsed_time": "7:05:27", "remaining_time": "2:58:08"} +{"current_steps": 1379, "total_steps": 1955, "loss": 0.9895, "lr": 3.8728253865010765e-05, "epoch": 3.526854219948849, "percentage": 70.54, "elapsed_time": "7:05:45", "remaining_time": "2:57:50"} +{"current_steps": 1380, "total_steps": 1955, "loss": 0.9328, "lr": 3.8605920772761274e-05, "epoch": 3.5294117647058822, "percentage": 70.59, "elapsed_time": "7:06:04", "remaining_time": "2:57:31"} +{"current_steps": 1381, "total_steps": 1955, "loss": 0.9859, "lr": 3.848371972071304e-05, "epoch": 3.531969309462916, "percentage": 70.64, "elapsed_time": "7:06:22", "remaining_time": "2:57:13"} +{"current_steps": 1382, "total_steps": 1955, "loss": 0.9569, "lr": 3.8361651098666967e-05, "epoch": 3.5345268542199486, "percentage": 70.69, "elapsed_time": "7:06:40", "remaining_time": "2:56:54"} +{"current_steps": 1383, "total_steps": 1955, "loss": 
0.9418, "lr": 3.8239715296001654e-05, "epoch": 3.5370843989769822, "percentage": 70.74, "elapsed_time": "7:06:58", "remaining_time": "2:56:35"} +{"current_steps": 1384, "total_steps": 1955, "loss": 0.9696, "lr": 3.8117912701671905e-05, "epoch": 3.5396419437340154, "percentage": 70.79, "elapsed_time": "7:07:17", "remaining_time": "2:56:17"} +{"current_steps": 1385, "total_steps": 1955, "loss": 0.9502, "lr": 3.7996243704207686e-05, "epoch": 3.5421994884910486, "percentage": 70.84, "elapsed_time": "7:07:36", "remaining_time": "2:55:58"} +{"current_steps": 1386, "total_steps": 1955, "loss": 0.9673, "lr": 3.787470869171277e-05, "epoch": 3.544757033248082, "percentage": 70.9, "elapsed_time": "7:07:54", "remaining_time": "2:55:40"} +{"current_steps": 1387, "total_steps": 1955, "loss": 0.9244, "lr": 3.7753308051863534e-05, "epoch": 3.547314578005115, "percentage": 70.95, "elapsed_time": "7:08:13", "remaining_time": "2:55:21"} +{"current_steps": 1388, "total_steps": 1955, "loss": 0.9414, "lr": 3.763204217190778e-05, "epoch": 3.5498721227621486, "percentage": 71.0, "elapsed_time": "7:08:32", "remaining_time": "2:55:03"} +{"current_steps": 1389, "total_steps": 1955, "loss": 0.9677, "lr": 3.751091143866338e-05, "epoch": 3.5524296675191813, "percentage": 71.05, "elapsed_time": "7:08:50", "remaining_time": "2:54:44"} +{"current_steps": 1390, "total_steps": 1955, "loss": 0.9758, "lr": 3.7389916238517224e-05, "epoch": 3.554987212276215, "percentage": 71.1, "elapsed_time": "7:09:08", "remaining_time": "2:54:26"} +{"current_steps": 1391, "total_steps": 1955, "loss": 0.9142, "lr": 3.726905695742372e-05, "epoch": 3.557544757033248, "percentage": 71.15, "elapsed_time": "7:09:27", "remaining_time": "2:54:07"} +{"current_steps": 1392, "total_steps": 1955, "loss": 0.9389, "lr": 3.7148333980903796e-05, "epoch": 3.5601023017902813, "percentage": 71.2, "elapsed_time": "7:09:46", "remaining_time": "2:53:49"} +{"current_steps": 1393, "total_steps": 1955, "loss": 0.9557, "lr": 
3.7027747694043645e-05, "epoch": 3.5626598465473145, "percentage": 71.25, "elapsed_time": "7:10:04", "remaining_time": "2:53:30"} +{"current_steps": 1394, "total_steps": 1955, "loss": 0.9588, "lr": 3.690729848149335e-05, "epoch": 3.5652173913043477, "percentage": 71.3, "elapsed_time": "7:10:22", "remaining_time": "2:53:12"} +{"current_steps": 1395, "total_steps": 1955, "loss": 0.964, "lr": 3.678698672746581e-05, "epoch": 3.5677749360613813, "percentage": 71.36, "elapsed_time": "7:10:41", "remaining_time": "2:52:53"} +{"current_steps": 1396, "total_steps": 1955, "loss": 0.9433, "lr": 3.6666812815735424e-05, "epoch": 3.5703324808184145, "percentage": 71.41, "elapsed_time": "7:11:00", "remaining_time": "2:52:35"} +{"current_steps": 1397, "total_steps": 1955, "loss": 0.9252, "lr": 3.6546777129636886e-05, "epoch": 3.5728900255754477, "percentage": 71.46, "elapsed_time": "7:11:18", "remaining_time": "2:52:16"} +{"current_steps": 1398, "total_steps": 1955, "loss": 0.9636, "lr": 3.6426880052064026e-05, "epoch": 3.575447570332481, "percentage": 71.51, "elapsed_time": "7:11:36", "remaining_time": "2:51:57"} +{"current_steps": 1399, "total_steps": 1955, "loss": 0.9649, "lr": 3.630712196546844e-05, "epoch": 3.578005115089514, "percentage": 71.56, "elapsed_time": "7:11:55", "remaining_time": "2:51:39"} +{"current_steps": 1400, "total_steps": 1955, "loss": 0.9267, "lr": 3.6187503251858505e-05, "epoch": 3.580562659846547, "percentage": 71.61, "elapsed_time": "7:12:14", "remaining_time": "2:51:20"} +{"current_steps": 1401, "total_steps": 1955, "loss": 0.9356, "lr": 3.6068024292797945e-05, "epoch": 3.5831202046035804, "percentage": 71.66, "elapsed_time": "7:12:32", "remaining_time": "2:51:02"} +{"current_steps": 1402, "total_steps": 1955, "loss": 0.9548, "lr": 3.59486854694046e-05, "epoch": 3.585677749360614, "percentage": 71.71, "elapsed_time": "7:12:51", "remaining_time": "2:50:43"} +{"current_steps": 1403, "total_steps": 1955, "loss": 0.9493, "lr": 3.582948716234948e-05, 
"epoch": 3.588235294117647, "percentage": 71.76, "elapsed_time": "7:13:09", "remaining_time": "2:50:25"} +{"current_steps": 1404, "total_steps": 1955, "loss": 0.9552, "lr": 3.571042975185524e-05, "epoch": 3.5907928388746804, "percentage": 71.82, "elapsed_time": "7:13:27", "remaining_time": "2:50:06"} +{"current_steps": 1405, "total_steps": 1955, "loss": 0.9466, "lr": 3.559151361769517e-05, "epoch": 3.5933503836317136, "percentage": 71.87, "elapsed_time": "7:13:46", "remaining_time": "2:49:48"} +{"current_steps": 1406, "total_steps": 1955, "loss": 0.95, "lr": 3.547273913919182e-05, "epoch": 3.5959079283887467, "percentage": 71.92, "elapsed_time": "7:14:04", "remaining_time": "2:49:29"} +{"current_steps": 1407, "total_steps": 1955, "loss": 0.9588, "lr": 3.535410669521605e-05, "epoch": 3.59846547314578, "percentage": 71.97, "elapsed_time": "7:14:23", "remaining_time": "2:49:11"} +{"current_steps": 1408, "total_steps": 1955, "loss": 0.9591, "lr": 3.5235616664185465e-05, "epoch": 3.601023017902813, "percentage": 72.02, "elapsed_time": "7:14:41", "remaining_time": "2:48:52"} +{"current_steps": 1409, "total_steps": 1955, "loss": 0.9372, "lr": 3.5117269424063466e-05, "epoch": 3.6035805626598467, "percentage": 72.07, "elapsed_time": "7:15:00", "remaining_time": "2:48:34"} +{"current_steps": 1410, "total_steps": 1955, "loss": 0.9128, "lr": 3.4999065352358055e-05, "epoch": 3.60613810741688, "percentage": 72.12, "elapsed_time": "7:15:19", "remaining_time": "2:48:15"} +{"current_steps": 1411, "total_steps": 1955, "loss": 0.9533, "lr": 3.488100482612046e-05, "epoch": 3.608695652173913, "percentage": 72.17, "elapsed_time": "7:15:37", "remaining_time": "2:47:57"} +{"current_steps": 1412, "total_steps": 1955, "loss": 0.9696, "lr": 3.476308822194404e-05, "epoch": 3.6112531969309463, "percentage": 72.23, "elapsed_time": "7:15:55", "remaining_time": "2:47:38"} +{"current_steps": 1413, "total_steps": 1955, "loss": 0.9295, "lr": 3.4645315915963085e-05, "epoch": 3.6138107416879794, 
"percentage": 72.28, "elapsed_time": "7:16:13", "remaining_time": "2:47:19"} +{"current_steps": 1414, "total_steps": 1955, "loss": 0.9478, "lr": 3.452768828385156e-05, "epoch": 3.6163682864450126, "percentage": 72.33, "elapsed_time": "7:16:32", "remaining_time": "2:47:01"} +{"current_steps": 1415, "total_steps": 1955, "loss": 0.9267, "lr": 3.4410205700822e-05, "epoch": 3.618925831202046, "percentage": 72.38, "elapsed_time": "7:16:51", "remaining_time": "2:46:42"} +{"current_steps": 1416, "total_steps": 1955, "loss": 0.9487, "lr": 3.42928685416242e-05, "epoch": 3.6214833759590794, "percentage": 72.43, "elapsed_time": "7:17:09", "remaining_time": "2:46:24"} +{"current_steps": 1417, "total_steps": 1955, "loss": 0.9257, "lr": 3.417567718054413e-05, "epoch": 3.6240409207161126, "percentage": 72.48, "elapsed_time": "7:17:28", "remaining_time": "2:46:05"} +{"current_steps": 1418, "total_steps": 1955, "loss": 0.9594, "lr": 3.405863199140271e-05, "epoch": 3.626598465473146, "percentage": 72.53, "elapsed_time": "7:17:46", "remaining_time": "2:45:47"} +{"current_steps": 1419, "total_steps": 1955, "loss": 0.954, "lr": 3.3941733347554434e-05, "epoch": 3.629156010230179, "percentage": 72.58, "elapsed_time": "7:18:05", "remaining_time": "2:45:28"} +{"current_steps": 1420, "total_steps": 1955, "loss": 0.9536, "lr": 3.3824981621886545e-05, "epoch": 3.631713554987212, "percentage": 72.63, "elapsed_time": "7:18:23", "remaining_time": "2:45:10"} +{"current_steps": 1421, "total_steps": 1955, "loss": 0.9685, "lr": 3.370837718681754e-05, "epoch": 3.634271099744246, "percentage": 72.69, "elapsed_time": "7:18:42", "remaining_time": "2:44:51"} +{"current_steps": 1422, "total_steps": 1955, "loss": 0.9248, "lr": 3.3591920414296094e-05, "epoch": 3.6368286445012785, "percentage": 72.74, "elapsed_time": "7:19:00", "remaining_time": "2:44:33"} +{"current_steps": 1423, "total_steps": 1955, "loss": 0.9521, "lr": 3.347561167579986e-05, "epoch": 3.639386189258312, "percentage": 72.79, "elapsed_time": 
"7:19:19", "remaining_time": "2:44:14"} +{"current_steps": 1424, "total_steps": 1955, "loss": 0.9431, "lr": 3.3359451342334306e-05, "epoch": 3.6419437340153453, "percentage": 72.84, "elapsed_time": "7:19:38", "remaining_time": "2:43:56"} +{"current_steps": 1425, "total_steps": 1955, "loss": 0.9716, "lr": 3.324343978443148e-05, "epoch": 3.6445012787723785, "percentage": 72.89, "elapsed_time": "7:19:56", "remaining_time": "2:43:37"} +{"current_steps": 1426, "total_steps": 1955, "loss": 0.9322, "lr": 3.3127577372148874e-05, "epoch": 3.6470588235294117, "percentage": 72.94, "elapsed_time": "7:20:15", "remaining_time": "2:43:19"} +{"current_steps": 1427, "total_steps": 1955, "loss": 0.9422, "lr": 3.301186447506827e-05, "epoch": 3.649616368286445, "percentage": 72.99, "elapsed_time": "7:20:33", "remaining_time": "2:43:00"} +{"current_steps": 1428, "total_steps": 1955, "loss": 0.9366, "lr": 3.289630146229449e-05, "epoch": 3.6521739130434785, "percentage": 73.04, "elapsed_time": "7:20:52", "remaining_time": "2:42:42"} +{"current_steps": 1429, "total_steps": 1955, "loss": 0.9286, "lr": 3.278088870245423e-05, "epoch": 3.6547314578005117, "percentage": 73.09, "elapsed_time": "7:21:10", "remaining_time": "2:42:23"} +{"current_steps": 1430, "total_steps": 1955, "loss": 0.9572, "lr": 3.2665626563694937e-05, "epoch": 3.657289002557545, "percentage": 73.15, "elapsed_time": "7:21:29", "remaining_time": "2:42:05"} +{"current_steps": 1431, "total_steps": 1955, "loss": 0.9512, "lr": 3.2550515413683574e-05, "epoch": 3.659846547314578, "percentage": 73.2, "elapsed_time": "7:21:47", "remaining_time": "2:41:46"} +{"current_steps": 1432, "total_steps": 1955, "loss": 0.9542, "lr": 3.2435555619605504e-05, "epoch": 3.662404092071611, "percentage": 73.25, "elapsed_time": "7:22:06", "remaining_time": "2:41:28"} +{"current_steps": 1433, "total_steps": 1955, "loss": 0.9306, "lr": 3.232074754816323e-05, "epoch": 3.6649616368286444, "percentage": 73.3, "elapsed_time": "7:22:25", "remaining_time": 
"2:41:09"} +{"current_steps": 1434, "total_steps": 1955, "loss": 0.9363, "lr": 3.220609156557544e-05, "epoch": 3.6675191815856776, "percentage": 73.35, "elapsed_time": "7:22:43", "remaining_time": "2:40:51"} +{"current_steps": 1435, "total_steps": 1955, "loss": 0.9321, "lr": 3.209158803757546e-05, "epoch": 3.670076726342711, "percentage": 73.4, "elapsed_time": "7:23:02", "remaining_time": "2:40:32"} +{"current_steps": 1436, "total_steps": 1955, "loss": 0.9608, "lr": 3.1977237329410446e-05, "epoch": 3.6726342710997444, "percentage": 73.45, "elapsed_time": "7:23:20", "remaining_time": "2:40:14"} +{"current_steps": 1437, "total_steps": 1955, "loss": 0.9389, "lr": 3.186303980584012e-05, "epoch": 3.6751918158567776, "percentage": 73.5, "elapsed_time": "7:23:39", "remaining_time": "2:39:55"} +{"current_steps": 1438, "total_steps": 1955, "loss": 0.9945, "lr": 3.174899583113548e-05, "epoch": 3.6777493606138107, "percentage": 73.55, "elapsed_time": "7:23:58", "remaining_time": "2:39:37"} +{"current_steps": 1439, "total_steps": 1955, "loss": 0.9307, "lr": 3.1635105769077766e-05, "epoch": 3.680306905370844, "percentage": 73.61, "elapsed_time": "7:24:16", "remaining_time": "2:39:18"} +{"current_steps": 1440, "total_steps": 1955, "loss": 0.9321, "lr": 3.152136998295727e-05, "epoch": 3.682864450127877, "percentage": 73.66, "elapsed_time": "7:24:35", "remaining_time": "2:39:00"} +{"current_steps": 1441, "total_steps": 1955, "loss": 0.932, "lr": 3.140778883557213e-05, "epoch": 3.6854219948849103, "percentage": 73.71, "elapsed_time": "7:24:54", "remaining_time": "2:38:41"} +{"current_steps": 1442, "total_steps": 1955, "loss": 0.9324, "lr": 3.129436268922728e-05, "epoch": 3.687979539641944, "percentage": 73.76, "elapsed_time": "7:25:12", "remaining_time": "2:38:23"} +{"current_steps": 1443, "total_steps": 1955, "loss": 0.9145, "lr": 3.118109190573313e-05, "epoch": 3.690537084398977, "percentage": 73.81, "elapsed_time": "7:25:30", "remaining_time": "2:38:04"} +{"current_steps": 1444, 
"total_steps": 1955, "loss": 0.9402, "lr": 3.106797684640464e-05, "epoch": 3.6930946291560103, "percentage": 73.86, "elapsed_time": "7:25:48", "remaining_time": "2:37:45"} +{"current_steps": 1445, "total_steps": 1955, "loss": 0.9591, "lr": 3.0955017872059956e-05, "epoch": 3.6956521739130435, "percentage": 73.91, "elapsed_time": "7:26:07", "remaining_time": "2:37:27"} +{"current_steps": 1446, "total_steps": 1955, "loss": 0.9253, "lr": 3.084221534301926e-05, "epoch": 3.6982097186700766, "percentage": 73.96, "elapsed_time": "7:26:26", "remaining_time": "2:37:08"} +{"current_steps": 1447, "total_steps": 1955, "loss": 0.9501, "lr": 3.0729569619103876e-05, "epoch": 3.70076726342711, "percentage": 74.02, "elapsed_time": "7:26:44", "remaining_time": "2:36:50"} +{"current_steps": 1448, "total_steps": 1955, "loss": 0.9706, "lr": 3.061708105963481e-05, "epoch": 3.703324808184143, "percentage": 74.07, "elapsed_time": "7:27:03", "remaining_time": "2:36:31"} +{"current_steps": 1449, "total_steps": 1955, "loss": 0.9268, "lr": 3.0504750023431787e-05, "epoch": 3.7058823529411766, "percentage": 74.12, "elapsed_time": "7:27:21", "remaining_time": "2:36:13"} +{"current_steps": 1450, "total_steps": 1955, "loss": 0.9385, "lr": 3.039257686881209e-05, "epoch": 3.70843989769821, "percentage": 74.17, "elapsed_time": "7:27:40", "remaining_time": "2:35:54"} +{"current_steps": 1451, "total_steps": 1955, "loss": 0.9201, "lr": 3.028056195358936e-05, "epoch": 3.710997442455243, "percentage": 74.22, "elapsed_time": "7:27:58", "remaining_time": "2:35:36"} +{"current_steps": 1452, "total_steps": 1955, "loss": 0.9486, "lr": 3.016870563507241e-05, "epoch": 3.713554987212276, "percentage": 74.27, "elapsed_time": "7:28:17", "remaining_time": "2:35:17"} +{"current_steps": 1453, "total_steps": 1955, "loss": 0.9326, "lr": 3.0057008270064226e-05, "epoch": 3.7161125319693094, "percentage": 74.32, "elapsed_time": "7:28:35", "remaining_time": "2:34:59"} +{"current_steps": 1454, "total_steps": 1955, "loss": 
0.9737, "lr": 2.9945470214860815e-05, "epoch": 3.718670076726343, "percentage": 74.37, "elapsed_time": "7:28:54", "remaining_time": "2:34:40"} +{"current_steps": 1455, "total_steps": 1955, "loss": 0.9319, "lr": 2.9834091825249908e-05, "epoch": 3.7212276214833757, "percentage": 74.42, "elapsed_time": "7:29:12", "remaining_time": "2:34:22"} +{"current_steps": 1456, "total_steps": 1955, "loss": 0.9289, "lr": 2.9722873456509985e-05, "epoch": 3.7237851662404093, "percentage": 74.48, "elapsed_time": "7:29:30", "remaining_time": "2:34:03"} +{"current_steps": 1457, "total_steps": 1955, "loss": 0.9707, "lr": 2.961181546340906e-05, "epoch": 3.7263427109974425, "percentage": 74.53, "elapsed_time": "7:29:48", "remaining_time": "2:33:44"} +{"current_steps": 1458, "total_steps": 1955, "loss": 0.9313, "lr": 2.95009182002036e-05, "epoch": 3.7289002557544757, "percentage": 74.58, "elapsed_time": "7:30:07", "remaining_time": "2:33:26"} +{"current_steps": 1459, "total_steps": 1955, "loss": 0.9647, "lr": 2.939018202063732e-05, "epoch": 3.731457800511509, "percentage": 74.63, "elapsed_time": "7:30:25", "remaining_time": "2:33:07"} +{"current_steps": 1460, "total_steps": 1955, "loss": 0.9474, "lr": 2.9279607277940196e-05, "epoch": 3.734015345268542, "percentage": 74.68, "elapsed_time": "7:30:43", "remaining_time": "2:32:48"} +{"current_steps": 1461, "total_steps": 1955, "loss": 0.926, "lr": 2.9169194324827183e-05, "epoch": 3.7365728900255757, "percentage": 74.73, "elapsed_time": "7:31:02", "remaining_time": "2:32:30"} +{"current_steps": 1462, "total_steps": 1955, "loss": 0.9312, "lr": 2.9058943513497158e-05, "epoch": 3.7391304347826084, "percentage": 74.78, "elapsed_time": "7:31:20", "remaining_time": "2:32:11"} +{"current_steps": 1463, "total_steps": 1955, "loss": 0.9417, "lr": 2.8948855195631797e-05, "epoch": 3.741687979539642, "percentage": 74.83, "elapsed_time": "7:31:39", "remaining_time": "2:31:53"} +{"current_steps": 1464, "total_steps": 1955, "loss": 0.9596, "lr": 
2.883892972239445e-05, "epoch": 3.7442455242966752, "percentage": 74.88, "elapsed_time": "7:31:58", "remaining_time": "2:31:34"} +{"current_steps": 1465, "total_steps": 1955, "loss": 0.9476, "lr": 2.8729167444429042e-05, "epoch": 3.7468030690537084, "percentage": 74.94, "elapsed_time": "7:32:16", "remaining_time": "2:31:16"} +{"current_steps": 1466, "total_steps": 1955, "loss": 0.945, "lr": 2.8619568711858858e-05, "epoch": 3.7493606138107416, "percentage": 74.99, "elapsed_time": "7:32:34", "remaining_time": "2:30:57"} +{"current_steps": 1467, "total_steps": 1955, "loss": 0.9462, "lr": 2.8510133874285633e-05, "epoch": 3.7519181585677748, "percentage": 75.04, "elapsed_time": "7:32:52", "remaining_time": "2:30:39"} +{"current_steps": 1468, "total_steps": 1955, "loss": 0.9407, "lr": 2.8400863280788207e-05, "epoch": 3.7544757033248084, "percentage": 75.09, "elapsed_time": "7:33:11", "remaining_time": "2:30:20"} +{"current_steps": 1469, "total_steps": 1955, "loss": 0.963, "lr": 2.829175727992147e-05, "epoch": 3.7570332480818416, "percentage": 75.14, "elapsed_time": "7:33:29", "remaining_time": "2:30:01"} +{"current_steps": 1470, "total_steps": 1955, "loss": 0.9221, "lr": 2.818281621971541e-05, "epoch": 3.7595907928388748, "percentage": 75.19, "elapsed_time": "7:33:48", "remaining_time": "2:29:43"} +{"current_steps": 1471, "total_steps": 1955, "loss": 0.9535, "lr": 2.8074040447673794e-05, "epoch": 3.762148337595908, "percentage": 75.24, "elapsed_time": "7:34:06", "remaining_time": "2:29:24"} +{"current_steps": 1472, "total_steps": 1955, "loss": 0.9475, "lr": 2.7965430310773184e-05, "epoch": 3.764705882352941, "percentage": 75.29, "elapsed_time": "7:34:25", "remaining_time": "2:29:06"} +{"current_steps": 1473, "total_steps": 1955, "loss": 0.9315, "lr": 2.7856986155461777e-05, "epoch": 3.7672634271099743, "percentage": 75.35, "elapsed_time": "7:34:43", "remaining_time": "2:28:47"} +{"current_steps": 1474, "total_steps": 1955, "loss": 0.9455, "lr": 2.7748708327658317e-05, 
"epoch": 3.7698209718670075, "percentage": 75.4, "elapsed_time": "7:35:02", "remaining_time": "2:28:29"} +{"current_steps": 1475, "total_steps": 1955, "loss": 0.9525, "lr": 2.7640597172751004e-05, "epoch": 3.772378516624041, "percentage": 75.45, "elapsed_time": "7:35:20", "remaining_time": "2:28:10"} +{"current_steps": 1476, "total_steps": 1955, "loss": 0.9453, "lr": 2.7532653035596336e-05, "epoch": 3.7749360613810743, "percentage": 75.5, "elapsed_time": "7:35:38", "remaining_time": "2:27:52"} +{"current_steps": 1477, "total_steps": 1955, "loss": 0.9152, "lr": 2.7424876260518146e-05, "epoch": 3.7774936061381075, "percentage": 75.55, "elapsed_time": "7:35:57", "remaining_time": "2:27:33"} +{"current_steps": 1478, "total_steps": 1955, "loss": 0.9398, "lr": 2.7317267191306318e-05, "epoch": 3.7800511508951407, "percentage": 75.6, "elapsed_time": "7:36:15", "remaining_time": "2:27:15"} +{"current_steps": 1479, "total_steps": 1955, "loss": 0.9246, "lr": 2.7209826171215827e-05, "epoch": 3.782608695652174, "percentage": 75.65, "elapsed_time": "7:36:34", "remaining_time": "2:26:56"} +{"current_steps": 1480, "total_steps": 1955, "loss": 0.936, "lr": 2.7102553542965577e-05, "epoch": 3.785166240409207, "percentage": 75.7, "elapsed_time": "7:36:52", "remaining_time": "2:26:37"} +{"current_steps": 1481, "total_steps": 1955, "loss": 0.9359, "lr": 2.6995449648737343e-05, "epoch": 3.78772378516624, "percentage": 75.75, "elapsed_time": "7:37:10", "remaining_time": "2:26:19"} +{"current_steps": 1482, "total_steps": 1955, "loss": 0.954, "lr": 2.6888514830174678e-05, "epoch": 3.790281329923274, "percentage": 75.81, "elapsed_time": "7:37:28", "remaining_time": "2:26:00"} +{"current_steps": 1483, "total_steps": 1955, "loss": 1.0034, "lr": 2.6781749428381752e-05, "epoch": 3.792838874680307, "percentage": 75.86, "elapsed_time": "7:37:47", "remaining_time": "2:25:42"} +{"current_steps": 1484, "total_steps": 1955, "loss": 0.9518, "lr": 2.6675153783922457e-05, "epoch": 3.79539641943734, 
"percentage": 75.91, "elapsed_time": "7:38:05", "remaining_time": "2:25:23"} +{"current_steps": 1485, "total_steps": 1955, "loss": 0.9817, "lr": 2.6568728236819023e-05, "epoch": 3.7979539641943734, "percentage": 75.96, "elapsed_time": "7:38:24", "remaining_time": "2:25:05"} +{"current_steps": 1486, "total_steps": 1955, "loss": 0.9735, "lr": 2.6462473126551187e-05, "epoch": 3.8005115089514065, "percentage": 76.01, "elapsed_time": "7:38:43", "remaining_time": "2:24:46"} +{"current_steps": 1487, "total_steps": 1955, "loss": 0.9579, "lr": 2.635638879205504e-05, "epoch": 3.80306905370844, "percentage": 76.06, "elapsed_time": "7:39:01", "remaining_time": "2:24:28"} +{"current_steps": 1488, "total_steps": 1955, "loss": 0.9402, "lr": 2.625047557172189e-05, "epoch": 3.805626598465473, "percentage": 76.11, "elapsed_time": "7:39:19", "remaining_time": "2:24:09"} +{"current_steps": 1489, "total_steps": 1955, "loss": 0.9474, "lr": 2.6144733803397212e-05, "epoch": 3.8081841432225065, "percentage": 76.16, "elapsed_time": "7:39:38", "remaining_time": "2:23:50"} +{"current_steps": 1490, "total_steps": 1955, "loss": 0.9506, "lr": 2.6039163824379588e-05, "epoch": 3.8107416879795397, "percentage": 76.21, "elapsed_time": "7:39:56", "remaining_time": "2:23:32"} +{"current_steps": 1491, "total_steps": 1955, "loss": 0.9488, "lr": 2.5933765971419647e-05, "epoch": 3.813299232736573, "percentage": 76.27, "elapsed_time": "7:40:15", "remaining_time": "2:23:13"} +{"current_steps": 1492, "total_steps": 1955, "loss": 0.9458, "lr": 2.582854058071892e-05, "epoch": 3.815856777493606, "percentage": 76.32, "elapsed_time": "7:40:34", "remaining_time": "2:22:55"} +{"current_steps": 1493, "total_steps": 1955, "loss": 0.9518, "lr": 2.5723487987928817e-05, "epoch": 3.8184143222506393, "percentage": 76.37, "elapsed_time": "7:40:52", "remaining_time": "2:22:36"} +{"current_steps": 1494, "total_steps": 1955, "loss": 0.93, "lr": 2.5618608528149614e-05, "epoch": 3.820971867007673, "percentage": 76.42, 
"elapsed_time": "7:41:11", "remaining_time": "2:22:18"} +{"current_steps": 1495, "total_steps": 1955, "loss": 0.9763, "lr": 2.5513902535929288e-05, "epoch": 3.8235294117647056, "percentage": 76.47, "elapsed_time": "7:41:30", "remaining_time": "2:22:00"} +{"current_steps": 1496, "total_steps": 1955, "loss": 0.9471, "lr": 2.5409370345262385e-05, "epoch": 3.8260869565217392, "percentage": 76.52, "elapsed_time": "7:41:48", "remaining_time": "2:21:41"} +{"current_steps": 1497, "total_steps": 1955, "loss": 0.9588, "lr": 2.5305012289589223e-05, "epoch": 3.8286445012787724, "percentage": 76.57, "elapsed_time": "7:42:06", "remaining_time": "2:21:22"} +{"current_steps": 1498, "total_steps": 1955, "loss": 0.9294, "lr": 2.5200828701794543e-05, "epoch": 3.8312020460358056, "percentage": 76.62, "elapsed_time": "7:42:24", "remaining_time": "2:21:04"} +{"current_steps": 1499, "total_steps": 1955, "loss": 0.9372, "lr": 2.5096819914206592e-05, "epoch": 3.833759590792839, "percentage": 76.68, "elapsed_time": "7:42:43", "remaining_time": "2:20:45"} +{"current_steps": 1500, "total_steps": 1955, "loss": 0.9648, "lr": 2.4992986258596023e-05, "epoch": 3.836317135549872, "percentage": 76.73, "elapsed_time": "7:43:01", "remaining_time": "2:20:26"} +{"current_steps": 1501, "total_steps": 1955, "loss": 0.9458, "lr": 2.4889328066174932e-05, "epoch": 3.8388746803069056, "percentage": 76.78, "elapsed_time": "7:43:19", "remaining_time": "2:20:08"} +{"current_steps": 1502, "total_steps": 1955, "loss": 0.9532, "lr": 2.4785845667595565e-05, "epoch": 3.8414322250639388, "percentage": 76.83, "elapsed_time": "7:43:38", "remaining_time": "2:19:49"} +{"current_steps": 1503, "total_steps": 1955, "loss": 0.9194, "lr": 2.4682539392949494e-05, "epoch": 3.843989769820972, "percentage": 76.88, "elapsed_time": "7:43:56", "remaining_time": "2:19:31"} +{"current_steps": 1504, "total_steps": 1955, "loss": 0.9619, "lr": 2.4579409571766543e-05, "epoch": 3.846547314578005, "percentage": 76.93, "elapsed_time": 
"7:44:15", "remaining_time": "2:19:12"} +{"current_steps": 1505, "total_steps": 1955, "loss": 0.9637, "lr": 2.4476456533013597e-05, "epoch": 3.8491048593350383, "percentage": 76.98, "elapsed_time": "7:44:33", "remaining_time": "2:18:54"} +{"current_steps": 1506, "total_steps": 1955, "loss": 0.9406, "lr": 2.437368060509365e-05, "epoch": 3.8516624040920715, "percentage": 77.03, "elapsed_time": "7:44:52", "remaining_time": "2:18:35"} +{"current_steps": 1507, "total_steps": 1955, "loss": 0.9595, "lr": 2.427108211584476e-05, "epoch": 3.8542199488491047, "percentage": 77.08, "elapsed_time": "7:45:10", "remaining_time": "2:18:17"} +{"current_steps": 1508, "total_steps": 1955, "loss": 0.9421, "lr": 2.4168661392538982e-05, "epoch": 3.8567774936061383, "percentage": 77.14, "elapsed_time": "7:45:29", "remaining_time": "2:17:58"} +{"current_steps": 1509, "total_steps": 1955, "loss": 0.9687, "lr": 2.4066418761881308e-05, "epoch": 3.8593350383631715, "percentage": 77.19, "elapsed_time": "7:45:47", "remaining_time": "2:17:40"} +{"current_steps": 1510, "total_steps": 1955, "loss": 0.9468, "lr": 2.396435455000864e-05, "epoch": 3.8618925831202047, "percentage": 77.24, "elapsed_time": "7:46:06", "remaining_time": "2:17:21"} +{"current_steps": 1511, "total_steps": 1955, "loss": 0.9228, "lr": 2.386246908248883e-05, "epoch": 3.864450127877238, "percentage": 77.29, "elapsed_time": "7:46:25", "remaining_time": "2:17:03"} +{"current_steps": 1512, "total_steps": 1955, "loss": 0.9094, "lr": 2.3760762684319508e-05, "epoch": 3.867007672634271, "percentage": 77.34, "elapsed_time": "7:46:43", "remaining_time": "2:16:44"} +{"current_steps": 1513, "total_steps": 1955, "loss": 0.9351, "lr": 2.3659235679927016e-05, "epoch": 3.869565217391304, "percentage": 77.39, "elapsed_time": "7:47:01", "remaining_time": "2:16:26"} +{"current_steps": 1514, "total_steps": 1955, "loss": 0.9454, "lr": 2.3557888393165627e-05, "epoch": 3.8721227621483374, "percentage": 77.44, "elapsed_time": "7:47:20", 
"remaining_time": "2:16:07"} +{"current_steps": 1515, "total_steps": 1955, "loss": 0.9481, "lr": 2.345672114731624e-05, "epoch": 3.874680306905371, "percentage": 77.49, "elapsed_time": "7:47:39", "remaining_time": "2:15:49"} +{"current_steps": 1516, "total_steps": 1955, "loss": 0.9583, "lr": 2.335573426508547e-05, "epoch": 3.877237851662404, "percentage": 77.54, "elapsed_time": "7:47:57", "remaining_time": "2:15:30"} +{"current_steps": 1517, "total_steps": 1955, "loss": 0.9799, "lr": 2.325492806860462e-05, "epoch": 3.8797953964194374, "percentage": 77.6, "elapsed_time": "7:48:15", "remaining_time": "2:15:11"} +{"current_steps": 1518, "total_steps": 1955, "loss": 0.9533, "lr": 2.315430287942862e-05, "epoch": 3.8823529411764706, "percentage": 77.65, "elapsed_time": "7:48:34", "remaining_time": "2:14:53"} +{"current_steps": 1519, "total_steps": 1955, "loss": 0.9709, "lr": 2.3053859018535026e-05, "epoch": 3.8849104859335037, "percentage": 77.7, "elapsed_time": "7:48:52", "remaining_time": "2:14:34"} +{"current_steps": 1520, "total_steps": 1955, "loss": 0.9615, "lr": 2.295359680632295e-05, "epoch": 3.887468030690537, "percentage": 77.75, "elapsed_time": "7:49:11", "remaining_time": "2:14:16"} +{"current_steps": 1521, "total_steps": 1955, "loss": 0.9376, "lr": 2.2853516562612173e-05, "epoch": 3.89002557544757, "percentage": 77.8, "elapsed_time": "7:49:29", "remaining_time": "2:13:57"} +{"current_steps": 1522, "total_steps": 1955, "loss": 0.9092, "lr": 2.2753618606641928e-05, "epoch": 3.8925831202046037, "percentage": 77.85, "elapsed_time": "7:49:48", "remaining_time": "2:13:39"} +{"current_steps": 1523, "total_steps": 1955, "loss": 0.9443, "lr": 2.2653903257070012e-05, "epoch": 3.895140664961637, "percentage": 77.9, "elapsed_time": "7:50:06", "remaining_time": "2:13:20"} +{"current_steps": 1524, "total_steps": 1955, "loss": 0.9406, "lr": 2.2554370831971743e-05, "epoch": 3.89769820971867, "percentage": 77.95, "elapsed_time": "7:50:25", "remaining_time": "2:13:02"} 
+{"current_steps": 1525, "total_steps": 1955, "loss": 0.9614, "lr": 2.2455021648838935e-05, "epoch": 3.9002557544757033, "percentage": 78.01, "elapsed_time": "7:50:44", "remaining_time": "2:12:43"} +{"current_steps": 1526, "total_steps": 1955, "loss": 0.9487, "lr": 2.235585602457891e-05, "epoch": 3.9028132992327365, "percentage": 78.06, "elapsed_time": "7:51:02", "remaining_time": "2:12:25"} +{"current_steps": 1527, "total_steps": 1955, "loss": 0.9401, "lr": 2.225687427551341e-05, "epoch": 3.90537084398977, "percentage": 78.11, "elapsed_time": "7:51:21", "remaining_time": "2:12:06"} +{"current_steps": 1528, "total_steps": 1955, "loss": 0.9536, "lr": 2.2158076717377765e-05, "epoch": 3.907928388746803, "percentage": 78.16, "elapsed_time": "7:51:39", "remaining_time": "2:11:48"} +{"current_steps": 1529, "total_steps": 1955, "loss": 0.9198, "lr": 2.2059463665319623e-05, "epoch": 3.9104859335038364, "percentage": 78.21, "elapsed_time": "7:51:58", "remaining_time": "2:11:29"} +{"current_steps": 1530, "total_steps": 1955, "loss": 0.9481, "lr": 2.196103543389815e-05, "epoch": 3.9130434782608696, "percentage": 78.26, "elapsed_time": "7:52:16", "remaining_time": "2:11:11"} +{"current_steps": 1531, "total_steps": 1955, "loss": 0.9387, "lr": 2.1862792337083017e-05, "epoch": 3.915601023017903, "percentage": 78.31, "elapsed_time": "7:52:34", "remaining_time": "2:10:52"} +{"current_steps": 1532, "total_steps": 1955, "loss": 0.9536, "lr": 2.176473468825328e-05, "epoch": 3.918158567774936, "percentage": 78.36, "elapsed_time": "7:52:53", "remaining_time": "2:10:34"} +{"current_steps": 1533, "total_steps": 1955, "loss": 0.9491, "lr": 2.1666862800196454e-05, "epoch": 3.920716112531969, "percentage": 78.41, "elapsed_time": "7:53:11", "remaining_time": "2:10:15"} +{"current_steps": 1534, "total_steps": 1955, "loss": 0.9612, "lr": 2.1569176985107535e-05, "epoch": 3.923273657289003, "percentage": 78.47, "elapsed_time": "7:53:30", "remaining_time": "2:09:57"} +{"current_steps": 1535, 
"total_steps": 1955, "loss": 0.9511, "lr": 2.1471677554587958e-05, "epoch": 3.9258312020460355, "percentage": 78.52, "elapsed_time": "7:53:49", "remaining_time": "2:09:38"} +{"current_steps": 1536, "total_steps": 1955, "loss": 0.9373, "lr": 2.1374364819644623e-05, "epoch": 3.928388746803069, "percentage": 78.57, "elapsed_time": "7:54:07", "remaining_time": "2:09:20"} +{"current_steps": 1537, "total_steps": 1955, "loss": 0.9353, "lr": 2.1277239090688894e-05, "epoch": 3.9309462915601023, "percentage": 78.62, "elapsed_time": "7:54:25", "remaining_time": "2:09:01"} +{"current_steps": 1538, "total_steps": 1955, "loss": 0.9531, "lr": 2.1180300677535655e-05, "epoch": 3.9335038363171355, "percentage": 78.67, "elapsed_time": "7:54:44", "remaining_time": "2:08:43"} +{"current_steps": 1539, "total_steps": 1955, "loss": 0.936, "lr": 2.108354988940228e-05, "epoch": 3.9360613810741687, "percentage": 78.72, "elapsed_time": "7:55:03", "remaining_time": "2:08:24"} +{"current_steps": 1540, "total_steps": 1955, "loss": 0.9452, "lr": 2.0986987034907554e-05, "epoch": 3.938618925831202, "percentage": 78.77, "elapsed_time": "7:55:22", "remaining_time": "2:08:06"} +{"current_steps": 1541, "total_steps": 1955, "loss": 0.9369, "lr": 2.089061242207092e-05, "epoch": 3.9411764705882355, "percentage": 78.82, "elapsed_time": "7:55:40", "remaining_time": "2:07:47"} +{"current_steps": 1542, "total_steps": 1955, "loss": 0.9142, "lr": 2.0794426358311294e-05, "epoch": 3.9437340153452687, "percentage": 78.87, "elapsed_time": "7:55:59", "remaining_time": "2:07:29"} +{"current_steps": 1543, "total_steps": 1955, "loss": 0.9381, "lr": 2.069842915044614e-05, "epoch": 3.946291560102302, "percentage": 78.93, "elapsed_time": "7:56:17", "remaining_time": "2:07:10"} +{"current_steps": 1544, "total_steps": 1955, "loss": 0.921, "lr": 2.0602621104690517e-05, "epoch": 3.948849104859335, "percentage": 78.98, "elapsed_time": "7:56:36", "remaining_time": "2:06:52"} +{"current_steps": 1545, "total_steps": 1955, "loss": 
0.9549, "lr": 2.050700252665615e-05, "epoch": 3.9514066496163682, "percentage": 79.03, "elapsed_time": "7:56:54", "remaining_time": "2:06:33"} +{"current_steps": 1546, "total_steps": 1955, "loss": 0.9287, "lr": 2.041157372135028e-05, "epoch": 3.9539641943734014, "percentage": 79.08, "elapsed_time": "7:57:13", "remaining_time": "2:06:14"} +{"current_steps": 1547, "total_steps": 1955, "loss": 0.9555, "lr": 2.0316334993174856e-05, "epoch": 3.9565217391304346, "percentage": 79.13, "elapsed_time": "7:57:32", "remaining_time": "2:05:56"} +{"current_steps": 1548, "total_steps": 1955, "loss": 0.9343, "lr": 2.0221286645925558e-05, "epoch": 3.959079283887468, "percentage": 79.18, "elapsed_time": "7:57:50", "remaining_time": "2:05:38"} +{"current_steps": 1549, "total_steps": 1955, "loss": 0.9961, "lr": 2.012642898279074e-05, "epoch": 3.9616368286445014, "percentage": 79.23, "elapsed_time": "7:58:09", "remaining_time": "2:05:19"} +{"current_steps": 1550, "total_steps": 1955, "loss": 0.9647, "lr": 2.003176230635049e-05, "epoch": 3.9641943734015346, "percentage": 79.28, "elapsed_time": "7:58:27", "remaining_time": "2:05:01"} +{"current_steps": 1551, "total_steps": 1955, "loss": 0.9541, "lr": 1.9937286918575713e-05, "epoch": 3.9667519181585678, "percentage": 79.34, "elapsed_time": "7:58:45", "remaining_time": "2:04:42"} +{"current_steps": 1552, "total_steps": 1955, "loss": 0.9549, "lr": 1.984300312082711e-05, "epoch": 3.969309462915601, "percentage": 79.39, "elapsed_time": "7:59:04", "remaining_time": "2:04:23"} +{"current_steps": 1553, "total_steps": 1955, "loss": 0.9538, "lr": 1.9748911213854267e-05, "epoch": 3.971867007672634, "percentage": 79.44, "elapsed_time": "7:59:22", "remaining_time": "2:04:05"} +{"current_steps": 1554, "total_steps": 1955, "loss": 0.9426, "lr": 1.9655011497794616e-05, "epoch": 3.9744245524296673, "percentage": 79.49, "elapsed_time": "7:59:40", "remaining_time": "2:03:46"} +{"current_steps": 1555, "total_steps": 1955, "loss": 0.9639, "lr": 
1.9561304272172644e-05, "epoch": 3.976982097186701, "percentage": 79.54, "elapsed_time": "7:59:59", "remaining_time": "2:03:28"} +{"current_steps": 1556, "total_steps": 1955, "loss": 0.9733, "lr": 1.946778983589873e-05, "epoch": 3.979539641943734, "percentage": 79.59, "elapsed_time": "8:00:18", "remaining_time": "2:03:09"} +{"current_steps": 1557, "total_steps": 1955, "loss": 0.944, "lr": 1.9374468487268254e-05, "epoch": 3.9820971867007673, "percentage": 79.64, "elapsed_time": "8:00:36", "remaining_time": "2:02:51"} +{"current_steps": 1558, "total_steps": 1955, "loss": 0.9575, "lr": 1.9281340523960806e-05, "epoch": 3.9846547314578005, "percentage": 79.69, "elapsed_time": "8:00:54", "remaining_time": "2:02:32"} +{"current_steps": 1559, "total_steps": 1955, "loss": 0.939, "lr": 1.9188406243039015e-05, "epoch": 3.9872122762148337, "percentage": 79.74, "elapsed_time": "8:01:13", "remaining_time": "2:02:14"} +{"current_steps": 1560, "total_steps": 1955, "loss": 0.9523, "lr": 1.9095665940947717e-05, "epoch": 3.9897698209718673, "percentage": 79.8, "elapsed_time": "8:01:31", "remaining_time": "2:01:55"} +{"current_steps": 1561, "total_steps": 1955, "loss": 0.9518, "lr": 1.9003119913512992e-05, "epoch": 3.9923273657289, "percentage": 79.85, "elapsed_time": "8:01:50", "remaining_time": "2:01:36"} +{"current_steps": 1562, "total_steps": 1955, "loss": 0.966, "lr": 1.891076845594122e-05, "epoch": 3.9948849104859336, "percentage": 79.9, "elapsed_time": "8:02:08", "remaining_time": "2:01:18"} +{"current_steps": 1563, "total_steps": 1955, "loss": 0.9425, "lr": 1.881861186281813e-05, "epoch": 3.997442455242967, "percentage": 79.95, "elapsed_time": "8:02:27", "remaining_time": "2:00:59"} +{"current_steps": 1564, "total_steps": 1955, "loss": 0.9491, "lr": 1.872665042810784e-05, "epoch": 4.0, "percentage": 80.0, "elapsed_time": "8:02:45", "remaining_time": "2:00:41"} +{"current_steps": 1565, "total_steps": 1955, "loss": 0.9131, "lr": 1.863488444515203e-05, "epoch": 4.002557544757034, 
"percentage": 80.05, "elapsed_time": "8:03:14", "remaining_time": "2:00:25"} +{"current_steps": 1566, "total_steps": 1955, "loss": 0.9254, "lr": 1.854331420666882e-05, "epoch": 4.005115089514066, "percentage": 80.1, "elapsed_time": "8:03:32", "remaining_time": "2:00:06"} +{"current_steps": 1567, "total_steps": 1955, "loss": 0.9005, "lr": 1.845194000475199e-05, "epoch": 4.0076726342711, "percentage": 80.15, "elapsed_time": "8:03:51", "remaining_time": "1:59:48"} +{"current_steps": 1568, "total_steps": 1955, "loss": 0.9177, "lr": 1.836076213087e-05, "epoch": 4.010230179028133, "percentage": 80.2, "elapsed_time": "8:04:09", "remaining_time": "1:59:29"} +{"current_steps": 1569, "total_steps": 1955, "loss": 0.9288, "lr": 1.826978087586502e-05, "epoch": 4.012787723785166, "percentage": 80.26, "elapsed_time": "8:04:29", "remaining_time": "1:59:11"} +{"current_steps": 1570, "total_steps": 1955, "loss": 0.9302, "lr": 1.8178996529952088e-05, "epoch": 4.015345268542199, "percentage": 80.31, "elapsed_time": "8:04:48", "remaining_time": "1:58:53"} +{"current_steps": 1571, "total_steps": 1955, "loss": 0.9277, "lr": 1.808840938271807e-05, "epoch": 4.017902813299233, "percentage": 80.36, "elapsed_time": "8:05:06", "remaining_time": "1:58:34"} +{"current_steps": 1572, "total_steps": 1955, "loss": 0.9146, "lr": 1.799801972312092e-05, "epoch": 4.020460358056266, "percentage": 80.41, "elapsed_time": "8:05:24", "remaining_time": "1:58:15"} +{"current_steps": 1573, "total_steps": 1955, "loss": 0.9175, "lr": 1.7907827839488474e-05, "epoch": 4.023017902813299, "percentage": 80.46, "elapsed_time": "8:05:43", "remaining_time": "1:57:57"} +{"current_steps": 1574, "total_steps": 1955, "loss": 0.9128, "lr": 1.7817834019517805e-05, "epoch": 4.025575447570333, "percentage": 80.51, "elapsed_time": "8:06:01", "remaining_time": "1:57:38"} +{"current_steps": 1575, "total_steps": 1955, "loss": 0.9185, "lr": 1.7728038550274193e-05, "epoch": 4.028132992327365, "percentage": 80.56, "elapsed_time": 
"8:06:20", "remaining_time": "1:57:20"} +{"current_steps": 1576, "total_steps": 1955, "loss": 0.9296, "lr": 1.7638441718190192e-05, "epoch": 4.030690537084399, "percentage": 80.61, "elapsed_time": "8:06:39", "remaining_time": "1:57:01"} +{"current_steps": 1577, "total_steps": 1955, "loss": 0.9011, "lr": 1.7549043809064697e-05, "epoch": 4.033248081841432, "percentage": 80.66, "elapsed_time": "8:06:57", "remaining_time": "1:56:43"} +{"current_steps": 1578, "total_steps": 1955, "loss": 0.9301, "lr": 1.74598451080622e-05, "epoch": 4.035805626598465, "percentage": 80.72, "elapsed_time": "8:07:15", "remaining_time": "1:56:24"} +{"current_steps": 1579, "total_steps": 1955, "loss": 0.9294, "lr": 1.737084589971157e-05, "epoch": 4.038363171355499, "percentage": 80.77, "elapsed_time": "8:07:34", "remaining_time": "1:56:06"} +{"current_steps": 1580, "total_steps": 1955, "loss": 0.9464, "lr": 1.728204646790544e-05, "epoch": 4.040920716112532, "percentage": 80.82, "elapsed_time": "8:07:52", "remaining_time": "1:55:47"} +{"current_steps": 1581, "total_steps": 1955, "loss": 0.9224, "lr": 1.7193447095899206e-05, "epoch": 4.043478260869565, "percentage": 80.87, "elapsed_time": "8:08:10", "remaining_time": "1:55:29"} +{"current_steps": 1582, "total_steps": 1955, "loss": 0.9087, "lr": 1.710504806631005e-05, "epoch": 4.046035805626598, "percentage": 80.92, "elapsed_time": "8:08:28", "remaining_time": "1:55:10"} +{"current_steps": 1583, "total_steps": 1955, "loss": 0.9036, "lr": 1.701684966111615e-05, "epoch": 4.048593350383632, "percentage": 80.97, "elapsed_time": "8:08:47", "remaining_time": "1:54:51"} +{"current_steps": 1584, "total_steps": 1955, "loss": 0.92, "lr": 1.6928852161655616e-05, "epoch": 4.051150895140665, "percentage": 81.02, "elapsed_time": "8:09:05", "remaining_time": "1:54:33"} +{"current_steps": 1585, "total_steps": 1955, "loss": 0.9156, "lr": 1.684105584862584e-05, "epoch": 4.053708439897698, "percentage": 81.07, "elapsed_time": "8:09:24", "remaining_time": 
"1:54:14"} +{"current_steps": 1586, "total_steps": 1955, "loss": 0.9554, "lr": 1.6753461002082395e-05, "epoch": 4.056265984654732, "percentage": 81.13, "elapsed_time": "8:09:42", "remaining_time": "1:53:56"} +{"current_steps": 1587, "total_steps": 1955, "loss": 0.8844, "lr": 1.6666067901438178e-05, "epoch": 4.0588235294117645, "percentage": 81.18, "elapsed_time": "8:10:01", "remaining_time": "1:53:37"} +{"current_steps": 1588, "total_steps": 1955, "loss": 0.9091, "lr": 1.657887682546264e-05, "epoch": 4.061381074168798, "percentage": 81.23, "elapsed_time": "8:10:19", "remaining_time": "1:53:19"} +{"current_steps": 1589, "total_steps": 1955, "loss": 0.9462, "lr": 1.649188805228076e-05, "epoch": 4.063938618925831, "percentage": 81.28, "elapsed_time": "8:10:38", "remaining_time": "1:53:00"} +{"current_steps": 1590, "total_steps": 1955, "loss": 0.9153, "lr": 1.6405101859372123e-05, "epoch": 4.0664961636828645, "percentage": 81.33, "elapsed_time": "8:10:56", "remaining_time": "1:52:42"} +{"current_steps": 1591, "total_steps": 1955, "loss": 0.9519, "lr": 1.631851852357026e-05, "epoch": 4.069053708439898, "percentage": 81.38, "elapsed_time": "8:11:15", "remaining_time": "1:52:23"} +{"current_steps": 1592, "total_steps": 1955, "loss": 0.9412, "lr": 1.6232138321061544e-05, "epoch": 4.071611253196931, "percentage": 81.43, "elapsed_time": "8:11:34", "remaining_time": "1:52:05"} +{"current_steps": 1593, "total_steps": 1955, "loss": 0.9517, "lr": 1.6145961527384395e-05, "epoch": 4.0741687979539645, "percentage": 81.48, "elapsed_time": "8:11:52", "remaining_time": "1:51:46"} +{"current_steps": 1594, "total_steps": 1955, "loss": 0.9513, "lr": 1.6059988417428396e-05, "epoch": 4.076726342710997, "percentage": 81.53, "elapsed_time": "8:12:11", "remaining_time": "1:51:28"} +{"current_steps": 1595, "total_steps": 1955, "loss": 0.9154, "lr": 1.5974219265433406e-05, "epoch": 4.079283887468031, "percentage": 81.59, "elapsed_time": "8:12:29", "remaining_time": "1:51:09"} +{"current_steps": 
1596, "total_steps": 1955, "loss": 0.9394, "lr": 1.58886543449887e-05, "epoch": 4.081841432225064, "percentage": 81.64, "elapsed_time": "8:12:48", "remaining_time": "1:50:51"} +{"current_steps": 1597, "total_steps": 1955, "loss": 0.9281, "lr": 1.5803293929032078e-05, "epoch": 4.084398976982097, "percentage": 81.69, "elapsed_time": "8:13:06", "remaining_time": "1:50:32"} +{"current_steps": 1598, "total_steps": 1955, "loss": 0.8957, "lr": 1.5718138289849055e-05, "epoch": 4.086956521739131, "percentage": 81.74, "elapsed_time": "8:13:24", "remaining_time": "1:50:13"} +{"current_steps": 1599, "total_steps": 1955, "loss": 0.9004, "lr": 1.563318769907187e-05, "epoch": 4.089514066496164, "percentage": 81.79, "elapsed_time": "8:13:43", "remaining_time": "1:49:55"} +{"current_steps": 1600, "total_steps": 1955, "loss": 0.9311, "lr": 1.554844242767872e-05, "epoch": 4.092071611253197, "percentage": 81.84, "elapsed_time": "8:14:01", "remaining_time": "1:49:36"} +{"current_steps": 1601, "total_steps": 1955, "loss": 0.9256, "lr": 1.546390274599289e-05, "epoch": 4.09462915601023, "percentage": 81.89, "elapsed_time": "8:14:20", "remaining_time": "1:49:18"} +{"current_steps": 1602, "total_steps": 1955, "loss": 0.9136, "lr": 1.5379568923681833e-05, "epoch": 4.0971867007672635, "percentage": 81.94, "elapsed_time": "8:14:38", "remaining_time": "1:48:59"} +{"current_steps": 1603, "total_steps": 1955, "loss": 0.9007, "lr": 1.5295441229756364e-05, "epoch": 4.099744245524296, "percentage": 81.99, "elapsed_time": "8:14:57", "remaining_time": "1:48:41"} +{"current_steps": 1604, "total_steps": 1955, "loss": 0.9406, "lr": 1.521151993256977e-05, "epoch": 4.10230179028133, "percentage": 82.05, "elapsed_time": "8:15:15", "remaining_time": "1:48:22"} +{"current_steps": 1605, "total_steps": 1955, "loss": 0.9264, "lr": 1.5127805299817025e-05, "epoch": 4.1048593350383635, "percentage": 82.1, "elapsed_time": "8:15:34", "remaining_time": "1:48:04"} +{"current_steps": 1606, "total_steps": 1955, "loss": 
0.9285, "lr": 1.5044297598533777e-05, "epoch": 4.107416879795396, "percentage": 82.15, "elapsed_time": "8:15:52", "remaining_time": "1:47:45"} +{"current_steps": 1607, "total_steps": 1955, "loss": 0.9078, "lr": 1.496099709509565e-05, "epoch": 4.10997442455243, "percentage": 82.2, "elapsed_time": "8:16:11", "remaining_time": "1:47:26"} +{"current_steps": 1608, "total_steps": 1955, "loss": 0.9149, "lr": 1.4877904055217376e-05, "epoch": 4.112531969309463, "percentage": 82.25, "elapsed_time": "8:16:29", "remaining_time": "1:47:08"} +{"current_steps": 1609, "total_steps": 1955, "loss": 0.9304, "lr": 1.4795018743951857e-05, "epoch": 4.115089514066496, "percentage": 82.3, "elapsed_time": "8:16:48", "remaining_time": "1:46:50"} +{"current_steps": 1610, "total_steps": 1955, "loss": 0.926, "lr": 1.4712341425689406e-05, "epoch": 4.117647058823529, "percentage": 82.35, "elapsed_time": "8:17:07", "remaining_time": "1:46:31"} +{"current_steps": 1611, "total_steps": 1955, "loss": 0.9185, "lr": 1.4629872364156854e-05, "epoch": 4.120204603580563, "percentage": 82.4, "elapsed_time": "8:17:25", "remaining_time": "1:46:12"} +{"current_steps": 1612, "total_steps": 1955, "loss": 0.9126, "lr": 1.4547611822416748e-05, "epoch": 4.122762148337596, "percentage": 82.46, "elapsed_time": "8:17:43", "remaining_time": "1:45:54"} +{"current_steps": 1613, "total_steps": 1955, "loss": 0.9372, "lr": 1.446556006286648e-05, "epoch": 4.125319693094629, "percentage": 82.51, "elapsed_time": "8:18:02", "remaining_time": "1:45:35"} +{"current_steps": 1614, "total_steps": 1955, "loss": 0.9255, "lr": 1.4383717347237425e-05, "epoch": 4.127877237851663, "percentage": 82.56, "elapsed_time": "8:18:21", "remaining_time": "1:45:17"} +{"current_steps": 1615, "total_steps": 1955, "loss": 0.9267, "lr": 1.4302083936594247e-05, "epoch": 4.130434782608695, "percentage": 82.61, "elapsed_time": "8:18:39", "remaining_time": "1:44:58"} +{"current_steps": 1616, "total_steps": 1955, "loss": 0.9237, "lr": 
1.4220660091333875e-05, "epoch": 4.132992327365729, "percentage": 82.66, "elapsed_time": "8:18:57", "remaining_time": "1:44:40"} +{"current_steps": 1617, "total_steps": 1955, "loss": 0.9197, "lr": 1.4139446071184737e-05, "epoch": 4.135549872122763, "percentage": 82.71, "elapsed_time": "8:19:15", "remaining_time": "1:44:21"} +{"current_steps": 1618, "total_steps": 1955, "loss": 0.9197, "lr": 1.405844213520604e-05, "epoch": 4.138107416879795, "percentage": 82.76, "elapsed_time": "8:19:34", "remaining_time": "1:44:03"} +{"current_steps": 1619, "total_steps": 1955, "loss": 0.896, "lr": 1.3977648541786804e-05, "epoch": 4.140664961636829, "percentage": 82.81, "elapsed_time": "8:19:52", "remaining_time": "1:43:44"} +{"current_steps": 1620, "total_steps": 1955, "loss": 0.9453, "lr": 1.3897065548645104e-05, "epoch": 4.143222506393862, "percentage": 82.86, "elapsed_time": "8:20:10", "remaining_time": "1:43:25"} +{"current_steps": 1621, "total_steps": 1955, "loss": 0.9317, "lr": 1.381669341282721e-05, "epoch": 4.145780051150895, "percentage": 82.92, "elapsed_time": "8:20:29", "remaining_time": "1:43:07"} +{"current_steps": 1622, "total_steps": 1955, "loss": 0.9279, "lr": 1.3736532390706878e-05, "epoch": 4.148337595907928, "percentage": 82.97, "elapsed_time": "8:20:47", "remaining_time": "1:42:48"} +{"current_steps": 1623, "total_steps": 1955, "loss": 0.9439, "lr": 1.3656582737984318e-05, "epoch": 4.150895140664962, "percentage": 83.02, "elapsed_time": "8:21:06", "remaining_time": "1:42:30"} +{"current_steps": 1624, "total_steps": 1955, "loss": 0.9088, "lr": 1.3576844709685583e-05, "epoch": 4.153452685421995, "percentage": 83.07, "elapsed_time": "8:21:24", "remaining_time": "1:42:11"} +{"current_steps": 1625, "total_steps": 1955, "loss": 0.9211, "lr": 1.3497318560161704e-05, "epoch": 4.156010230179028, "percentage": 83.12, "elapsed_time": "8:21:43", "remaining_time": "1:41:53"} +{"current_steps": 1626, "total_steps": 1955, "loss": 0.9312, "lr": 1.3418004543087792e-05, "epoch": 
4.158567774936062, "percentage": 83.17, "elapsed_time": "8:22:01", "remaining_time": "1:41:34"} +{"current_steps": 1627, "total_steps": 1955, "loss": 0.9253, "lr": 1.3338902911462336e-05, "epoch": 4.161125319693094, "percentage": 83.22, "elapsed_time": "8:22:20", "remaining_time": "1:41:16"} +{"current_steps": 1628, "total_steps": 1955, "loss": 0.9383, "lr": 1.3260013917606319e-05, "epoch": 4.163682864450128, "percentage": 83.27, "elapsed_time": "8:22:38", "remaining_time": "1:40:57"} +{"current_steps": 1629, "total_steps": 1955, "loss": 0.9416, "lr": 1.318133781316247e-05, "epoch": 4.166240409207161, "percentage": 83.32, "elapsed_time": "8:22:56", "remaining_time": "1:40:38"} +{"current_steps": 1630, "total_steps": 1955, "loss": 0.9316, "lr": 1.3102874849094414e-05, "epoch": 4.168797953964194, "percentage": 83.38, "elapsed_time": "8:23:14", "remaining_time": "1:40:20"} +{"current_steps": 1631, "total_steps": 1955, "loss": 0.9465, "lr": 1.3024625275685891e-05, "epoch": 4.171355498721228, "percentage": 83.43, "elapsed_time": "8:23:32", "remaining_time": "1:40:01"} +{"current_steps": 1632, "total_steps": 1955, "loss": 0.9275, "lr": 1.2946589342540023e-05, "epoch": 4.173913043478261, "percentage": 83.48, "elapsed_time": "8:23:51", "remaining_time": "1:39:43"} +{"current_steps": 1633, "total_steps": 1955, "loss": 0.9225, "lr": 1.2868767298578395e-05, "epoch": 4.176470588235294, "percentage": 83.53, "elapsed_time": "8:24:09", "remaining_time": "1:39:24"} +{"current_steps": 1634, "total_steps": 1955, "loss": 0.9196, "lr": 1.2791159392040275e-05, "epoch": 4.179028132992327, "percentage": 83.58, "elapsed_time": "8:24:27", "remaining_time": "1:39:06"} +{"current_steps": 1635, "total_steps": 1955, "loss": 0.9353, "lr": 1.2713765870481995e-05, "epoch": 4.181585677749361, "percentage": 83.63, "elapsed_time": "8:24:46", "remaining_time": "1:38:47"} +{"current_steps": 1636, "total_steps": 1955, "loss": 0.9666, "lr": 1.2636586980775945e-05, "epoch": 4.1841432225063935, 
"percentage": 83.68, "elapsed_time": "8:25:05", "remaining_time": "1:38:29"} +{"current_steps": 1637, "total_steps": 1955, "loss": 0.9209, "lr": 1.2559622969109886e-05, "epoch": 4.186700767263427, "percentage": 83.73, "elapsed_time": "8:25:23", "remaining_time": "1:38:10"} +{"current_steps": 1638, "total_steps": 1955, "loss": 0.9377, "lr": 1.2482874080986176e-05, "epoch": 4.189258312020461, "percentage": 83.79, "elapsed_time": "8:25:42", "remaining_time": "1:37:52"} +{"current_steps": 1639, "total_steps": 1955, "loss": 0.9207, "lr": 1.2406340561220947e-05, "epoch": 4.1918158567774935, "percentage": 83.84, "elapsed_time": "8:26:00", "remaining_time": "1:37:33"} +{"current_steps": 1640, "total_steps": 1955, "loss": 0.914, "lr": 1.2330022653943358e-05, "epoch": 4.194373401534527, "percentage": 83.89, "elapsed_time": "8:26:19", "remaining_time": "1:37:15"} +{"current_steps": 1641, "total_steps": 1955, "loss": 0.8923, "lr": 1.2253920602594759e-05, "epoch": 4.19693094629156, "percentage": 83.94, "elapsed_time": "8:26:37", "remaining_time": "1:36:56"} +{"current_steps": 1642, "total_steps": 1955, "loss": 0.9396, "lr": 1.2178034649928034e-05, "epoch": 4.1994884910485935, "percentage": 83.99, "elapsed_time": "8:26:55", "remaining_time": "1:36:37"} +{"current_steps": 1643, "total_steps": 1955, "loss": 0.8981, "lr": 1.2102365038006672e-05, "epoch": 4.202046035805626, "percentage": 84.04, "elapsed_time": "8:27:14", "remaining_time": "1:36:19"} +{"current_steps": 1644, "total_steps": 1955, "loss": 0.9395, "lr": 1.2026912008204117e-05, "epoch": 4.20460358056266, "percentage": 84.09, "elapsed_time": "8:27:33", "remaining_time": "1:36:00"} +{"current_steps": 1645, "total_steps": 1955, "loss": 0.9257, "lr": 1.195167580120292e-05, "epoch": 4.207161125319693, "percentage": 84.14, "elapsed_time": "8:27:51", "remaining_time": "1:35:42"} +{"current_steps": 1646, "total_steps": 1955, "loss": 0.907, "lr": 1.1876656656994032e-05, "epoch": 4.209718670076726, "percentage": 84.19, 
"elapsed_time": "8:28:09", "remaining_time": "1:35:23"} +{"current_steps": 1647, "total_steps": 1955, "loss": 0.9082, "lr": 1.180185481487599e-05, "epoch": 4.21227621483376, "percentage": 84.25, "elapsed_time": "8:28:28", "remaining_time": "1:35:05"} +{"current_steps": 1648, "total_steps": 1955, "loss": 0.9207, "lr": 1.1727270513454161e-05, "epoch": 4.2148337595907925, "percentage": 84.3, "elapsed_time": "8:28:46", "remaining_time": "1:34:46"} +{"current_steps": 1649, "total_steps": 1955, "loss": 0.9041, "lr": 1.1652903990640075e-05, "epoch": 4.217391304347826, "percentage": 84.35, "elapsed_time": "8:29:05", "remaining_time": "1:34:28"} +{"current_steps": 1650, "total_steps": 1955, "loss": 0.9071, "lr": 1.1578755483650465e-05, "epoch": 4.21994884910486, "percentage": 84.4, "elapsed_time": "8:29:24", "remaining_time": "1:34:09"} +{"current_steps": 1651, "total_steps": 1955, "loss": 0.9267, "lr": 1.150482522900668e-05, "epoch": 4.2225063938618925, "percentage": 84.45, "elapsed_time": "8:29:42", "remaining_time": "1:33:51"} +{"current_steps": 1652, "total_steps": 1955, "loss": 0.9188, "lr": 1.1431113462533942e-05, "epoch": 4.225063938618926, "percentage": 84.5, "elapsed_time": "8:30:01", "remaining_time": "1:33:32"} +{"current_steps": 1653, "total_steps": 1955, "loss": 0.93, "lr": 1.1357620419360438e-05, "epoch": 4.227621483375959, "percentage": 84.55, "elapsed_time": "8:30:19", "remaining_time": "1:33:14"} +{"current_steps": 1654, "total_steps": 1955, "loss": 0.9248, "lr": 1.128434633391673e-05, "epoch": 4.2301790281329925, "percentage": 84.6, "elapsed_time": "8:30:38", "remaining_time": "1:32:55"} +{"current_steps": 1655, "total_steps": 1955, "loss": 0.9482, "lr": 1.121129143993489e-05, "epoch": 4.232736572890025, "percentage": 84.65, "elapsed_time": "8:30:56", "remaining_time": "1:32:37"} +{"current_steps": 1656, "total_steps": 1955, "loss": 0.9237, "lr": 1.1138455970447857e-05, "epoch": 4.235294117647059, "percentage": 84.71, "elapsed_time": "8:31:15", 
"remaining_time": "1:32:18"} +{"current_steps": 1657, "total_steps": 1955, "loss": 0.9117, "lr": 1.1065840157788599e-05, "epoch": 4.2378516624040925, "percentage": 84.76, "elapsed_time": "8:31:34", "remaining_time": "1:32:00"} +{"current_steps": 1658, "total_steps": 1955, "loss": 0.944, "lr": 1.099344423358943e-05, "epoch": 4.240409207161125, "percentage": 84.81, "elapsed_time": "8:31:52", "remaining_time": "1:31:41"} +{"current_steps": 1659, "total_steps": 1955, "loss": 0.928, "lr": 1.0921268428781277e-05, "epoch": 4.242966751918159, "percentage": 84.86, "elapsed_time": "8:32:11", "remaining_time": "1:31:23"} +{"current_steps": 1660, "total_steps": 1955, "loss": 0.9307, "lr": 1.084931297359293e-05, "epoch": 4.245524296675192, "percentage": 84.91, "elapsed_time": "8:32:29", "remaining_time": "1:31:04"} +{"current_steps": 1661, "total_steps": 1955, "loss": 0.938, "lr": 1.0777578097550206e-05, "epoch": 4.248081841432225, "percentage": 84.96, "elapsed_time": "8:32:48", "remaining_time": "1:30:46"} +{"current_steps": 1662, "total_steps": 1955, "loss": 0.9339, "lr": 1.0706064029475436e-05, "epoch": 4.250639386189258, "percentage": 85.01, "elapsed_time": "8:33:07", "remaining_time": "1:30:27"} +{"current_steps": 1663, "total_steps": 1955, "loss": 0.9153, "lr": 1.0634770997486546e-05, "epoch": 4.253196930946292, "percentage": 85.06, "elapsed_time": "8:33:25", "remaining_time": "1:30:08"} +{"current_steps": 1664, "total_steps": 1955, "loss": 0.9129, "lr": 1.0563699228996405e-05, "epoch": 4.255754475703325, "percentage": 85.12, "elapsed_time": "8:33:43", "remaining_time": "1:29:50"} +{"current_steps": 1665, "total_steps": 1955, "loss": 0.9183, "lr": 1.0492848950712067e-05, "epoch": 4.258312020460358, "percentage": 85.17, "elapsed_time": "8:34:02", "remaining_time": "1:29:31"} +{"current_steps": 1666, "total_steps": 1955, "loss": 0.9194, "lr": 1.0422220388634145e-05, "epoch": 4.260869565217392, "percentage": 85.22, "elapsed_time": "8:34:21", "remaining_time": "1:29:13"} 
+{"current_steps": 1667, "total_steps": 1955, "loss": 0.93, "lr": 1.03518137680559e-05, "epoch": 4.263427109974424, "percentage": 85.27, "elapsed_time": "8:34:39", "remaining_time": "1:28:54"} +{"current_steps": 1668, "total_steps": 1955, "loss": 0.8812, "lr": 1.0281629313562704e-05, "epoch": 4.265984654731458, "percentage": 85.32, "elapsed_time": "8:34:58", "remaining_time": "1:28:36"} +{"current_steps": 1669, "total_steps": 1955, "loss": 0.9211, "lr": 1.0211667249031278e-05, "epoch": 4.268542199488491, "percentage": 85.37, "elapsed_time": "8:35:16", "remaining_time": "1:28:17"} +{"current_steps": 1670, "total_steps": 1955, "loss": 0.9346, "lr": 1.0141927797628913e-05, "epoch": 4.271099744245524, "percentage": 85.42, "elapsed_time": "8:35:35", "remaining_time": "1:27:59"} +{"current_steps": 1671, "total_steps": 1955, "loss": 0.9103, "lr": 1.0072411181812805e-05, "epoch": 4.273657289002558, "percentage": 85.47, "elapsed_time": "8:35:53", "remaining_time": "1:27:40"} +{"current_steps": 1672, "total_steps": 1955, "loss": 0.9188, "lr": 1.0003117623329373e-05, "epoch": 4.276214833759591, "percentage": 85.52, "elapsed_time": "8:36:12", "remaining_time": "1:27:22"} +{"current_steps": 1673, "total_steps": 1955, "loss": 0.8779, "lr": 9.934047343213468e-06, "epoch": 4.278772378516624, "percentage": 85.58, "elapsed_time": "8:36:30", "remaining_time": "1:27:03"} +{"current_steps": 1674, "total_steps": 1955, "loss": 0.916, "lr": 9.865200561787779e-06, "epoch": 4.281329923273657, "percentage": 85.63, "elapsed_time": "8:36:49", "remaining_time": "1:26:45"} +{"current_steps": 1675, "total_steps": 1955, "loss": 0.9316, "lr": 9.796577498662017e-06, "epoch": 4.283887468030691, "percentage": 85.68, "elapsed_time": "8:37:07", "remaining_time": "1:26:26"} +{"current_steps": 1676, "total_steps": 1955, "loss": 0.9175, "lr": 9.728178372732323e-06, "epoch": 4.286445012787723, "percentage": 85.73, "elapsed_time": "8:37:26", "remaining_time": "1:26:08"} +{"current_steps": 1677, 
"total_steps": 1955, "loss": 0.9322, "lr": 9.660003402180495e-06, "epoch": 4.289002557544757, "percentage": 85.78, "elapsed_time": "8:37:44", "remaining_time": "1:25:49"} +{"current_steps": 1678, "total_steps": 1955, "loss": 0.9338, "lr": 9.592052804473248e-06, "epoch": 4.291560102301791, "percentage": 85.83, "elapsed_time": "8:38:03", "remaining_time": "1:25:31"} +{"current_steps": 1679, "total_steps": 1955, "loss": 0.9198, "lr": 9.524326796361704e-06, "epoch": 4.294117647058823, "percentage": 85.88, "elapsed_time": "8:38:22", "remaining_time": "1:25:12"} +{"current_steps": 1680, "total_steps": 1955, "loss": 0.9381, "lr": 9.456825593880502e-06, "epoch": 4.296675191815857, "percentage": 85.93, "elapsed_time": "8:38:39", "remaining_time": "1:24:54"} +{"current_steps": 1681, "total_steps": 1955, "loss": 0.9114, "lr": 9.389549412347204e-06, "epoch": 4.29923273657289, "percentage": 85.98, "elapsed_time": "8:38:58", "remaining_time": "1:24:35"} +{"current_steps": 1682, "total_steps": 1955, "loss": 0.9564, "lr": 9.322498466361574e-06, "epoch": 4.301790281329923, "percentage": 86.04, "elapsed_time": "8:39:17", "remaining_time": "1:24:17"} +{"current_steps": 1683, "total_steps": 1955, "loss": 0.9372, "lr": 9.25567296980499e-06, "epoch": 4.304347826086957, "percentage": 86.09, "elapsed_time": "8:39:35", "remaining_time": "1:23:58"} +{"current_steps": 1684, "total_steps": 1955, "loss": 0.9571, "lr": 9.18907313583958e-06, "epoch": 4.30690537084399, "percentage": 86.14, "elapsed_time": "8:39:53", "remaining_time": "1:23:39"} +{"current_steps": 1685, "total_steps": 1955, "loss": 0.91, "lr": 9.122699176907699e-06, "epoch": 4.309462915601023, "percentage": 86.19, "elapsed_time": "8:40:11", "remaining_time": "1:23:21"} +{"current_steps": 1686, "total_steps": 1955, "loss": 0.9403, "lr": 9.056551304731216e-06, "epoch": 4.312020460358056, "percentage": 86.24, "elapsed_time": "8:40:30", "remaining_time": "1:23:02"} +{"current_steps": 1687, "total_steps": 1955, "loss": 0.9045, "lr": 
8.990629730310787e-06, "epoch": 4.31457800511509, "percentage": 86.29, "elapsed_time": "8:40:49", "remaining_time": "1:22:44"} +{"current_steps": 1688, "total_steps": 1955, "loss": 0.9005, "lr": 8.924934663925228e-06, "epoch": 4.3171355498721224, "percentage": 86.34, "elapsed_time": "8:41:08", "remaining_time": "1:22:25"} +{"current_steps": 1689, "total_steps": 1955, "loss": 0.905, "lr": 8.859466315130833e-06, "epoch": 4.319693094629156, "percentage": 86.39, "elapsed_time": "8:41:26", "remaining_time": "1:22:07"} +{"current_steps": 1690, "total_steps": 1955, "loss": 0.964, "lr": 8.794224892760694e-06, "epoch": 4.322250639386189, "percentage": 86.45, "elapsed_time": "8:41:44", "remaining_time": "1:21:48"} +{"current_steps": 1691, "total_steps": 1955, "loss": 0.9168, "lr": 8.729210604924075e-06, "epoch": 4.324808184143222, "percentage": 86.5, "elapsed_time": "8:42:03", "remaining_time": "1:21:30"} +{"current_steps": 1692, "total_steps": 1955, "loss": 0.9155, "lr": 8.66442365900566e-06, "epoch": 4.327365728900256, "percentage": 86.55, "elapsed_time": "8:42:22", "remaining_time": "1:21:11"} +{"current_steps": 1693, "total_steps": 1955, "loss": 0.929, "lr": 8.599864261665032e-06, "epoch": 4.329923273657289, "percentage": 86.6, "elapsed_time": "8:42:40", "remaining_time": "1:20:53"} +{"current_steps": 1694, "total_steps": 1955, "loss": 0.9196, "lr": 8.535532618835894e-06, "epoch": 4.332480818414322, "percentage": 86.65, "elapsed_time": "8:42:59", "remaining_time": "1:20:34"} +{"current_steps": 1695, "total_steps": 1955, "loss": 0.9097, "lr": 8.471428935725394e-06, "epoch": 4.335038363171355, "percentage": 86.7, "elapsed_time": "8:43:18", "remaining_time": "1:20:16"} +{"current_steps": 1696, "total_steps": 1955, "loss": 0.9486, "lr": 8.407553416813621e-06, "epoch": 4.337595907928389, "percentage": 86.75, "elapsed_time": "8:43:36", "remaining_time": "1:19:57"} +{"current_steps": 1697, "total_steps": 1955, "loss": 0.9194, "lr": 8.343906265852806e-06, "epoch": 
4.340153452685422, "percentage": 86.8, "elapsed_time": "8:43:55", "remaining_time": "1:19:39"} +{"current_steps": 1698, "total_steps": 1955, "loss": 0.8964, "lr": 8.280487685866707e-06, "epoch": 4.342710997442455, "percentage": 86.85, "elapsed_time": "8:44:13", "remaining_time": "1:19:20"} +{"current_steps": 1699, "total_steps": 1955, "loss": 0.9305, "lr": 8.217297879150065e-06, "epoch": 4.345268542199489, "percentage": 86.91, "elapsed_time": "8:44:32", "remaining_time": "1:19:02"} +{"current_steps": 1700, "total_steps": 1955, "loss": 0.926, "lr": 8.154337047267763e-06, "epoch": 4.3478260869565215, "percentage": 86.96, "elapsed_time": "8:44:51", "remaining_time": "1:18:43"} +{"current_steps": 1701, "total_steps": 1955, "loss": 0.8922, "lr": 8.091605391054354e-06, "epoch": 4.350383631713555, "percentage": 87.01, "elapsed_time": "8:45:09", "remaining_time": "1:18:25"} +{"current_steps": 1702, "total_steps": 1955, "loss": 0.9401, "lr": 8.02910311061333e-06, "epoch": 4.352941176470588, "percentage": 87.06, "elapsed_time": "8:45:27", "remaining_time": "1:18:06"} +{"current_steps": 1703, "total_steps": 1955, "loss": 0.9547, "lr": 7.966830405316561e-06, "epoch": 4.3554987212276215, "percentage": 87.11, "elapsed_time": "8:45:46", "remaining_time": "1:17:48"} +{"current_steps": 1704, "total_steps": 1955, "loss": 0.9103, "lr": 7.90478747380357e-06, "epoch": 4.358056265984655, "percentage": 87.16, "elapsed_time": "8:46:05", "remaining_time": "1:17:29"} +{"current_steps": 1705, "total_steps": 1955, "loss": 0.9271, "lr": 7.842974513980946e-06, "epoch": 4.360613810741688, "percentage": 87.21, "elapsed_time": "8:46:23", "remaining_time": "1:17:11"} +{"current_steps": 1706, "total_steps": 1955, "loss": 0.9363, "lr": 7.781391723021711e-06, "epoch": 4.3631713554987215, "percentage": 87.26, "elapsed_time": "8:46:42", "remaining_time": "1:16:52"} +{"current_steps": 1707, "total_steps": 1955, "loss": 0.9274, "lr": 7.720039297364681e-06, "epoch": 4.365728900255754, "percentage": 87.31, 
"elapsed_time": "8:47:00", "remaining_time": "1:16:34"} +{"current_steps": 1708, "total_steps": 1955, "loss": 0.9172, "lr": 7.658917432713839e-06, "epoch": 4.368286445012788, "percentage": 87.37, "elapsed_time": "8:47:19", "remaining_time": "1:16:15"} +{"current_steps": 1709, "total_steps": 1955, "loss": 0.939, "lr": 7.598026324037762e-06, "epoch": 4.370843989769821, "percentage": 87.42, "elapsed_time": "8:47:37", "remaining_time": "1:15:56"} +{"current_steps": 1710, "total_steps": 1955, "loss": 0.9288, "lr": 7.537366165568909e-06, "epoch": 4.373401534526854, "percentage": 87.47, "elapsed_time": "8:47:55", "remaining_time": "1:15:38"} +{"current_steps": 1711, "total_steps": 1955, "loss": 0.9497, "lr": 7.476937150803025e-06, "epoch": 4.375959079283888, "percentage": 87.52, "elapsed_time": "8:48:13", "remaining_time": "1:15:19"} +{"current_steps": 1712, "total_steps": 1955, "loss": 0.9479, "lr": 7.416739472498613e-06, "epoch": 4.378516624040921, "percentage": 87.57, "elapsed_time": "8:48:32", "remaining_time": "1:15:01"} +{"current_steps": 1713, "total_steps": 1955, "loss": 0.9158, "lr": 7.356773322676205e-06, "epoch": 4.381074168797954, "percentage": 87.62, "elapsed_time": "8:48:50", "remaining_time": "1:14:42"} +{"current_steps": 1714, "total_steps": 1955, "loss": 0.937, "lr": 7.2970388926178045e-06, "epoch": 4.383631713554987, "percentage": 87.67, "elapsed_time": "8:49:09", "remaining_time": "1:14:24"} +{"current_steps": 1715, "total_steps": 1955, "loss": 0.9327, "lr": 7.237536372866247e-06, "epoch": 4.3861892583120206, "percentage": 87.72, "elapsed_time": "8:49:28", "remaining_time": "1:14:05"} +{"current_steps": 1716, "total_steps": 1955, "loss": 0.9227, "lr": 7.178265953224701e-06, "epoch": 4.388746803069053, "percentage": 87.77, "elapsed_time": "8:49:46", "remaining_time": "1:13:47"} +{"current_steps": 1717, "total_steps": 1955, "loss": 0.9571, "lr": 7.119227822755843e-06, "epoch": 4.391304347826087, "percentage": 87.83, "elapsed_time": "8:50:04", 
"remaining_time": "1:13:28"} +{"current_steps": 1718, "total_steps": 1955, "loss": 0.9041, "lr": 7.060422169781467e-06, "epoch": 4.3938618925831205, "percentage": 87.88, "elapsed_time": "8:50:22", "remaining_time": "1:13:10"} +{"current_steps": 1719, "total_steps": 1955, "loss": 0.9166, "lr": 7.001849181881808e-06, "epoch": 4.396419437340153, "percentage": 87.93, "elapsed_time": "8:50:41", "remaining_time": "1:12:51"} +{"current_steps": 1720, "total_steps": 1955, "loss": 0.9341, "lr": 6.943509045894905e-06, "epoch": 4.398976982097187, "percentage": 87.98, "elapsed_time": "8:51:00", "remaining_time": "1:12:32"} +{"current_steps": 1721, "total_steps": 1955, "loss": 0.9514, "lr": 6.885401947916048e-06, "epoch": 4.40153452685422, "percentage": 88.03, "elapsed_time": "8:51:18", "remaining_time": "1:12:14"} +{"current_steps": 1722, "total_steps": 1955, "loss": 0.9382, "lr": 6.827528073297185e-06, "epoch": 4.404092071611253, "percentage": 88.08, "elapsed_time": "8:51:36", "remaining_time": "1:11:55"} +{"current_steps": 1723, "total_steps": 1955, "loss": 0.9414, "lr": 6.769887606646306e-06, "epoch": 4.406649616368286, "percentage": 88.13, "elapsed_time": "8:51:54", "remaining_time": "1:11:37"} +{"current_steps": 1724, "total_steps": 1955, "loss": 0.912, "lr": 6.712480731826878e-06, "epoch": 4.40920716112532, "percentage": 88.18, "elapsed_time": "8:52:12", "remaining_time": "1:11:18"} +{"current_steps": 1725, "total_steps": 1955, "loss": 0.9268, "lr": 6.6553076319572394e-06, "epoch": 4.411764705882353, "percentage": 88.24, "elapsed_time": "8:52:31", "remaining_time": "1:11:00"} +{"current_steps": 1726, "total_steps": 1955, "loss": 0.9253, "lr": 6.59836848941005e-06, "epoch": 4.414322250639386, "percentage": 88.29, "elapsed_time": "8:52:49", "remaining_time": "1:10:41"} +{"current_steps": 1727, "total_steps": 1955, "loss": 0.915, "lr": 6.541663485811667e-06, "epoch": 4.41687979539642, "percentage": 88.34, "elapsed_time": "8:53:08", "remaining_time": "1:10:23"} 
+{"current_steps": 1728, "total_steps": 1955, "loss": 0.9156, "lr": 6.485192802041553e-06, "epoch": 4.419437340153452, "percentage": 88.39, "elapsed_time": "8:53:26", "remaining_time": "1:10:04"} +{"current_steps": 1729, "total_steps": 1955, "loss": 0.9197, "lr": 6.428956618231788e-06, "epoch": 4.421994884910486, "percentage": 88.44, "elapsed_time": "8:53:45", "remaining_time": "1:09:46"} +{"current_steps": 1730, "total_steps": 1955, "loss": 0.9545, "lr": 6.3729551137664055e-06, "epoch": 4.42455242966752, "percentage": 88.49, "elapsed_time": "8:54:04", "remaining_time": "1:09:27"} +{"current_steps": 1731, "total_steps": 1955, "loss": 0.9103, "lr": 6.3171884672808524e-06, "epoch": 4.427109974424552, "percentage": 88.54, "elapsed_time": "8:54:22", "remaining_time": "1:09:09"} +{"current_steps": 1732, "total_steps": 1955, "loss": 0.9016, "lr": 6.26165685666142e-06, "epoch": 4.429667519181586, "percentage": 88.59, "elapsed_time": "8:54:40", "remaining_time": "1:08:50"} +{"current_steps": 1733, "total_steps": 1955, "loss": 0.931, "lr": 6.206360459044671e-06, "epoch": 4.432225063938619, "percentage": 88.64, "elapsed_time": "8:54:59", "remaining_time": "1:08:31"} +{"current_steps": 1734, "total_steps": 1955, "loss": 0.9151, "lr": 6.15129945081689e-06, "epoch": 4.434782608695652, "percentage": 88.7, "elapsed_time": "8:55:17", "remaining_time": "1:08:13"} +{"current_steps": 1735, "total_steps": 1955, "loss": 0.9365, "lr": 6.096474007613476e-06, "epoch": 4.437340153452685, "percentage": 88.75, "elapsed_time": "8:55:35", "remaining_time": "1:07:54"} +{"current_steps": 1736, "total_steps": 1955, "loss": 0.9552, "lr": 6.0418843043184636e-06, "epoch": 4.439897698209719, "percentage": 88.8, "elapsed_time": "8:55:53", "remaining_time": "1:07:36"} +{"current_steps": 1737, "total_steps": 1955, "loss": 0.9194, "lr": 5.987530515063889e-06, "epoch": 4.442455242966752, "percentage": 88.85, "elapsed_time": "8:56:12", "remaining_time": "1:07:17"} +{"current_steps": 1738, "total_steps": 
1955, "loss": 0.9189, "lr": 5.933412813229256e-06, "epoch": 4.445012787723785, "percentage": 88.9, "elapsed_time": "8:56:30", "remaining_time": "1:06:59"} +{"current_steps": 1739, "total_steps": 1955, "loss": 0.9388, "lr": 5.879531371440994e-06, "epoch": 4.447570332480819, "percentage": 88.95, "elapsed_time": "8:56:49", "remaining_time": "1:06:40"} +{"current_steps": 1740, "total_steps": 1955, "loss": 0.8945, "lr": 5.825886361571922e-06, "epoch": 4.450127877237851, "percentage": 89.0, "elapsed_time": "8:57:07", "remaining_time": "1:06:22"} +{"current_steps": 1741, "total_steps": 1955, "loss": 0.9126, "lr": 5.772477954740652e-06, "epoch": 4.452685421994885, "percentage": 89.05, "elapsed_time": "8:57:26", "remaining_time": "1:06:03"} +{"current_steps": 1742, "total_steps": 1955, "loss": 0.9565, "lr": 5.719306321311075e-06, "epoch": 4.455242966751918, "percentage": 89.1, "elapsed_time": "8:57:45", "remaining_time": "1:05:45"} +{"current_steps": 1743, "total_steps": 1955, "loss": 0.9127, "lr": 5.666371630891858e-06, "epoch": 4.457800511508951, "percentage": 89.16, "elapsed_time": "8:58:03", "remaining_time": "1:05:26"} +{"current_steps": 1744, "total_steps": 1955, "loss": 0.9184, "lr": 5.613674052335798e-06, "epoch": 4.460358056265985, "percentage": 89.21, "elapsed_time": "8:58:21", "remaining_time": "1:05:08"} +{"current_steps": 1745, "total_steps": 1955, "loss": 0.9281, "lr": 5.561213753739356e-06, "epoch": 4.462915601023018, "percentage": 89.26, "elapsed_time": "8:58:40", "remaining_time": "1:04:49"} +{"current_steps": 1746, "total_steps": 1955, "loss": 0.9327, "lr": 5.5089909024421685e-06, "epoch": 4.465473145780051, "percentage": 89.31, "elapsed_time": "8:58:58", "remaining_time": "1:04:31"} +{"current_steps": 1747, "total_steps": 1955, "loss": 0.9196, "lr": 5.4570056650263784e-06, "epoch": 4.468030690537084, "percentage": 89.36, "elapsed_time": "8:59:17", "remaining_time": "1:04:12"} +{"current_steps": 1748, "total_steps": 1955, "loss": 0.9248, "lr": 
5.405258207316228e-06, "epoch": 4.470588235294118, "percentage": 89.41, "elapsed_time": "8:59:35", "remaining_time": "1:03:53"} +{"current_steps": 1749, "total_steps": 1955, "loss": 0.9278, "lr": 5.3537486943774674e-06, "epoch": 4.4731457800511505, "percentage": 89.46, "elapsed_time": "8:59:54", "remaining_time": "1:03:35"} +{"current_steps": 1750, "total_steps": 1955, "loss": 0.9508, "lr": 5.302477290516832e-06, "epoch": 4.475703324808184, "percentage": 89.51, "elapsed_time": "9:00:13", "remaining_time": "1:03:16"} +{"current_steps": 1751, "total_steps": 1955, "loss": 0.9177, "lr": 5.251444159281551e-06, "epoch": 4.478260869565218, "percentage": 89.57, "elapsed_time": "9:00:31", "remaining_time": "1:02:58"} +{"current_steps": 1752, "total_steps": 1955, "loss": 0.9315, "lr": 5.200649463458769e-06, "epoch": 4.4808184143222505, "percentage": 89.62, "elapsed_time": "9:00:50", "remaining_time": "1:02:39"} +{"current_steps": 1753, "total_steps": 1955, "loss": 0.9423, "lr": 5.150093365075117e-06, "epoch": 4.483375959079284, "percentage": 89.67, "elapsed_time": "9:01:09", "remaining_time": "1:02:21"} +{"current_steps": 1754, "total_steps": 1955, "loss": 0.9432, "lr": 5.0997760253961036e-06, "epoch": 4.485933503836317, "percentage": 89.72, "elapsed_time": "9:01:27", "remaining_time": "1:02:02"} +{"current_steps": 1755, "total_steps": 1955, "loss": 0.9201, "lr": 5.049697604925605e-06, "epoch": 4.4884910485933505, "percentage": 89.77, "elapsed_time": "9:01:45", "remaining_time": "1:01:44"} +{"current_steps": 1756, "total_steps": 1955, "loss": 0.9335, "lr": 4.999858263405468e-06, "epoch": 4.491048593350383, "percentage": 89.82, "elapsed_time": "9:02:04", "remaining_time": "1:01:25"} +{"current_steps": 1757, "total_steps": 1955, "loss": 0.9326, "lr": 4.9502581598148425e-06, "epoch": 4.493606138107417, "percentage": 89.87, "elapsed_time": "9:02:22", "remaining_time": "1:01:07"} +{"current_steps": 1758, "total_steps": 1955, "loss": 0.9085, "lr": 4.900897452369782e-06, "epoch": 
4.4961636828644505, "percentage": 89.92, "elapsed_time": "9:02:41", "remaining_time": "1:00:48"} +{"current_steps": 1759, "total_steps": 1955, "loss": 0.8962, "lr": 4.851776298522692e-06, "epoch": 4.498721227621483, "percentage": 89.97, "elapsed_time": "9:03:00", "remaining_time": "1:00:30"} +{"current_steps": 1760, "total_steps": 1955, "loss": 0.945, "lr": 4.802894854961882e-06, "epoch": 4.501278772378517, "percentage": 90.03, "elapsed_time": "9:03:18", "remaining_time": "1:00:11"} +{"current_steps": 1761, "total_steps": 1955, "loss": 0.9362, "lr": 4.754253277610969e-06, "epoch": 4.5038363171355495, "percentage": 90.08, "elapsed_time": "9:03:36", "remaining_time": "0:59:53"} +{"current_steps": 1762, "total_steps": 1955, "loss": 0.9489, "lr": 4.705851721628465e-06, "epoch": 4.506393861892583, "percentage": 90.13, "elapsed_time": "9:03:55", "remaining_time": "0:59:34"} +{"current_steps": 1763, "total_steps": 1955, "loss": 0.9345, "lr": 4.6576903414072576e-06, "epoch": 4.508951406649617, "percentage": 90.18, "elapsed_time": "9:04:13", "remaining_time": "0:59:16"} +{"current_steps": 1764, "total_steps": 1955, "loss": 0.912, "lr": 4.6097692905741194e-06, "epoch": 4.5115089514066495, "percentage": 90.23, "elapsed_time": "9:04:32", "remaining_time": "0:58:57"} +{"current_steps": 1765, "total_steps": 1955, "loss": 0.9263, "lr": 4.562088721989178e-06, "epoch": 4.514066496163683, "percentage": 90.28, "elapsed_time": "9:04:51", "remaining_time": "0:58:39"} +{"current_steps": 1766, "total_steps": 1955, "loss": 0.9132, "lr": 4.514648787745506e-06, "epoch": 4.516624040920716, "percentage": 90.33, "elapsed_time": "9:05:09", "remaining_time": "0:58:20"} +{"current_steps": 1767, "total_steps": 1955, "loss": 0.9435, "lr": 4.467449639168564e-06, "epoch": 4.5191815856777495, "percentage": 90.38, "elapsed_time": "9:05:27", "remaining_time": "0:58:02"} +{"current_steps": 1768, "total_steps": 1955, "loss": 0.9405, "lr": 4.420491426815758e-06, "epoch": 4.521739130434782, "percentage": 
90.43, "elapsed_time": "9:05:46", "remaining_time": "0:57:43"} +{"current_steps": 1769, "total_steps": 1955, "loss": 0.9013, "lr": 4.373774300475928e-06, "epoch": 4.524296675191816, "percentage": 90.49, "elapsed_time": "9:06:04", "remaining_time": "0:57:25"} +{"current_steps": 1770, "total_steps": 1955, "loss": 0.9234, "lr": 4.327298409168928e-06, "epoch": 4.526854219948849, "percentage": 90.54, "elapsed_time": "9:06:23", "remaining_time": "0:57:06"} +{"current_steps": 1771, "total_steps": 1955, "loss": 0.9191, "lr": 4.281063901145102e-06, "epoch": 4.529411764705882, "percentage": 90.59, "elapsed_time": "9:06:41", "remaining_time": "0:56:47"} +{"current_steps": 1772, "total_steps": 1955, "loss": 0.9218, "lr": 4.235070923884772e-06, "epoch": 4.531969309462916, "percentage": 90.64, "elapsed_time": "9:07:00", "remaining_time": "0:56:29"} +{"current_steps": 1773, "total_steps": 1955, "loss": 0.9109, "lr": 4.18931962409789e-06, "epoch": 4.534526854219949, "percentage": 90.69, "elapsed_time": "9:07:19", "remaining_time": "0:56:10"} +{"current_steps": 1774, "total_steps": 1955, "loss": 0.9152, "lr": 4.143810147723448e-06, "epoch": 4.537084398976982, "percentage": 90.74, "elapsed_time": "9:07:37", "remaining_time": "0:55:52"} +{"current_steps": 1775, "total_steps": 1955, "loss": 0.9046, "lr": 4.098542639929086e-06, "epoch": 4.539641943734015, "percentage": 90.79, "elapsed_time": "9:07:55", "remaining_time": "0:55:33"} +{"current_steps": 1776, "total_steps": 1955, "loss": 0.9128, "lr": 4.0535172451105785e-06, "epoch": 4.542199488491049, "percentage": 90.84, "elapsed_time": "9:08:14", "remaining_time": "0:55:15"} +{"current_steps": 1777, "total_steps": 1955, "loss": 0.929, "lr": 4.008734106891439e-06, "epoch": 4.544757033248082, "percentage": 90.9, "elapsed_time": "9:08:32", "remaining_time": "0:54:56"} +{"current_steps": 1778, "total_steps": 1955, "loss": 0.9397, "lr": 3.964193368122384e-06, "epoch": 4.547314578005115, "percentage": 90.95, "elapsed_time": "9:08:50", 
"remaining_time": "0:54:38"} +{"current_steps": 1779, "total_steps": 1955, "loss": 0.9252, "lr": 3.919895170880938e-06, "epoch": 4.549872122762149, "percentage": 91.0, "elapsed_time": "9:09:09", "remaining_time": "0:54:19"} +{"current_steps": 1780, "total_steps": 1955, "loss": 0.9182, "lr": 3.875839656470959e-06, "epoch": 4.552429667519181, "percentage": 91.05, "elapsed_time": "9:09:27", "remaining_time": "0:54:01"} +{"current_steps": 1781, "total_steps": 1955, "loss": 0.949, "lr": 3.832026965422184e-06, "epoch": 4.554987212276215, "percentage": 91.1, "elapsed_time": "9:09:46", "remaining_time": "0:53:42"} +{"current_steps": 1782, "total_steps": 1955, "loss": 0.9238, "lr": 3.788457237489773e-06, "epoch": 4.557544757033249, "percentage": 91.15, "elapsed_time": "9:10:04", "remaining_time": "0:53:24"} +{"current_steps": 1783, "total_steps": 1955, "loss": 0.9711, "lr": 3.7451306116538867e-06, "epoch": 4.560102301790281, "percentage": 91.2, "elapsed_time": "9:10:23", "remaining_time": "0:53:05"} +{"current_steps": 1784, "total_steps": 1955, "loss": 0.9005, "lr": 3.7020472261192253e-06, "epoch": 4.562659846547315, "percentage": 91.25, "elapsed_time": "9:10:42", "remaining_time": "0:52:47"} +{"current_steps": 1785, "total_steps": 1955, "loss": 0.9014, "lr": 3.6592072183146043e-06, "epoch": 4.565217391304348, "percentage": 91.3, "elapsed_time": "9:11:00", "remaining_time": "0:52:28"} +{"current_steps": 1786, "total_steps": 1955, "loss": 0.9105, "lr": 3.616610724892473e-06, "epoch": 4.567774936061381, "percentage": 91.36, "elapsed_time": "9:11:19", "remaining_time": "0:52:10"} +{"current_steps": 1787, "total_steps": 1955, "loss": 0.9193, "lr": 3.5742578817285777e-06, "epoch": 4.570332480818414, "percentage": 91.41, "elapsed_time": "9:11:38", "remaining_time": "0:51:51"} +{"current_steps": 1788, "total_steps": 1955, "loss": 0.91, "lr": 3.532148823921375e-06, "epoch": 4.572890025575448, "percentage": 91.46, "elapsed_time": "9:11:56", "remaining_time": "0:51:33"} 
+{"current_steps": 1789, "total_steps": 1955, "loss": 0.9594, "lr": 3.490283685791722e-06, "epoch": 4.57544757033248, "percentage": 91.51, "elapsed_time": "9:12:15", "remaining_time": "0:51:14"} +{"current_steps": 1790, "total_steps": 1955, "loss": 0.9327, "lr": 3.4486626008824575e-06, "epoch": 4.578005115089514, "percentage": 91.56, "elapsed_time": "9:12:34", "remaining_time": "0:50:56"} +{"current_steps": 1791, "total_steps": 1955, "loss": 0.9219, "lr": 3.4072857019578787e-06, "epoch": 4.580562659846548, "percentage": 91.61, "elapsed_time": "9:12:52", "remaining_time": "0:50:37"} +{"current_steps": 1792, "total_steps": 1955, "loss": 0.9256, "lr": 3.3661531210033684e-06, "epoch": 4.58312020460358, "percentage": 91.66, "elapsed_time": "9:13:10", "remaining_time": "0:50:19"} +{"current_steps": 1793, "total_steps": 1955, "loss": 0.9188, "lr": 3.3252649892250123e-06, "epoch": 4.585677749360614, "percentage": 91.71, "elapsed_time": "9:13:28", "remaining_time": "0:50:00"} +{"current_steps": 1794, "total_steps": 1955, "loss": 0.9286, "lr": 3.2846214370491114e-06, "epoch": 4.588235294117647, "percentage": 91.76, "elapsed_time": "9:13:47", "remaining_time": "0:49:41"} +{"current_steps": 1795, "total_steps": 1955, "loss": 0.91, "lr": 3.2442225941218175e-06, "epoch": 4.59079283887468, "percentage": 91.82, "elapsed_time": "9:14:05", "remaining_time": "0:49:23"} +{"current_steps": 1796, "total_steps": 1955, "loss": 0.9187, "lr": 3.20406858930868e-06, "epoch": 4.593350383631714, "percentage": 91.87, "elapsed_time": "9:14:24", "remaining_time": "0:49:04"} +{"current_steps": 1797, "total_steps": 1955, "loss": 0.9268, "lr": 3.164159550694299e-06, "epoch": 4.595907928388747, "percentage": 91.92, "elapsed_time": "9:14:42", "remaining_time": "0:48:46"} +{"current_steps": 1798, "total_steps": 1955, "loss": 0.9045, "lr": 3.12449560558183e-06, "epoch": 4.59846547314578, "percentage": 91.97, "elapsed_time": "9:15:00", "remaining_time": "0:48:27"} +{"current_steps": 1799, "total_steps": 
1955, "loss": 0.9131, "lr": 3.085076880492608e-06, "epoch": 4.601023017902813, "percentage": 92.02, "elapsed_time": "9:15:19", "remaining_time": "0:48:09"} +{"current_steps": 1800, "total_steps": 1955, "loss": 0.9456, "lr": 3.045903501165821e-06, "epoch": 4.603580562659847, "percentage": 92.07, "elapsed_time": "9:15:37", "remaining_time": "0:47:50"} +{"current_steps": 1801, "total_steps": 1955, "loss": 0.9068, "lr": 3.0069755925579945e-06, "epoch": 4.6061381074168795, "percentage": 92.12, "elapsed_time": "9:15:56", "remaining_time": "0:47:32"} +{"current_steps": 1802, "total_steps": 1955, "loss": 0.8961, "lr": 2.9682932788426622e-06, "epoch": 4.608695652173913, "percentage": 92.17, "elapsed_time": "9:16:14", "remaining_time": "0:47:13"} +{"current_steps": 1803, "total_steps": 1955, "loss": 0.9196, "lr": 2.9298566834099307e-06, "epoch": 4.611253196930946, "percentage": 92.23, "elapsed_time": "9:16:33", "remaining_time": "0:46:55"} +{"current_steps": 1804, "total_steps": 1955, "loss": 0.8891, "lr": 2.891665928866152e-06, "epoch": 4.6138107416879794, "percentage": 92.28, "elapsed_time": "9:16:52", "remaining_time": "0:46:36"} +{"current_steps": 1805, "total_steps": 1955, "loss": 0.9309, "lr": 2.853721137033425e-06, "epoch": 4.616368286445013, "percentage": 92.33, "elapsed_time": "9:17:11", "remaining_time": "0:46:18"} +{"current_steps": 1806, "total_steps": 1955, "loss": 0.8956, "lr": 2.816022428949303e-06, "epoch": 4.618925831202046, "percentage": 92.38, "elapsed_time": "9:17:29", "remaining_time": "0:45:59"} +{"current_steps": 1807, "total_steps": 1955, "loss": 0.9245, "lr": 2.7785699248663946e-06, "epoch": 4.621483375959079, "percentage": 92.43, "elapsed_time": "9:17:47", "remaining_time": "0:45:41"} +{"current_steps": 1808, "total_steps": 1955, "loss": 0.9641, "lr": 2.741363744251917e-06, "epoch": 4.624040920716112, "percentage": 92.48, "elapsed_time": "9:18:06", "remaining_time": "0:45:22"} +{"current_steps": 1809, "total_steps": 1955, "loss": 0.936, "lr": 
2.70440400578738e-06, "epoch": 4.626598465473146, "percentage": 92.53, "elapsed_time": "9:18:25", "remaining_time": "0:45:04"} +{"current_steps": 1810, "total_steps": 1955, "loss": 0.9236, "lr": 2.6676908273681745e-06, "epoch": 4.629156010230179, "percentage": 92.58, "elapsed_time": "9:18:43", "remaining_time": "0:44:45"} +{"current_steps": 1811, "total_steps": 1955, "loss": 0.9235, "lr": 2.63122432610321e-06, "epoch": 4.631713554987212, "percentage": 92.63, "elapsed_time": "9:19:01", "remaining_time": "0:44:27"} +{"current_steps": 1812, "total_steps": 1955, "loss": 0.9477, "lr": 2.5950046183145315e-06, "epoch": 4.634271099744246, "percentage": 92.69, "elapsed_time": "9:19:20", "remaining_time": "0:44:08"} +{"current_steps": 1813, "total_steps": 1955, "loss": 0.8923, "lr": 2.559031819536966e-06, "epoch": 4.6368286445012785, "percentage": 92.74, "elapsed_time": "9:19:38", "remaining_time": "0:43:49"} +{"current_steps": 1814, "total_steps": 1955, "loss": 0.9575, "lr": 2.523306044517737e-06, "epoch": 4.639386189258312, "percentage": 92.79, "elapsed_time": "9:19:57", "remaining_time": "0:43:31"} +{"current_steps": 1815, "total_steps": 1955, "loss": 0.9478, "lr": 2.4878274072161147e-06, "epoch": 4.641943734015345, "percentage": 92.84, "elapsed_time": "9:20:15", "remaining_time": "0:43:12"} +{"current_steps": 1816, "total_steps": 1955, "loss": 0.9468, "lr": 2.4525960208029843e-06, "epoch": 4.6445012787723785, "percentage": 92.89, "elapsed_time": "9:20:34", "remaining_time": "0:42:54"} +{"current_steps": 1817, "total_steps": 1955, "loss": 0.9441, "lr": 2.417611997660636e-06, "epoch": 4.647058823529412, "percentage": 92.94, "elapsed_time": "9:20:52", "remaining_time": "0:42:35"} +{"current_steps": 1818, "total_steps": 1955, "loss": 0.9342, "lr": 2.3828754493822315e-06, "epoch": 4.649616368286445, "percentage": 92.99, "elapsed_time": "9:21:10", "remaining_time": "0:42:17"} +{"current_steps": 1819, "total_steps": 1955, "loss": 0.9121, "lr": 2.348386486771572e-06, "epoch": 
4.6521739130434785, "percentage": 93.04, "elapsed_time": "9:21:29", "remaining_time": "0:41:58"} +{"current_steps": 1820, "total_steps": 1955, "loss": 0.8991, "lr": 2.314145219842683e-06, "epoch": 4.654731457800511, "percentage": 93.09, "elapsed_time": "9:21:47", "remaining_time": "0:41:40"} +{"current_steps": 1821, "total_steps": 1955, "loss": 0.9023, "lr": 2.2801517578194997e-06, "epoch": 4.657289002557545, "percentage": 93.15, "elapsed_time": "9:22:06", "remaining_time": "0:41:21"} +{"current_steps": 1822, "total_steps": 1955, "loss": 0.9526, "lr": 2.246406209135481e-06, "epoch": 4.659846547314578, "percentage": 93.2, "elapsed_time": "9:22:24", "remaining_time": "0:41:03"} +{"current_steps": 1823, "total_steps": 1955, "loss": 0.9032, "lr": 2.212908681433286e-06, "epoch": 4.662404092071611, "percentage": 93.25, "elapsed_time": "9:22:42", "remaining_time": "0:40:44"} +{"current_steps": 1824, "total_steps": 1955, "loss": 0.9164, "lr": 2.179659281564446e-06, "epoch": 4.664961636828645, "percentage": 93.3, "elapsed_time": "9:23:01", "remaining_time": "0:40:26"} +{"current_steps": 1825, "total_steps": 1955, "loss": 0.9191, "lr": 2.146658115589002e-06, "epoch": 4.667519181585678, "percentage": 93.35, "elapsed_time": "9:23:19", "remaining_time": "0:40:07"} +{"current_steps": 1826, "total_steps": 1955, "loss": 0.9155, "lr": 2.113905288775149e-06, "epoch": 4.670076726342711, "percentage": 93.4, "elapsed_time": "9:23:38", "remaining_time": "0:39:49"} +{"current_steps": 1827, "total_steps": 1955, "loss": 0.9165, "lr": 2.0814009055989403e-06, "epoch": 4.672634271099744, "percentage": 93.45, "elapsed_time": "9:23:57", "remaining_time": "0:39:30"} +{"current_steps": 1828, "total_steps": 1955, "loss": 0.9101, "lr": 2.0491450697439362e-06, "epoch": 4.675191815856778, "percentage": 93.5, "elapsed_time": "9:24:15", "remaining_time": "0:39:12"} +{"current_steps": 1829, "total_steps": 1955, "loss": 0.914, "lr": 2.017137884100855e-06, "epoch": 4.677749360613811, "percentage": 93.55, 
"elapsed_time": "9:24:34", "remaining_time": "0:38:53"} +{"current_steps": 1830, "total_steps": 1955, "loss": 0.9376, "lr": 1.9853794507672885e-06, "epoch": 4.680306905370844, "percentage": 93.61, "elapsed_time": "9:24:52", "remaining_time": "0:38:35"} +{"current_steps": 1831, "total_steps": 1955, "loss": 0.9236, "lr": 1.9538698710473404e-06, "epoch": 4.6828644501278776, "percentage": 93.66, "elapsed_time": "9:25:11", "remaining_time": "0:38:16"} +{"current_steps": 1832, "total_steps": 1955, "loss": 0.9449, "lr": 1.9226092454512945e-06, "epoch": 4.68542199488491, "percentage": 93.71, "elapsed_time": "9:25:30", "remaining_time": "0:37:58"} +{"current_steps": 1833, "total_steps": 1955, "loss": 0.9138, "lr": 1.8915976736953157e-06, "epoch": 4.687979539641944, "percentage": 93.76, "elapsed_time": "9:25:49", "remaining_time": "0:37:39"} +{"current_steps": 1834, "total_steps": 1955, "loss": 0.9687, "lr": 1.8608352547011722e-06, "epoch": 4.690537084398977, "percentage": 93.81, "elapsed_time": "9:26:07", "remaining_time": "0:37:21"} +{"current_steps": 1835, "total_steps": 1955, "loss": 0.9331, "lr": 1.8303220865958194e-06, "epoch": 4.69309462915601, "percentage": 93.86, "elapsed_time": "9:26:26", "remaining_time": "0:37:02"} +{"current_steps": 1836, "total_steps": 1955, "loss": 0.945, "lr": 1.8000582667111777e-06, "epoch": 4.695652173913043, "percentage": 93.91, "elapsed_time": "9:26:45", "remaining_time": "0:36:44"} +{"current_steps": 1837, "total_steps": 1955, "loss": 0.9284, "lr": 1.7700438915837858e-06, "epoch": 4.698209718670077, "percentage": 93.96, "elapsed_time": "9:27:03", "remaining_time": "0:36:25"} +{"current_steps": 1838, "total_steps": 1955, "loss": 0.9, "lr": 1.7402790569544813e-06, "epoch": 4.70076726342711, "percentage": 94.02, "elapsed_time": "9:27:21", "remaining_time": "0:36:06"} +{"current_steps": 1839, "total_steps": 1955, "loss": 0.8962, "lr": 1.7107638577681073e-06, "epoch": 4.703324808184143, "percentage": 94.07, "elapsed_time": "9:27:39", 
"remaining_time": "0:35:48"} +{"current_steps": 1840, "total_steps": 1955, "loss": 0.9516, "lr": 1.681498388173246e-06, "epoch": 4.705882352941177, "percentage": 94.12, "elapsed_time": "9:27:58", "remaining_time": "0:35:29"} +{"current_steps": 1841, "total_steps": 1955, "loss": 0.9131, "lr": 1.652482741521837e-06, "epoch": 4.708439897698209, "percentage": 94.17, "elapsed_time": "9:28:16", "remaining_time": "0:35:11"} +{"current_steps": 1842, "total_steps": 1955, "loss": 0.9119, "lr": 1.6237170103689547e-06, "epoch": 4.710997442455243, "percentage": 94.22, "elapsed_time": "9:28:35", "remaining_time": "0:34:52"} +{"current_steps": 1843, "total_steps": 1955, "loss": 0.9141, "lr": 1.5952012864724898e-06, "epoch": 4.713554987212277, "percentage": 94.27, "elapsed_time": "9:28:54", "remaining_time": "0:34:34"} +{"current_steps": 1844, "total_steps": 1955, "loss": 0.9331, "lr": 1.5669356607928188e-06, "epoch": 4.716112531969309, "percentage": 94.32, "elapsed_time": "9:29:12", "remaining_time": "0:34:15"} +{"current_steps": 1845, "total_steps": 1955, "loss": 0.929, "lr": 1.5389202234925837e-06, "epoch": 4.718670076726343, "percentage": 94.37, "elapsed_time": "9:29:30", "remaining_time": "0:33:57"} +{"current_steps": 1846, "total_steps": 1955, "loss": 0.9195, "lr": 1.5111550639363447e-06, "epoch": 4.721227621483376, "percentage": 94.42, "elapsed_time": "9:29:48", "remaining_time": "0:33:38"} +{"current_steps": 1847, "total_steps": 1955, "loss": 0.9236, "lr": 1.483640270690332e-06, "epoch": 4.723785166240409, "percentage": 94.48, "elapsed_time": "9:30:06", "remaining_time": "0:33:20"} +{"current_steps": 1848, "total_steps": 1955, "loss": 0.9515, "lr": 1.4563759315221515e-06, "epoch": 4.726342710997442, "percentage": 94.53, "elapsed_time": "9:30:24", "remaining_time": "0:33:01"} +{"current_steps": 1849, "total_steps": 1955, "loss": 0.9522, "lr": 1.4293621334004581e-06, "epoch": 4.728900255754476, "percentage": 94.58, "elapsed_time": "9:30:43", "remaining_time": "0:32:43"} 
+{"current_steps": 1850, "total_steps": 1955, "loss": 0.9207, "lr": 1.4025989624947856e-06, "epoch": 4.731457800511509, "percentage": 94.63, "elapsed_time": "9:31:01", "remaining_time": "0:32:24"} +{"current_steps": 1851, "total_steps": 1955, "loss": 0.9226, "lr": 1.3760865041751736e-06, "epoch": 4.734015345268542, "percentage": 94.68, "elapsed_time": "9:31:20", "remaining_time": "0:32:06"} +{"current_steps": 1852, "total_steps": 1955, "loss": 0.9141, "lr": 1.3498248430119465e-06, "epoch": 4.736572890025576, "percentage": 94.73, "elapsed_time": "9:31:38", "remaining_time": "0:31:47"} +{"current_steps": 1853, "total_steps": 1955, "loss": 0.9544, "lr": 1.3238140627754014e-06, "epoch": 4.739130434782608, "percentage": 94.78, "elapsed_time": "9:31:56", "remaining_time": "0:31:28"} +{"current_steps": 1854, "total_steps": 1955, "loss": 0.9492, "lr": 1.2980542464355962e-06, "epoch": 4.741687979539642, "percentage": 94.83, "elapsed_time": "9:32:15", "remaining_time": "0:31:10"} +{"current_steps": 1855, "total_steps": 1955, "loss": 0.9253, "lr": 1.272545476162037e-06, "epoch": 4.744245524296675, "percentage": 94.88, "elapsed_time": "9:32:33", "remaining_time": "0:30:51"} +{"current_steps": 1856, "total_steps": 1955, "loss": 0.895, "lr": 1.2472878333234407e-06, "epoch": 4.746803069053708, "percentage": 94.94, "elapsed_time": "9:32:52", "remaining_time": "0:30:33"} +{"current_steps": 1857, "total_steps": 1955, "loss": 0.9146, "lr": 1.2222813984874749e-06, "epoch": 4.749360613810742, "percentage": 94.99, "elapsed_time": "9:33:10", "remaining_time": "0:30:14"} +{"current_steps": 1858, "total_steps": 1955, "loss": 0.9434, "lr": 1.197526251420502e-06, "epoch": 4.751918158567775, "percentage": 95.04, "elapsed_time": "9:33:28", "remaining_time": "0:29:56"} +{"current_steps": 1859, "total_steps": 1955, "loss": 0.917, "lr": 1.1730224710872862e-06, "epoch": 4.754475703324808, "percentage": 95.09, "elapsed_time": "9:33:46", "remaining_time": "0:29:37"} +{"current_steps": 1860, 
"total_steps": 1955, "loss": 0.9402, "lr": 1.148770135650814e-06, "epoch": 4.757033248081841, "percentage": 95.14, "elapsed_time": "9:34:05", "remaining_time": "0:29:19"} +{"current_steps": 1861, "total_steps": 1955, "loss": 0.9341, "lr": 1.1247693224719768e-06, "epoch": 4.759590792838875, "percentage": 95.19, "elapsed_time": "9:34:24", "remaining_time": "0:29:00"} +{"current_steps": 1862, "total_steps": 1955, "loss": 0.9258, "lr": 1.1010201081093653e-06, "epoch": 4.762148337595908, "percentage": 95.24, "elapsed_time": "9:34:42", "remaining_time": "0:28:42"} +{"current_steps": 1863, "total_steps": 1955, "loss": 0.9401, "lr": 1.0775225683190027e-06, "epoch": 4.764705882352941, "percentage": 95.29, "elapsed_time": "9:35:01", "remaining_time": "0:28:23"} +{"current_steps": 1864, "total_steps": 1955, "loss": 0.9452, "lr": 1.0542767780541242e-06, "epoch": 4.767263427109975, "percentage": 95.35, "elapsed_time": "9:35:20", "remaining_time": "0:28:05"} +{"current_steps": 1865, "total_steps": 1955, "loss": 0.9147, "lr": 1.0312828114649175e-06, "epoch": 4.7698209718670075, "percentage": 95.4, "elapsed_time": "9:35:38", "remaining_time": "0:27:46"} +{"current_steps": 1866, "total_steps": 1955, "loss": 0.9364, "lr": 1.008540741898285e-06, "epoch": 4.772378516624041, "percentage": 95.45, "elapsed_time": "9:35:56", "remaining_time": "0:27:28"} +{"current_steps": 1867, "total_steps": 1955, "loss": 0.9155, "lr": 9.860506418976556e-07, "epoch": 4.774936061381074, "percentage": 95.5, "elapsed_time": "9:36:15", "remaining_time": "0:27:09"} +{"current_steps": 1868, "total_steps": 1955, "loss": 0.9164, "lr": 9.638125832026658e-07, "epoch": 4.7774936061381075, "percentage": 95.55, "elapsed_time": "9:36:34", "remaining_time": "0:26:51"} +{"current_steps": 1869, "total_steps": 1955, "loss": 0.9294, "lr": 9.418266367490347e-07, "epoch": 4.78005115089514, "percentage": 95.6, "elapsed_time": "9:36:52", "remaining_time": "0:26:32"} +{"current_steps": 1870, "total_steps": 1955, "loss": 0.9198, 
"lr": 9.200928726682456e-07, "epoch": 4.782608695652174, "percentage": 95.65, "elapsed_time": "9:37:11", "remaining_time": "0:26:14"} +{"current_steps": 1871, "total_steps": 1955, "loss": 0.9696, "lr": 8.986113602873758e-07, "epoch": 4.7851662404092075, "percentage": 95.7, "elapsed_time": "9:37:29", "remaining_time": "0:25:55"} +{"current_steps": 1872, "total_steps": 1955, "loss": 0.9059, "lr": 8.773821681288752e-07, "epoch": 4.78772378516624, "percentage": 95.75, "elapsed_time": "9:37:48", "remaining_time": "0:25:37"} +{"current_steps": 1873, "total_steps": 1955, "loss": 0.9104, "lr": 8.564053639103087e-07, "epoch": 4.790281329923274, "percentage": 95.81, "elapsed_time": "9:38:07", "remaining_time": "0:25:18"} +{"current_steps": 1874, "total_steps": 1955, "loss": 0.8999, "lr": 8.356810145441874e-07, "epoch": 4.792838874680307, "percentage": 95.86, "elapsed_time": "9:38:25", "remaining_time": "0:25:00"} +{"current_steps": 1875, "total_steps": 1955, "loss": 0.9281, "lr": 8.152091861377198e-07, "epoch": 4.79539641943734, "percentage": 95.91, "elapsed_time": "9:38:43", "remaining_time": "0:24:41"} +{"current_steps": 1876, "total_steps": 1955, "loss": 0.8972, "lr": 7.949899439926345e-07, "epoch": 4.797953964194374, "percentage": 95.96, "elapsed_time": "9:39:02", "remaining_time": "0:24:23"} +{"current_steps": 1877, "total_steps": 1955, "loss": 0.9374, "lr": 7.750233526049222e-07, "epoch": 4.8005115089514065, "percentage": 96.01, "elapsed_time": "9:39:20", "remaining_time": "0:24:04"} +{"current_steps": 1878, "total_steps": 1955, "loss": 0.922, "lr": 7.553094756646761e-07, "epoch": 4.80306905370844, "percentage": 96.06, "elapsed_time": "9:39:39", "remaining_time": "0:23:45"} +{"current_steps": 1879, "total_steps": 1955, "loss": 0.9092, "lr": 7.358483760558877e-07, "epoch": 4.805626598465473, "percentage": 96.11, "elapsed_time": "9:39:58", "remaining_time": "0:23:27"} +{"current_steps": 1880, "total_steps": 1955, "loss": 0.9053, "lr": 7.166401158561886e-07, "epoch": 
4.8081841432225065, "percentage": 96.16, "elapsed_time": "9:40:16", "remaining_time": "0:23:08"} +{"current_steps": 1881, "total_steps": 1955, "loss": 0.9342, "lr": 6.976847563367539e-07, "epoch": 4.810741687979539, "percentage": 96.21, "elapsed_time": "9:40:34", "remaining_time": "0:22:50"} +{"current_steps": 1882, "total_steps": 1955, "loss": 0.9055, "lr": 6.789823579619992e-07, "epoch": 4.813299232736573, "percentage": 96.27, "elapsed_time": "9:40:52", "remaining_time": "0:22:31"} +{"current_steps": 1883, "total_steps": 1955, "loss": 0.8971, "lr": 6.605329803894389e-07, "epoch": 4.8158567774936065, "percentage": 96.32, "elapsed_time": "9:41:10", "remaining_time": "0:22:13"} +{"current_steps": 1884, "total_steps": 1955, "loss": 0.9176, "lr": 6.423366824695265e-07, "epoch": 4.818414322250639, "percentage": 96.37, "elapsed_time": "9:41:28", "remaining_time": "0:21:54"} +{"current_steps": 1885, "total_steps": 1955, "loss": 0.9176, "lr": 6.243935222454145e-07, "epoch": 4.820971867007673, "percentage": 96.42, "elapsed_time": "9:41:47", "remaining_time": "0:21:36"} +{"current_steps": 1886, "total_steps": 1955, "loss": 0.9336, "lr": 6.067035569527768e-07, "epoch": 4.823529411764706, "percentage": 96.47, "elapsed_time": "9:42:06", "remaining_time": "0:21:17"} +{"current_steps": 1887, "total_steps": 1955, "loss": 0.9335, "lr": 5.89266843019658e-07, "epoch": 4.826086956521739, "percentage": 96.52, "elapsed_time": "9:42:24", "remaining_time": "0:20:59"} +{"current_steps": 1888, "total_steps": 1955, "loss": 0.9302, "lr": 5.720834360662597e-07, "epoch": 4.828644501278772, "percentage": 96.57, "elapsed_time": "9:42:42", "remaining_time": "0:20:40"} +{"current_steps": 1889, "total_steps": 1955, "loss": 0.9173, "lr": 5.551533909047812e-07, "epoch": 4.831202046035806, "percentage": 96.62, "elapsed_time": "9:43:00", "remaining_time": "0:20:22"} +{"current_steps": 1890, "total_steps": 1955, "loss": 0.8973, "lr": 5.384767615392328e-07, "epoch": 4.833759590792839, "percentage": 
96.68, "elapsed_time": "9:43:19", "remaining_time": "0:20:03"} +{"current_steps": 1891, "total_steps": 1955, "loss": 0.9327, "lr": 5.220536011652933e-07, "epoch": 4.836317135549872, "percentage": 96.73, "elapsed_time": "9:43:38", "remaining_time": "0:19:45"} +{"current_steps": 1892, "total_steps": 1955, "loss": 0.8986, "lr": 5.058839621700973e-07, "epoch": 4.838874680306906, "percentage": 96.78, "elapsed_time": "9:43:56", "remaining_time": "0:19:26"} +{"current_steps": 1893, "total_steps": 1955, "loss": 0.8783, "lr": 4.899678961320842e-07, "epoch": 4.841432225063938, "percentage": 96.83, "elapsed_time": "9:44:15", "remaining_time": "0:19:08"} +{"current_steps": 1894, "total_steps": 1955, "loss": 0.9265, "lr": 4.743054538208558e-07, "epoch": 4.843989769820972, "percentage": 96.88, "elapsed_time": "9:44:33", "remaining_time": "0:18:49"} +{"current_steps": 1895, "total_steps": 1955, "loss": 0.917, "lr": 4.5889668519698117e-07, "epoch": 4.846547314578006, "percentage": 96.93, "elapsed_time": "9:44:52", "remaining_time": "0:18:31"} +{"current_steps": 1896, "total_steps": 1955, "loss": 0.9475, "lr": 4.437416394118721e-07, "epoch": 4.849104859335038, "percentage": 96.98, "elapsed_time": "9:45:11", "remaining_time": "0:18:12"} +{"current_steps": 1897, "total_steps": 1955, "loss": 0.9136, "lr": 4.2884036480757896e-07, "epoch": 4.851662404092072, "percentage": 97.03, "elapsed_time": "9:45:29", "remaining_time": "0:17:54"} +{"current_steps": 1898, "total_steps": 1955, "loss": 0.9276, "lr": 4.1419290891669293e-07, "epoch": 4.854219948849105, "percentage": 97.08, "elapsed_time": "9:45:47", "remaining_time": "0:17:35"} +{"current_steps": 1899, "total_steps": 1955, "loss": 0.9584, "lr": 3.997993184621418e-07, "epoch": 4.856777493606138, "percentage": 97.14, "elapsed_time": "9:46:05", "remaining_time": "0:17:17"} +{"current_steps": 1900, "total_steps": 1955, "loss": 0.9128, "lr": 3.856596393570744e-07, "epoch": 4.859335038363171, "percentage": 97.19, "elapsed_time": "9:46:24", 
"remaining_time": "0:16:58"} +{"current_steps": 1901, "total_steps": 1955, "loss": 0.912, "lr": 3.717739167047185e-07, "epoch": 4.861892583120205, "percentage": 97.24, "elapsed_time": "9:46:43", "remaining_time": "0:16:39"} +{"current_steps": 1902, "total_steps": 1955, "loss": 0.9166, "lr": 3.581421947982122e-07, "epoch": 4.864450127877237, "percentage": 97.29, "elapsed_time": "9:47:01", "remaining_time": "0:16:21"} +{"current_steps": 1903, "total_steps": 1955, "loss": 0.9308, "lr": 3.447645171204528e-07, "epoch": 4.867007672634271, "percentage": 97.34, "elapsed_time": "9:47:19", "remaining_time": "0:16:02"} +{"current_steps": 1904, "total_steps": 1955, "loss": 0.9401, "lr": 3.316409263440168e-07, "epoch": 4.869565217391305, "percentage": 97.39, "elapsed_time": "9:47:38", "remaining_time": "0:15:44"} +{"current_steps": 1905, "total_steps": 1955, "loss": 0.9349, "lr": 3.1877146433095584e-07, "epoch": 4.872122762148337, "percentage": 97.44, "elapsed_time": "9:47:56", "remaining_time": "0:15:25"} +{"current_steps": 1906, "total_steps": 1955, "loss": 0.9218, "lr": 3.0615617213271664e-07, "epoch": 4.874680306905371, "percentage": 97.49, "elapsed_time": "9:48:14", "remaining_time": "0:15:07"} +{"current_steps": 1907, "total_steps": 1955, "loss": 0.9278, "lr": 2.937950899899633e-07, "epoch": 4.877237851662404, "percentage": 97.54, "elapsed_time": "9:48:33", "remaining_time": "0:14:48"} +{"current_steps": 1908, "total_steps": 1955, "loss": 0.949, "lr": 2.816882573324886e-07, "epoch": 4.879795396419437, "percentage": 97.6, "elapsed_time": "9:48:52", "remaining_time": "0:14:30"} +{"current_steps": 1909, "total_steps": 1955, "loss": 0.9563, "lr": 2.6983571277907184e-07, "epoch": 4.882352941176471, "percentage": 97.65, "elapsed_time": "9:49:10", "remaining_time": "0:14:11"} +{"current_steps": 1910, "total_steps": 1955, "loss": 0.9211, "lr": 2.582374941373456e-07, "epoch": 4.884910485933504, "percentage": 97.7, "elapsed_time": "9:49:29", "remaining_time": "0:13:53"} 
+{"current_steps": 1911, "total_steps": 1955, "loss": 0.9013, "lr": 2.468936384036891e-07, "epoch": 4.887468030690537, "percentage": 97.75, "elapsed_time": "9:49:47", "remaining_time": "0:13:34"} +{"current_steps": 1912, "total_steps": 1955, "loss": 0.9417, "lr": 2.3580418176311293e-07, "epoch": 4.89002557544757, "percentage": 97.8, "elapsed_time": "9:50:05", "remaining_time": "0:13:16"} +{"current_steps": 1913, "total_steps": 1955, "loss": 0.9253, "lr": 2.2496915958913458e-07, "epoch": 4.892583120204604, "percentage": 97.85, "elapsed_time": "9:50:24", "remaining_time": "0:12:57"} +{"current_steps": 1914, "total_steps": 1955, "loss": 0.9344, "lr": 2.143886064436629e-07, "epoch": 4.8951406649616365, "percentage": 97.9, "elapsed_time": "9:50:42", "remaining_time": "0:12:39"} +{"current_steps": 1915, "total_steps": 1955, "loss": 0.9258, "lr": 2.0406255607688274e-07, "epoch": 4.89769820971867, "percentage": 97.95, "elapsed_time": "9:51:00", "remaining_time": "0:12:20"} +{"current_steps": 1916, "total_steps": 1955, "loss": 0.9484, "lr": 1.9399104142719283e-07, "epoch": 4.900255754475703, "percentage": 98.01, "elapsed_time": "9:51:19", "remaining_time": "0:12:02"} +{"current_steps": 1917, "total_steps": 1955, "loss": 0.9073, "lr": 1.8417409462102798e-07, "epoch": 4.9028132992327365, "percentage": 98.06, "elapsed_time": "9:51:37", "remaining_time": "0:11:43"} +{"current_steps": 1918, "total_steps": 1955, "loss": 0.8841, "lr": 1.746117469728148e-07, "epoch": 4.90537084398977, "percentage": 98.11, "elapsed_time": "9:51:56", "remaining_time": "0:11:25"} +{"current_steps": 1919, "total_steps": 1955, "loss": 0.9525, "lr": 1.6530402898484733e-07, "epoch": 4.907928388746803, "percentage": 98.16, "elapsed_time": "9:52:15", "remaining_time": "0:11:06"} +{"current_steps": 1920, "total_steps": 1955, "loss": 0.9193, "lr": 1.5625097034719815e-07, "epoch": 4.910485933503836, "percentage": 98.21, "elapsed_time": "9:52:33", "remaining_time": "0:10:48"} +{"current_steps": 1921, 
"total_steps": 1955, "loss": 0.9339, "lr": 1.474525999375942e-07, "epoch": 4.913043478260869, "percentage": 98.26, "elapsed_time": "9:52:51", "remaining_time": "0:10:29"} +{"current_steps": 1922, "total_steps": 1955, "loss": 0.9271, "lr": 1.3890894582138103e-07, "epoch": 4.915601023017903, "percentage": 98.31, "elapsed_time": "9:53:10", "remaining_time": "0:10:11"} +{"current_steps": 1923, "total_steps": 1955, "loss": 0.9129, "lr": 1.3062003525138089e-07, "epoch": 4.918158567774936, "percentage": 98.36, "elapsed_time": "9:53:29", "remaining_time": "0:09:52"} +{"current_steps": 1924, "total_steps": 1955, "loss": 0.9149, "lr": 1.225858946678393e-07, "epoch": 4.920716112531969, "percentage": 98.41, "elapsed_time": "9:53:47", "remaining_time": "0:09:34"} +{"current_steps": 1925, "total_steps": 1955, "loss": 0.9473, "lr": 1.1480654969833638e-07, "epoch": 4.923273657289003, "percentage": 98.47, "elapsed_time": "9:54:05", "remaining_time": "0:09:15"} +{"current_steps": 1926, "total_steps": 1955, "loss": 0.9452, "lr": 1.0728202515766228e-07, "epoch": 4.9258312020460355, "percentage": 98.52, "elapsed_time": "9:54:24", "remaining_time": "0:08:56"} +{"current_steps": 1927, "total_steps": 1955, "loss": 0.9478, "lr": 1.0001234504779966e-07, "epoch": 4.928388746803069, "percentage": 98.57, "elapsed_time": "9:54:42", "remaining_time": "0:08:38"} +{"current_steps": 1928, "total_steps": 1955, "loss": 0.9113, "lr": 9.299753255781696e-08, "epoch": 4.930946291560103, "percentage": 98.62, "elapsed_time": "9:55:01", "remaining_time": "0:08:19"} +{"current_steps": 1929, "total_steps": 1955, "loss": 0.9322, "lr": 8.623761006379738e-08, "epoch": 4.9335038363171355, "percentage": 98.67, "elapsed_time": "9:55:19", "remaining_time": "0:08:01"} +{"current_steps": 1930, "total_steps": 1955, "loss": 0.9529, "lr": 7.973259912875897e-08, "epoch": 4.936061381074169, "percentage": 98.72, "elapsed_time": "9:55:38", "remaining_time": "0:07:42"} +{"current_steps": 1931, "total_steps": 1955, "loss": 
0.9516, "lr": 7.348252050261018e-08, "epoch": 4.938618925831202, "percentage": 98.77, "elapsed_time": "9:55:56", "remaining_time": "0:07:24"} +{"current_steps": 1932, "total_steps": 1955, "loss": 0.9327, "lr": 6.748739412205218e-08, "epoch": 4.9411764705882355, "percentage": 98.82, "elapsed_time": "9:56:15", "remaining_time": "0:07:05"} +{"current_steps": 1933, "total_steps": 1955, "loss": 0.9033, "lr": 6.174723911053449e-08, "epoch": 4.943734015345268, "percentage": 98.87, "elapsed_time": "9:56:33", "remaining_time": "0:06:47"} +{"current_steps": 1934, "total_steps": 1955, "loss": 0.9289, "lr": 5.6262073778192705e-08, "epoch": 4.946291560102302, "percentage": 98.93, "elapsed_time": "9:56:52", "remaining_time": "0:06:28"} +{"current_steps": 1935, "total_steps": 1955, "loss": 0.9127, "lr": 5.1031915621795325e-08, "epoch": 4.948849104859335, "percentage": 98.98, "elapsed_time": "9:57:10", "remaining_time": "0:06:10"} +{"current_steps": 1936, "total_steps": 1955, "loss": 0.9195, "lr": 4.605678132467262e-08, "epoch": 4.951406649616368, "percentage": 99.03, "elapsed_time": "9:57:28", "remaining_time": "0:05:51"} +{"current_steps": 1937, "total_steps": 1955, "loss": 0.9235, "lr": 4.133668675666336e-08, "epoch": 4.953964194373402, "percentage": 99.08, "elapsed_time": "9:57:46", "remaining_time": "0:05:33"} +{"current_steps": 1938, "total_steps": 1955, "loss": 0.8983, "lr": 3.687164697408818e-08, "epoch": 4.956521739130435, "percentage": 99.13, "elapsed_time": "9:58:05", "remaining_time": "0:05:14"} +{"current_steps": 1939, "total_steps": 1955, "loss": 0.9333, "lr": 3.266167621967853e-08, "epoch": 4.959079283887468, "percentage": 99.18, "elapsed_time": "9:58:24", "remaining_time": "0:04:56"} +{"current_steps": 1940, "total_steps": 1955, "loss": 0.9677, "lr": 2.8706787922541112e-08, "epoch": 4.961636828644501, "percentage": 99.23, "elapsed_time": "9:58:42", "remaining_time": "0:04:37"} +{"current_steps": 1941, "total_steps": 1955, "loss": 0.9205, "lr": 
2.5006994698095754e-08, "epoch": 4.964194373401535, "percentage": 99.28, "elapsed_time": "9:59:01", "remaining_time": "0:04:19"} +{"current_steps": 1942, "total_steps": 1955, "loss": 0.9314, "lr": 2.156230834808426e-08, "epoch": 4.966751918158568, "percentage": 99.34, "elapsed_time": "9:59:19", "remaining_time": "0:04:00"} +{"current_steps": 1943, "total_steps": 1955, "loss": 0.9289, "lr": 1.837273986046384e-08, "epoch": 4.969309462915601, "percentage": 99.39, "elapsed_time": "9:59:38", "remaining_time": "0:03:42"} +{"current_steps": 1944, "total_steps": 1955, "loss": 0.9013, "lr": 1.5438299409433755e-08, "epoch": 4.971867007672635, "percentage": 99.44, "elapsed_time": "9:59:56", "remaining_time": "0:03:23"} +{"current_steps": 1945, "total_steps": 1955, "loss": 0.9203, "lr": 1.2758996355373144e-08, "epoch": 4.974424552429667, "percentage": 99.49, "elapsed_time": "10:00:14", "remaining_time": "0:03:05"} +{"current_steps": 1946, "total_steps": 1955, "loss": 0.9541, "lr": 1.0334839244805495e-08, "epoch": 4.976982097186701, "percentage": 99.54, "elapsed_time": "10:00:32", "remaining_time": "0:02:46"} +{"current_steps": 1947, "total_steps": 1955, "loss": 0.9064, "lr": 8.165835810389766e-09, "epoch": 4.979539641943734, "percentage": 99.59, "elapsed_time": "10:00:51", "remaining_time": "0:02:28"} +{"current_steps": 1948, "total_steps": 1955, "loss": 0.9214, "lr": 6.251992970875975e-09, "epoch": 4.982097186700767, "percentage": 99.64, "elapsed_time": "10:01:10", "remaining_time": "0:02:09"} +{"current_steps": 1949, "total_steps": 1955, "loss": 0.9461, "lr": 4.5933168311140805e-09, "epoch": 4.9846547314578, "percentage": 99.69, "elapsed_time": "10:01:28", "remaining_time": "0:01:51"} +{"current_steps": 1950, "total_steps": 1955, "loss": 0.9465, "lr": 3.1898126820006924e-09, "epoch": 4.987212276214834, "percentage": 99.74, "elapsed_time": "10:01:47", "remaining_time": "0:01:32"} +{"current_steps": 1951, "total_steps": 1955, "loss": 0.9108, "lr": 2.041485000479071e-09, 
"epoch": 4.989769820971867, "percentage": 99.8, "elapsed_time": "10:02:05", "remaining_time": "0:01:14"} +{"current_steps": 1952, "total_steps": 1955, "loss": 0.9356, "lr": 1.148337449521364e-09, "epoch": 4.9923273657289, "percentage": 99.85, "elapsed_time": "10:02:24", "remaining_time": "0:00:55"} +{"current_steps": 1953, "total_steps": 1955, "loss": 0.9002, "lr": 5.103728781197248e-10, "epoch": 4.994884910485934, "percentage": 99.9, "elapsed_time": "10:02:43", "remaining_time": "0:00:37"} +{"current_steps": 1954, "total_steps": 1955, "loss": 0.9081, "lr": 1.275933212774305e-10, "epoch": 4.997442455242966, "percentage": 99.95, "elapsed_time": "10:03:01", "remaining_time": "0:00:18"} +{"current_steps": 1955, "total_steps": 1955, "loss": 0.9254, "lr": 0.0, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "10:03:20", "remaining_time": "0:00:00"} +{"current_steps": 1955, "total_steps": 1955, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "10:03:38", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..4aa61da --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,13727 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 1955, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025575447570332483, + "grad_norm": 2.9563186457899664, + "learning_rate": 8.163265306122449e-07, + "loss": 1.5213, + "step": 1 + }, + { + "epoch": 0.005115089514066497, + "grad_norm": 2.9570403686006705, + "learning_rate": 1.6326530612244897e-06, + "loss": 1.4742, + "step": 2 + }, + { + "epoch": 0.0076726342710997444, + "grad_norm": 3.010165733072805, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.4946, + "step": 3 + }, + { + "epoch": 0.010230179028132993, + "grad_norm": 2.868210172096221, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.482, + "step": 4 + }, + { + 
"epoch": 0.01278772378516624, + "grad_norm": 2.6518374719836477, + "learning_rate": 4.081632653061225e-06, + "loss": 1.4866, + "step": 5 + }, + { + "epoch": 0.015345268542199489, + "grad_norm": 2.0719187075670176, + "learning_rate": 4.897959183673469e-06, + "loss": 1.4844, + "step": 6 + }, + { + "epoch": 0.017902813299232736, + "grad_norm": 1.8691931463314044, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.4533, + "step": 7 + }, + { + "epoch": 0.020460358056265986, + "grad_norm": 1.8092896927352589, + "learning_rate": 6.530612244897959e-06, + "loss": 1.4433, + "step": 8 + }, + { + "epoch": 0.023017902813299233, + "grad_norm": 1.7543993002445608, + "learning_rate": 7.346938775510205e-06, + "loss": 1.4744, + "step": 9 + }, + { + "epoch": 0.02557544757033248, + "grad_norm": 1.6606628173638305, + "learning_rate": 8.16326530612245e-06, + "loss": 1.4586, + "step": 10 + }, + { + "epoch": 0.028132992327365727, + "grad_norm": 2.197985553952399, + "learning_rate": 8.979591836734695e-06, + "loss": 1.4315, + "step": 11 + }, + { + "epoch": 0.030690537084398978, + "grad_norm": 2.096672912966444, + "learning_rate": 9.795918367346939e-06, + "loss": 1.3907, + "step": 12 + }, + { + "epoch": 0.03324808184143223, + "grad_norm": 1.7669816182231157, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.4234, + "step": 13 + }, + { + "epoch": 0.03580562659846547, + "grad_norm": 1.3020764177290665, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.3478, + "step": 14 + }, + { + "epoch": 0.03836317135549872, + "grad_norm": 1.2917276833945952, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.378, + "step": 15 + }, + { + "epoch": 0.04092071611253197, + "grad_norm": 0.9647900041095249, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.3273, + "step": 16 + }, + { + "epoch": 0.043478260869565216, + "grad_norm": 0.998986811649884, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.3424, + "step": 17 + }, + { + "epoch": 0.04603580562659847, + "grad_norm": 
0.8293785359427173, + "learning_rate": 1.469387755102041e-05, + "loss": 1.3354, + "step": 18 + }, + { + "epoch": 0.04859335038363171, + "grad_norm": 0.7442208693017255, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.3216, + "step": 19 + }, + { + "epoch": 0.05115089514066496, + "grad_norm": 0.8334097235660463, + "learning_rate": 1.63265306122449e-05, + "loss": 1.305, + "step": 20 + }, + { + "epoch": 0.05370843989769821, + "grad_norm": 0.7133053870238929, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.2863, + "step": 21 + }, + { + "epoch": 0.056265984654731455, + "grad_norm": 0.5994613004850937, + "learning_rate": 1.795918367346939e-05, + "loss": 1.31, + "step": 22 + }, + { + "epoch": 0.058823529411764705, + "grad_norm": 0.6168319603979278, + "learning_rate": 1.8775510204081636e-05, + "loss": 1.2652, + "step": 23 + }, + { + "epoch": 0.061381074168797956, + "grad_norm": 0.5934674503101482, + "learning_rate": 1.9591836734693877e-05, + "loss": 1.2848, + "step": 24 + }, + { + "epoch": 0.0639386189258312, + "grad_norm": 0.5809141308410171, + "learning_rate": 2.0408163265306126e-05, + "loss": 1.2605, + "step": 25 + }, + { + "epoch": 0.06649616368286446, + "grad_norm": 0.5544963829723922, + "learning_rate": 2.122448979591837e-05, + "loss": 1.2663, + "step": 26 + }, + { + "epoch": 0.06905370843989769, + "grad_norm": 0.5109525040926751, + "learning_rate": 2.2040816326530613e-05, + "loss": 1.2493, + "step": 27 + }, + { + "epoch": 0.07161125319693094, + "grad_norm": 0.47071086900075043, + "learning_rate": 2.2857142857142858e-05, + "loss": 1.2725, + "step": 28 + }, + { + "epoch": 0.0741687979539642, + "grad_norm": 0.47760033429842697, + "learning_rate": 2.3673469387755103e-05, + "loss": 1.2493, + "step": 29 + }, + { + "epoch": 0.07672634271099744, + "grad_norm": 0.47942640683455684, + "learning_rate": 2.448979591836735e-05, + "loss": 1.2635, + "step": 30 + }, + { + "epoch": 0.0792838874680307, + "grad_norm": 0.3817784984018378, + "learning_rate": 
2.5306122448979597e-05, + "loss": 1.2581, + "step": 31 + }, + { + "epoch": 0.08184143222506395, + "grad_norm": 0.41863028873772656, + "learning_rate": 2.6122448979591835e-05, + "loss": 1.2319, + "step": 32 + }, + { + "epoch": 0.08439897698209718, + "grad_norm": 0.4561646749370822, + "learning_rate": 2.6938775510204084e-05, + "loss": 1.2647, + "step": 33 + }, + { + "epoch": 0.08695652173913043, + "grad_norm": 0.32944852639889954, + "learning_rate": 2.775510204081633e-05, + "loss": 1.2828, + "step": 34 + }, + { + "epoch": 0.08951406649616368, + "grad_norm": 0.36090683632534276, + "learning_rate": 2.8571428571428574e-05, + "loss": 1.2245, + "step": 35 + }, + { + "epoch": 0.09207161125319693, + "grad_norm": 0.36952861081098753, + "learning_rate": 2.938775510204082e-05, + "loss": 1.2383, + "step": 36 + }, + { + "epoch": 0.09462915601023018, + "grad_norm": 0.39714992118388376, + "learning_rate": 3.020408163265306e-05, + "loss": 1.2524, + "step": 37 + }, + { + "epoch": 0.09718670076726342, + "grad_norm": 0.3567290279003148, + "learning_rate": 3.102040816326531e-05, + "loss": 1.229, + "step": 38 + }, + { + "epoch": 0.09974424552429667, + "grad_norm": 0.3806643799838351, + "learning_rate": 3.183673469387755e-05, + "loss": 1.2438, + "step": 39 + }, + { + "epoch": 0.10230179028132992, + "grad_norm": 0.407422548294049, + "learning_rate": 3.26530612244898e-05, + "loss": 1.1862, + "step": 40 + }, + { + "epoch": 0.10485933503836317, + "grad_norm": 0.34463209168828013, + "learning_rate": 3.346938775510204e-05, + "loss": 1.2127, + "step": 41 + }, + { + "epoch": 0.10741687979539642, + "grad_norm": 0.36477387999624367, + "learning_rate": 3.4285714285714284e-05, + "loss": 1.2118, + "step": 42 + }, + { + "epoch": 0.10997442455242967, + "grad_norm": 0.33681318596769666, + "learning_rate": 3.510204081632653e-05, + "loss": 1.1849, + "step": 43 + }, + { + "epoch": 0.11253196930946291, + "grad_norm": 0.3683055012446813, + "learning_rate": 3.591836734693878e-05, + "loss": 1.1965, + "step": 
44 + }, + { + "epoch": 0.11508951406649616, + "grad_norm": 0.3236097196989051, + "learning_rate": 3.673469387755102e-05, + "loss": 1.1973, + "step": 45 + }, + { + "epoch": 0.11764705882352941, + "grad_norm": 0.45336744047964317, + "learning_rate": 3.755102040816327e-05, + "loss": 1.219, + "step": 46 + }, + { + "epoch": 0.12020460358056266, + "grad_norm": 0.6485049911187234, + "learning_rate": 3.836734693877551e-05, + "loss": 1.22, + "step": 47 + }, + { + "epoch": 0.12276214833759591, + "grad_norm": 0.7308887737693851, + "learning_rate": 3.9183673469387755e-05, + "loss": 1.1927, + "step": 48 + }, + { + "epoch": 0.12531969309462915, + "grad_norm": 0.7412779741523179, + "learning_rate": 4e-05, + "loss": 1.207, + "step": 49 + }, + { + "epoch": 0.1278772378516624, + "grad_norm": 0.61907561491782, + "learning_rate": 4.081632653061225e-05, + "loss": 1.1761, + "step": 50 + }, + { + "epoch": 0.13043478260869565, + "grad_norm": 0.5645180027937694, + "learning_rate": 4.1632653061224494e-05, + "loss": 1.1828, + "step": 51 + }, + { + "epoch": 0.1329923273657289, + "grad_norm": 0.6097938476878244, + "learning_rate": 4.244897959183674e-05, + "loss": 1.1645, + "step": 52 + }, + { + "epoch": 0.13554987212276215, + "grad_norm": 0.68105585214221, + "learning_rate": 4.3265306122448984e-05, + "loss": 1.1663, + "step": 53 + }, + { + "epoch": 0.13810741687979539, + "grad_norm": 0.5148592364190684, + "learning_rate": 4.4081632653061226e-05, + "loss": 1.2013, + "step": 54 + }, + { + "epoch": 0.14066496163682865, + "grad_norm": 0.6290537917728678, + "learning_rate": 4.489795918367347e-05, + "loss": 1.2142, + "step": 55 + }, + { + "epoch": 0.1432225063938619, + "grad_norm": 0.8770682994258979, + "learning_rate": 4.5714285714285716e-05, + "loss": 1.2031, + "step": 56 + }, + { + "epoch": 0.14578005115089515, + "grad_norm": 1.211521452597314, + "learning_rate": 4.6530612244897965e-05, + "loss": 1.1872, + "step": 57 + }, + { + "epoch": 0.1483375959079284, + "grad_norm": 1.1706192692433377, + 
"learning_rate": 4.7346938775510206e-05, + "loss": 1.2026, + "step": 58 + }, + { + "epoch": 0.15089514066496162, + "grad_norm": 1.0347528096815952, + "learning_rate": 4.8163265306122455e-05, + "loss": 1.1698, + "step": 59 + }, + { + "epoch": 0.1534526854219949, + "grad_norm": 0.8917967843832559, + "learning_rate": 4.89795918367347e-05, + "loss": 1.1935, + "step": 60 + }, + { + "epoch": 0.15601023017902813, + "grad_norm": 0.8447536110052303, + "learning_rate": 4.9795918367346945e-05, + "loss": 1.1816, + "step": 61 + }, + { + "epoch": 0.1585677749360614, + "grad_norm": 1.0808910383761972, + "learning_rate": 5.0612244897959194e-05, + "loss": 1.2148, + "step": 62 + }, + { + "epoch": 0.16112531969309463, + "grad_norm": 1.0232789536513451, + "learning_rate": 5.1428571428571436e-05, + "loss": 1.1974, + "step": 63 + }, + { + "epoch": 0.1636828644501279, + "grad_norm": 0.870169282881004, + "learning_rate": 5.224489795918367e-05, + "loss": 1.1914, + "step": 64 + }, + { + "epoch": 0.16624040920716113, + "grad_norm": 0.7292663989493176, + "learning_rate": 5.306122448979592e-05, + "loss": 1.183, + "step": 65 + }, + { + "epoch": 0.16879795396419436, + "grad_norm": 0.8315009268144099, + "learning_rate": 5.387755102040817e-05, + "loss": 1.1457, + "step": 66 + }, + { + "epoch": 0.17135549872122763, + "grad_norm": 1.04261775715331, + "learning_rate": 5.469387755102041e-05, + "loss": 1.1724, + "step": 67 + }, + { + "epoch": 0.17391304347826086, + "grad_norm": 1.0040970248925822, + "learning_rate": 5.551020408163266e-05, + "loss": 1.1469, + "step": 68 + }, + { + "epoch": 0.17647058823529413, + "grad_norm": 1.0399514999943609, + "learning_rate": 5.63265306122449e-05, + "loss": 1.1335, + "step": 69 + }, + { + "epoch": 0.17902813299232737, + "grad_norm": 0.9541534570834667, + "learning_rate": 5.714285714285715e-05, + "loss": 1.1475, + "step": 70 + }, + { + "epoch": 0.1815856777493606, + "grad_norm": 1.155886502502828, + "learning_rate": 5.79591836734694e-05, + "loss": 1.1595, + "step": 
71 + }, + { + "epoch": 0.18414322250639387, + "grad_norm": 1.4920355778823207, + "learning_rate": 5.877551020408164e-05, + "loss": 1.1764, + "step": 72 + }, + { + "epoch": 0.1867007672634271, + "grad_norm": 0.8392580472572768, + "learning_rate": 5.959183673469389e-05, + "loss": 1.2046, + "step": 73 + }, + { + "epoch": 0.18925831202046037, + "grad_norm": 1.3327976055634758, + "learning_rate": 6.040816326530612e-05, + "loss": 1.1601, + "step": 74 + }, + { + "epoch": 0.1918158567774936, + "grad_norm": 1.2349989957797203, + "learning_rate": 6.122448979591836e-05, + "loss": 1.1524, + "step": 75 + }, + { + "epoch": 0.19437340153452684, + "grad_norm": 1.1978584662405511, + "learning_rate": 6.204081632653062e-05, + "loss": 1.1559, + "step": 76 + }, + { + "epoch": 0.1969309462915601, + "grad_norm": 1.0353931821191475, + "learning_rate": 6.285714285714286e-05, + "loss": 1.1657, + "step": 77 + }, + { + "epoch": 0.19948849104859334, + "grad_norm": 0.9094148187384907, + "learning_rate": 6.36734693877551e-05, + "loss": 1.1471, + "step": 78 + }, + { + "epoch": 0.2020460358056266, + "grad_norm": 1.187032715727395, + "learning_rate": 6.448979591836736e-05, + "loss": 1.1408, + "step": 79 + }, + { + "epoch": 0.20460358056265984, + "grad_norm": 1.0732720468700825, + "learning_rate": 6.53061224489796e-05, + "loss": 1.1565, + "step": 80 + }, + { + "epoch": 0.2071611253196931, + "grad_norm": 0.8103161887096414, + "learning_rate": 6.612244897959184e-05, + "loss": 1.1394, + "step": 81 + }, + { + "epoch": 0.20971867007672634, + "grad_norm": 1.0683800405517745, + "learning_rate": 6.693877551020408e-05, + "loss": 1.1366, + "step": 82 + }, + { + "epoch": 0.21227621483375958, + "grad_norm": 1.0570001635125388, + "learning_rate": 6.775510204081634e-05, + "loss": 1.1521, + "step": 83 + }, + { + "epoch": 0.21483375959079284, + "grad_norm": 1.0038253962694932, + "learning_rate": 6.857142857142857e-05, + "loss": 1.1485, + "step": 84 + }, + { + "epoch": 0.21739130434782608, + "grad_norm": 
1.1691956641199828, + "learning_rate": 6.938775510204082e-05, + "loss": 1.199, + "step": 85 + }, + { + "epoch": 0.21994884910485935, + "grad_norm": 1.160205747766507, + "learning_rate": 7.020408163265306e-05, + "loss": 1.15, + "step": 86 + }, + { + "epoch": 0.22250639386189258, + "grad_norm": 1.0403901594667788, + "learning_rate": 7.10204081632653e-05, + "loss": 1.1547, + "step": 87 + }, + { + "epoch": 0.22506393861892582, + "grad_norm": 1.253302691517826, + "learning_rate": 7.183673469387756e-05, + "loss": 1.1808, + "step": 88 + }, + { + "epoch": 0.22762148337595908, + "grad_norm": 1.0181115029064822, + "learning_rate": 7.26530612244898e-05, + "loss": 1.1399, + "step": 89 + }, + { + "epoch": 0.23017902813299232, + "grad_norm": 1.179029120883534, + "learning_rate": 7.346938775510205e-05, + "loss": 1.1709, + "step": 90 + }, + { + "epoch": 0.23273657289002558, + "grad_norm": 0.8065046535786934, + "learning_rate": 7.42857142857143e-05, + "loss": 1.1649, + "step": 91 + }, + { + "epoch": 0.23529411764705882, + "grad_norm": 0.9920804997259105, + "learning_rate": 7.510204081632654e-05, + "loss": 1.1693, + "step": 92 + }, + { + "epoch": 0.23785166240409208, + "grad_norm": 1.4041632222361236, + "learning_rate": 7.591836734693878e-05, + "loss": 1.1525, + "step": 93 + }, + { + "epoch": 0.24040920716112532, + "grad_norm": 1.1202267325769892, + "learning_rate": 7.673469387755103e-05, + "loss": 1.1727, + "step": 94 + }, + { + "epoch": 0.24296675191815856, + "grad_norm": 0.9214700487119486, + "learning_rate": 7.755102040816327e-05, + "loss": 1.1193, + "step": 95 + }, + { + "epoch": 0.24552429667519182, + "grad_norm": 0.9731714014969046, + "learning_rate": 7.836734693877551e-05, + "loss": 1.1605, + "step": 96 + }, + { + "epoch": 0.24808184143222506, + "grad_norm": 1.2370098654676154, + "learning_rate": 7.918367346938776e-05, + "loss": 1.1663, + "step": 97 + }, + { + "epoch": 0.2506393861892583, + "grad_norm": 0.856182416906324, + "learning_rate": 8e-05, + "loss": 1.134, + "step": 
98 + }, + { + "epoch": 0.2531969309462916, + "grad_norm": 1.0086218583881408, + "learning_rate": 8.081632653061225e-05, + "loss": 1.1307, + "step": 99 + }, + { + "epoch": 0.2557544757033248, + "grad_norm": 1.3360855997576195, + "learning_rate": 8.16326530612245e-05, + "loss": 1.1283, + "step": 100 + }, + { + "epoch": 0.25831202046035806, + "grad_norm": 1.1442169715816302, + "learning_rate": 8.244897959183673e-05, + "loss": 1.1486, + "step": 101 + }, + { + "epoch": 0.2608695652173913, + "grad_norm": 0.9528964485268216, + "learning_rate": 8.326530612244899e-05, + "loss": 1.1539, + "step": 102 + }, + { + "epoch": 0.26342710997442453, + "grad_norm": 1.2014260964822987, + "learning_rate": 8.408163265306123e-05, + "loss": 1.1246, + "step": 103 + }, + { + "epoch": 0.2659846547314578, + "grad_norm": 1.2896301281378582, + "learning_rate": 8.489795918367348e-05, + "loss": 1.1193, + "step": 104 + }, + { + "epoch": 0.26854219948849106, + "grad_norm": 1.2365104040466046, + "learning_rate": 8.571428571428571e-05, + "loss": 1.1257, + "step": 105 + }, + { + "epoch": 0.2710997442455243, + "grad_norm": 0.8909578987791607, + "learning_rate": 8.653061224489797e-05, + "loss": 1.1127, + "step": 106 + }, + { + "epoch": 0.27365728900255754, + "grad_norm": 1.170325095506981, + "learning_rate": 8.734693877551021e-05, + "loss": 1.1441, + "step": 107 + }, + { + "epoch": 0.27621483375959077, + "grad_norm": 0.8299590325351531, + "learning_rate": 8.816326530612245e-05, + "loss": 1.1199, + "step": 108 + }, + { + "epoch": 0.27877237851662406, + "grad_norm": 1.0039851893132474, + "learning_rate": 8.897959183673471e-05, + "loss": 1.1454, + "step": 109 + }, + { + "epoch": 0.2813299232736573, + "grad_norm": 1.309094467948393, + "learning_rate": 8.979591836734694e-05, + "loss": 1.1534, + "step": 110 + }, + { + "epoch": 0.28388746803069054, + "grad_norm": 1.030513843956652, + "learning_rate": 9.061224489795919e-05, + "loss": 1.1518, + "step": 111 + }, + { + "epoch": 0.2864450127877238, + "grad_norm": 
1.1548472835092134, + "learning_rate": 9.142857142857143e-05, + "loss": 1.1422, + "step": 112 + }, + { + "epoch": 0.289002557544757, + "grad_norm": 1.0781950243182032, + "learning_rate": 9.224489795918369e-05, + "loss": 1.1125, + "step": 113 + }, + { + "epoch": 0.2915601023017903, + "grad_norm": 1.4696697800626741, + "learning_rate": 9.306122448979593e-05, + "loss": 1.147, + "step": 114 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.8932168550895682, + "learning_rate": 9.387755102040817e-05, + "loss": 1.1225, + "step": 115 + }, + { + "epoch": 0.2966751918158568, + "grad_norm": 1.4609624921794502, + "learning_rate": 9.469387755102041e-05, + "loss": 1.1402, + "step": 116 + }, + { + "epoch": 0.29923273657289, + "grad_norm": 1.1608303447004447, + "learning_rate": 9.551020408163267e-05, + "loss": 1.1268, + "step": 117 + }, + { + "epoch": 0.30179028132992325, + "grad_norm": 1.3699566135342083, + "learning_rate": 9.632653061224491e-05, + "loss": 1.1782, + "step": 118 + }, + { + "epoch": 0.30434782608695654, + "grad_norm": 1.0951988856065036, + "learning_rate": 9.714285714285714e-05, + "loss": 1.1383, + "step": 119 + }, + { + "epoch": 0.3069053708439898, + "grad_norm": 1.311071103466961, + "learning_rate": 9.79591836734694e-05, + "loss": 1.1485, + "step": 120 + }, + { + "epoch": 0.309462915601023, + "grad_norm": 0.8986951965704776, + "learning_rate": 9.877551020408164e-05, + "loss": 1.1604, + "step": 121 + }, + { + "epoch": 0.31202046035805625, + "grad_norm": 1.2243542530871734, + "learning_rate": 9.959183673469389e-05, + "loss": 1.1129, + "step": 122 + }, + { + "epoch": 0.3145780051150895, + "grad_norm": 1.3033780963392814, + "learning_rate": 0.00010040816326530613, + "loss": 1.1344, + "step": 123 + }, + { + "epoch": 0.3171355498721228, + "grad_norm": 1.1948786110977876, + "learning_rate": 0.00010122448979591839, + "loss": 1.1269, + "step": 124 + }, + { + "epoch": 0.319693094629156, + "grad_norm": 1.142953671137177, + "learning_rate": 0.00010204081632653062, + 
"loss": 1.1078, + "step": 125 + }, + { + "epoch": 0.32225063938618925, + "grad_norm": 1.1524456987304121, + "learning_rate": 0.00010285714285714287, + "loss": 1.1653, + "step": 126 + }, + { + "epoch": 0.3248081841432225, + "grad_norm": 1.1658601331325984, + "learning_rate": 0.00010367346938775511, + "loss": 1.1088, + "step": 127 + }, + { + "epoch": 0.3273657289002558, + "grad_norm": 1.72409589259486, + "learning_rate": 0.00010448979591836734, + "loss": 1.1289, + "step": 128 + }, + { + "epoch": 0.329923273657289, + "grad_norm": 0.7330929431707807, + "learning_rate": 0.0001053061224489796, + "loss": 1.1191, + "step": 129 + }, + { + "epoch": 0.33248081841432225, + "grad_norm": 1.2646590423606026, + "learning_rate": 0.00010612244897959184, + "loss": 1.1527, + "step": 130 + }, + { + "epoch": 0.3350383631713555, + "grad_norm": 1.723349785426215, + "learning_rate": 0.0001069387755102041, + "loss": 1.118, + "step": 131 + }, + { + "epoch": 0.3375959079283887, + "grad_norm": 0.8139452555798524, + "learning_rate": 0.00010775510204081634, + "loss": 1.1702, + "step": 132 + }, + { + "epoch": 0.340153452685422, + "grad_norm": 1.1603914477308341, + "learning_rate": 0.00010857142857142859, + "loss": 1.1467, + "step": 133 + }, + { + "epoch": 0.34271099744245526, + "grad_norm": 1.2110578835398869, + "learning_rate": 0.00010938775510204082, + "loss": 1.1174, + "step": 134 + }, + { + "epoch": 0.3452685421994885, + "grad_norm": 1.3576198261777483, + "learning_rate": 0.00011020408163265307, + "loss": 1.1746, + "step": 135 + }, + { + "epoch": 0.34782608695652173, + "grad_norm": 1.0573462588351146, + "learning_rate": 0.00011102040816326532, + "loss": 1.1333, + "step": 136 + }, + { + "epoch": 0.35038363171355497, + "grad_norm": 1.3340765255200633, + "learning_rate": 0.00011183673469387757, + "loss": 1.1482, + "step": 137 + }, + { + "epoch": 0.35294117647058826, + "grad_norm": 0.9284689786425085, + "learning_rate": 0.0001126530612244898, + "loss": 1.1304, + "step": 138 + }, + { + "epoch": 
0.3554987212276215, + "grad_norm": 1.4254480759114776, + "learning_rate": 0.00011346938775510204, + "loss": 1.1106, + "step": 139 + }, + { + "epoch": 0.35805626598465473, + "grad_norm": 1.3594890583091455, + "learning_rate": 0.0001142857142857143, + "loss": 1.1664, + "step": 140 + }, + { + "epoch": 0.36061381074168797, + "grad_norm": 1.086678024627712, + "learning_rate": 0.00011510204081632654, + "loss": 1.0802, + "step": 141 + }, + { + "epoch": 0.3631713554987212, + "grad_norm": 1.533977830454029, + "learning_rate": 0.0001159183673469388, + "loss": 1.1332, + "step": 142 + }, + { + "epoch": 0.3657289002557545, + "grad_norm": 0.940287237501315, + "learning_rate": 0.00011673469387755102, + "loss": 1.1573, + "step": 143 + }, + { + "epoch": 0.36828644501278773, + "grad_norm": 1.2572408225100642, + "learning_rate": 0.00011755102040816328, + "loss": 1.1292, + "step": 144 + }, + { + "epoch": 0.37084398976982097, + "grad_norm": 0.9995509690787548, + "learning_rate": 0.00011836734693877552, + "loss": 1.1375, + "step": 145 + }, + { + "epoch": 0.3734015345268542, + "grad_norm": 1.6478855533912629, + "learning_rate": 0.00011918367346938777, + "loss": 1.1281, + "step": 146 + }, + { + "epoch": 0.37595907928388744, + "grad_norm": 0.9807964464883856, + "learning_rate": 0.00012000000000000002, + "loss": 1.1604, + "step": 147 + }, + { + "epoch": 0.37851662404092073, + "grad_norm": 1.3424151204814954, + "learning_rate": 0.00012081632653061224, + "loss": 1.1247, + "step": 148 + }, + { + "epoch": 0.38107416879795397, + "grad_norm": 1.1827965041877697, + "learning_rate": 0.0001216326530612245, + "loss": 1.1087, + "step": 149 + }, + { + "epoch": 0.3836317135549872, + "grad_norm": 1.374289317317436, + "learning_rate": 0.00012244897959183673, + "loss": 1.1174, + "step": 150 + }, + { + "epoch": 0.38618925831202044, + "grad_norm": 1.4462982798920152, + "learning_rate": 0.00012326530612244898, + "loss": 1.1243, + "step": 151 + }, + { + "epoch": 0.3887468030690537, + "grad_norm": 
1.2338591594860693, + "learning_rate": 0.00012408163265306124, + "loss": 1.127, + "step": 152 + }, + { + "epoch": 0.391304347826087, + "grad_norm": 0.9926991217212723, + "learning_rate": 0.0001248979591836735, + "loss": 1.1152, + "step": 153 + }, + { + "epoch": 0.3938618925831202, + "grad_norm": 1.6602432782777794, + "learning_rate": 0.00012571428571428572, + "loss": 1.129, + "step": 154 + }, + { + "epoch": 0.39641943734015345, + "grad_norm": 1.0710563936657969, + "learning_rate": 0.00012653061224489798, + "loss": 1.1347, + "step": 155 + }, + { + "epoch": 0.3989769820971867, + "grad_norm": 1.0203164310897854, + "learning_rate": 0.0001273469387755102, + "loss": 1.1398, + "step": 156 + }, + { + "epoch": 0.40153452685422, + "grad_norm": 1.4486120558817688, + "learning_rate": 0.00012816326530612246, + "loss": 1.1572, + "step": 157 + }, + { + "epoch": 0.4040920716112532, + "grad_norm": 1.0665461325193415, + "learning_rate": 0.00012897959183673472, + "loss": 1.1443, + "step": 158 + }, + { + "epoch": 0.40664961636828645, + "grad_norm": 1.6999184553867208, + "learning_rate": 0.00012979591836734695, + "loss": 1.1027, + "step": 159 + }, + { + "epoch": 0.4092071611253197, + "grad_norm": 1.0289801155197138, + "learning_rate": 0.0001306122448979592, + "loss": 1.1194, + "step": 160 + }, + { + "epoch": 0.4117647058823529, + "grad_norm": 1.5775539926551432, + "learning_rate": 0.00013142857142857143, + "loss": 1.1209, + "step": 161 + }, + { + "epoch": 0.4143222506393862, + "grad_norm": 0.9132827293227751, + "learning_rate": 0.00013224489795918368, + "loss": 1.1115, + "step": 162 + }, + { + "epoch": 0.41687979539641945, + "grad_norm": 1.8502610336806449, + "learning_rate": 0.00013306122448979594, + "loss": 1.1237, + "step": 163 + }, + { + "epoch": 0.4194373401534527, + "grad_norm": 1.3709322904356605, + "learning_rate": 0.00013387755102040817, + "loss": 1.1353, + "step": 164 + }, + { + "epoch": 0.4219948849104859, + "grad_norm": 1.1361849330749851, + "learning_rate": 
0.00013469387755102042, + "loss": 1.1043, + "step": 165 + }, + { + "epoch": 0.42455242966751916, + "grad_norm": 1.1401579492886242, + "learning_rate": 0.00013551020408163268, + "loss": 1.1252, + "step": 166 + }, + { + "epoch": 0.42710997442455245, + "grad_norm": 1.171525164401231, + "learning_rate": 0.0001363265306122449, + "loss": 1.1226, + "step": 167 + }, + { + "epoch": 0.4296675191815857, + "grad_norm": 1.7103135890270424, + "learning_rate": 0.00013714285714285713, + "loss": 1.1323, + "step": 168 + }, + { + "epoch": 0.4322250639386189, + "grad_norm": 1.0590485560747558, + "learning_rate": 0.0001379591836734694, + "loss": 1.1318, + "step": 169 + }, + { + "epoch": 0.43478260869565216, + "grad_norm": 1.1381323068879685, + "learning_rate": 0.00013877551020408165, + "loss": 1.1093, + "step": 170 + }, + { + "epoch": 0.4373401534526854, + "grad_norm": 1.8095148756504853, + "learning_rate": 0.0001395918367346939, + "loss": 1.1297, + "step": 171 + }, + { + "epoch": 0.4398976982097187, + "grad_norm": 1.022630524722603, + "learning_rate": 0.00014040816326530613, + "loss": 1.1217, + "step": 172 + }, + { + "epoch": 0.4424552429667519, + "grad_norm": 1.3822427448836618, + "learning_rate": 0.00014122448979591838, + "loss": 1.145, + "step": 173 + }, + { + "epoch": 0.44501278772378516, + "grad_norm": 1.3577366143882001, + "learning_rate": 0.0001420408163265306, + "loss": 1.151, + "step": 174 + }, + { + "epoch": 0.4475703324808184, + "grad_norm": 1.1001324929653025, + "learning_rate": 0.00014285714285714287, + "loss": 1.1209, + "step": 175 + }, + { + "epoch": 0.45012787723785164, + "grad_norm": 1.7043306971887084, + "learning_rate": 0.00014367346938775512, + "loss": 1.155, + "step": 176 + }, + { + "epoch": 0.45268542199488493, + "grad_norm": 0.8908531162714106, + "learning_rate": 0.00014448979591836735, + "loss": 1.1264, + "step": 177 + }, + { + "epoch": 0.45524296675191817, + "grad_norm": 2.0064867394818577, + "learning_rate": 0.0001453061224489796, + "loss": 1.1339, + "step": 
178 + }, + { + "epoch": 0.4578005115089514, + "grad_norm": 1.272017394425661, + "learning_rate": 0.00014612244897959183, + "loss": 1.1179, + "step": 179 + }, + { + "epoch": 0.46035805626598464, + "grad_norm": 1.6809099682132984, + "learning_rate": 0.0001469387755102041, + "loss": 1.1306, + "step": 180 + }, + { + "epoch": 0.4629156010230179, + "grad_norm": 1.2729546637062361, + "learning_rate": 0.00014775510204081635, + "loss": 1.1547, + "step": 181 + }, + { + "epoch": 0.46547314578005117, + "grad_norm": 1.2637405257695475, + "learning_rate": 0.0001485714285714286, + "loss": 1.1234, + "step": 182 + }, + { + "epoch": 0.4680306905370844, + "grad_norm": 1.3792667256601667, + "learning_rate": 0.00014938775510204083, + "loss": 1.1384, + "step": 183 + }, + { + "epoch": 0.47058823529411764, + "grad_norm": 1.0581158807496975, + "learning_rate": 0.00015020408163265308, + "loss": 1.1308, + "step": 184 + }, + { + "epoch": 0.4731457800511509, + "grad_norm": 1.2395276036732317, + "learning_rate": 0.0001510204081632653, + "loss": 1.142, + "step": 185 + }, + { + "epoch": 0.47570332480818417, + "grad_norm": 1.1474988241030795, + "learning_rate": 0.00015183673469387757, + "loss": 1.1399, + "step": 186 + }, + { + "epoch": 0.4782608695652174, + "grad_norm": 1.4488607840873033, + "learning_rate": 0.0001526530612244898, + "loss": 1.1247, + "step": 187 + }, + { + "epoch": 0.48081841432225064, + "grad_norm": 0.9895262383072666, + "learning_rate": 0.00015346938775510205, + "loss": 1.1439, + "step": 188 + }, + { + "epoch": 0.4833759590792839, + "grad_norm": 1.509540789570866, + "learning_rate": 0.0001542857142857143, + "loss": 1.1268, + "step": 189 + }, + { + "epoch": 0.4859335038363171, + "grad_norm": 1.2634220572499701, + "learning_rate": 0.00015510204081632654, + "loss": 1.1315, + "step": 190 + }, + { + "epoch": 0.4884910485933504, + "grad_norm": 2.03411519572473, + "learning_rate": 0.0001559183673469388, + "loss": 1.0859, + "step": 191 + }, + { + "epoch": 0.49104859335038364, + 
"grad_norm": 1.1783378998438716, + "learning_rate": 0.00015673469387755102, + "loss": 1.122, + "step": 192 + }, + { + "epoch": 0.4936061381074169, + "grad_norm": 1.869178693106169, + "learning_rate": 0.00015755102040816327, + "loss": 1.0953, + "step": 193 + }, + { + "epoch": 0.4961636828644501, + "grad_norm": 1.4133576585465655, + "learning_rate": 0.00015836734693877553, + "loss": 1.0973, + "step": 194 + }, + { + "epoch": 0.49872122762148335, + "grad_norm": 1.1007402607506083, + "learning_rate": 0.00015918367346938778, + "loss": 1.1666, + "step": 195 + }, + { + "epoch": 0.5012787723785166, + "grad_norm": 1.0455333445001125, + "learning_rate": 0.00016, + "loss": 1.1244, + "step": 196 + }, + { + "epoch": 0.5038363171355499, + "grad_norm": 1.1414091012657146, + "learning_rate": 0.00015999987240667874, + "loss": 1.118, + "step": 197 + }, + { + "epoch": 0.5063938618925832, + "grad_norm": 1.1934725533176622, + "learning_rate": 0.0001599994896271219, + "loss": 1.1489, + "step": 198 + }, + { + "epoch": 0.5089514066496164, + "grad_norm": 1.3418673611629677, + "learning_rate": 0.0001599988516625505, + "loss": 1.1172, + "step": 199 + }, + { + "epoch": 0.5115089514066496, + "grad_norm": 1.2281301450926736, + "learning_rate": 0.00015999795851499954, + "loss": 1.124, + "step": 200 + }, + { + "epoch": 0.5140664961636828, + "grad_norm": 1.4232277874832118, + "learning_rate": 0.000159996810187318, + "loss": 1.1087, + "step": 201 + }, + { + "epoch": 0.5166240409207161, + "grad_norm": 1.2445810609035501, + "learning_rate": 0.0001599954066831689, + "loss": 1.0977, + "step": 202 + }, + { + "epoch": 0.5191815856777494, + "grad_norm": 1.4902156849341144, + "learning_rate": 0.00015999374800702916, + "loss": 1.1278, + "step": 203 + }, + { + "epoch": 0.5217391304347826, + "grad_norm": 0.9117749926569193, + "learning_rate": 0.00015999183416418963, + "loss": 1.0978, + "step": 204 + }, + { + "epoch": 0.5242966751918159, + "grad_norm": 1.521914055307176, + "learning_rate": 
0.0001599896651607552, + "loss": 1.1255, + "step": 205 + }, + { + "epoch": 0.5268542199488491, + "grad_norm": 1.675086821646465, + "learning_rate": 0.00015998724100364464, + "loss": 1.1117, + "step": 206 + }, + { + "epoch": 0.5294117647058824, + "grad_norm": 1.0370916213463357, + "learning_rate": 0.00015998456170059059, + "loss": 1.1269, + "step": 207 + }, + { + "epoch": 0.5319693094629157, + "grad_norm": 1.4543936507994073, + "learning_rate": 0.00015998162726013954, + "loss": 1.1159, + "step": 208 + }, + { + "epoch": 0.5345268542199488, + "grad_norm": 1.628168132567413, + "learning_rate": 0.00015997843769165193, + "loss": 1.1025, + "step": 209 + }, + { + "epoch": 0.5370843989769821, + "grad_norm": 1.114123127352084, + "learning_rate": 0.0001599749930053019, + "loss": 1.0962, + "step": 210 + }, + { + "epoch": 0.5396419437340153, + "grad_norm": 1.7051681399590384, + "learning_rate": 0.00015997129321207747, + "loss": 1.1216, + "step": 211 + }, + { + "epoch": 0.5421994884910486, + "grad_norm": 0.9137353240287979, + "learning_rate": 0.00015996733832378032, + "loss": 1.0845, + "step": 212 + }, + { + "epoch": 0.5447570332480819, + "grad_norm": 1.3585376285654678, + "learning_rate": 0.00015996312835302593, + "loss": 1.1337, + "step": 213 + }, + { + "epoch": 0.5473145780051151, + "grad_norm": 0.986649874454745, + "learning_rate": 0.00015995866331324334, + "loss": 1.0791, + "step": 214 + }, + { + "epoch": 0.5498721227621484, + "grad_norm": 1.4872086766761456, + "learning_rate": 0.00015995394321867534, + "loss": 1.0898, + "step": 215 + }, + { + "epoch": 0.5524296675191815, + "grad_norm": 1.3583123340693906, + "learning_rate": 0.0001599489680843782, + "loss": 1.1221, + "step": 216 + }, + { + "epoch": 0.5549872122762148, + "grad_norm": 1.1209846232833984, + "learning_rate": 0.00015994373792622182, + "loss": 1.0914, + "step": 217 + }, + { + "epoch": 0.5575447570332481, + "grad_norm": 1.1159100799958372, + "learning_rate": 0.0001599382527608895, + "loss": 1.0659, + "step": 218 + 
}, + { + "epoch": 0.5601023017902813, + "grad_norm": 1.014792737157986, + "learning_rate": 0.00015993251260587796, + "loss": 1.0895, + "step": 219 + }, + { + "epoch": 0.5626598465473146, + "grad_norm": 1.3514884114926682, + "learning_rate": 0.00015992651747949742, + "loss": 1.1447, + "step": 220 + }, + { + "epoch": 0.5652173913043478, + "grad_norm": 1.3662814180004041, + "learning_rate": 0.00015992026740087125, + "loss": 1.082, + "step": 221 + }, + { + "epoch": 0.5677749360613811, + "grad_norm": 1.1729073479593213, + "learning_rate": 0.00015991376238993623, + "loss": 1.0858, + "step": 222 + }, + { + "epoch": 0.5703324808184144, + "grad_norm": 1.098894416827083, + "learning_rate": 0.0001599070024674422, + "loss": 1.0903, + "step": 223 + }, + { + "epoch": 0.5728900255754475, + "grad_norm": 0.975594652798118, + "learning_rate": 0.0001598999876549522, + "loss": 1.1162, + "step": 224 + }, + { + "epoch": 0.5754475703324808, + "grad_norm": 1.0143269006614197, + "learning_rate": 0.00015989271797484236, + "loss": 1.1131, + "step": 225 + }, + { + "epoch": 0.578005115089514, + "grad_norm": 1.3483287924450105, + "learning_rate": 0.00015988519345030167, + "loss": 1.0896, + "step": 226 + }, + { + "epoch": 0.5805626598465473, + "grad_norm": 0.7520971748388883, + "learning_rate": 0.00015987741410533217, + "loss": 1.0953, + "step": 227 + }, + { + "epoch": 0.5831202046035806, + "grad_norm": 1.3201762056381772, + "learning_rate": 0.0001598693799647486, + "loss": 1.0837, + "step": 228 + }, + { + "epoch": 0.5856777493606138, + "grad_norm": 1.2193125892583727, + "learning_rate": 0.00015986109105417862, + "loss": 1.1026, + "step": 229 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.3892856581992825, + "learning_rate": 0.0001598525474000624, + "loss": 1.1069, + "step": 230 + }, + { + "epoch": 0.5907928388746803, + "grad_norm": 0.8831793540357707, + "learning_rate": 0.00015984374902965284, + "loss": 1.1079, + "step": 231 + }, + { + "epoch": 0.5933503836317136, + "grad_norm": 
0.8405263869404558, + "learning_rate": 0.00015983469597101517, + "loss": 1.088, + "step": 232 + }, + { + "epoch": 0.5959079283887468, + "grad_norm": 0.8048081062282874, + "learning_rate": 0.0001598253882530272, + "loss": 1.0947, + "step": 233 + }, + { + "epoch": 0.59846547314578, + "grad_norm": 1.1026453527649267, + "learning_rate": 0.00015981582590537897, + "loss": 1.0527, + "step": 234 + }, + { + "epoch": 0.6010230179028133, + "grad_norm": 1.945124480668707, + "learning_rate": 0.0001598060089585728, + "loss": 1.0747, + "step": 235 + }, + { + "epoch": 0.6035805626598465, + "grad_norm": 0.6633926296437849, + "learning_rate": 0.00015979593744392312, + "loss": 1.1013, + "step": 236 + }, + { + "epoch": 0.6061381074168798, + "grad_norm": 1.9149178380903846, + "learning_rate": 0.00015978561139355635, + "loss": 1.0967, + "step": 237 + }, + { + "epoch": 0.6086956521739131, + "grad_norm": 1.3222885863625786, + "learning_rate": 0.00015977503084041087, + "loss": 1.0733, + "step": 238 + }, + { + "epoch": 0.6112531969309463, + "grad_norm": 1.0130031801765467, + "learning_rate": 0.00015976419581823688, + "loss": 1.1196, + "step": 239 + }, + { + "epoch": 0.6138107416879796, + "grad_norm": 1.5551163600364186, + "learning_rate": 0.00015975310636159632, + "loss": 1.088, + "step": 240 + }, + { + "epoch": 0.6163682864450127, + "grad_norm": 1.2158294095692619, + "learning_rate": 0.00015974176250586265, + "loss": 1.0768, + "step": 241 + }, + { + "epoch": 0.618925831202046, + "grad_norm": 1.0765542476008974, + "learning_rate": 0.00015973016428722094, + "loss": 1.106, + "step": 242 + }, + { + "epoch": 0.6214833759590793, + "grad_norm": 1.1132699812581053, + "learning_rate": 0.0001597183117426675, + "loss": 1.1002, + "step": 243 + }, + { + "epoch": 0.6240409207161125, + "grad_norm": 1.3600712766399181, + "learning_rate": 0.00015970620491001004, + "loss": 1.1445, + "step": 244 + }, + { + "epoch": 0.6265984654731458, + "grad_norm": 1.0416236386170334, + "learning_rate": 
0.00015969384382786729, + "loss": 1.1019, + "step": 245 + }, + { + "epoch": 0.629156010230179, + "grad_norm": 1.3027622469497735, + "learning_rate": 0.00015968122853566905, + "loss": 1.1002, + "step": 246 + }, + { + "epoch": 0.6317135549872123, + "grad_norm": 0.8037304289524585, + "learning_rate": 0.000159668359073656, + "loss": 1.0892, + "step": 247 + }, + { + "epoch": 0.6342710997442456, + "grad_norm": 0.9188404876547497, + "learning_rate": 0.00015965523548287956, + "loss": 1.1395, + "step": 248 + }, + { + "epoch": 0.6368286445012787, + "grad_norm": 1.1903100937742757, + "learning_rate": 0.0001596418578052018, + "loss": 1.1157, + "step": 249 + }, + { + "epoch": 0.639386189258312, + "grad_norm": 1.134136870599723, + "learning_rate": 0.0001596282260832953, + "loss": 1.0961, + "step": 250 + }, + { + "epoch": 0.6419437340153452, + "grad_norm": 1.1666299453160198, + "learning_rate": 0.00015961434036064294, + "loss": 1.1019, + "step": 251 + }, + { + "epoch": 0.6445012787723785, + "grad_norm": 0.8723696508206527, + "learning_rate": 0.00015960020068153785, + "loss": 1.1053, + "step": 252 + }, + { + "epoch": 0.6470588235294118, + "grad_norm": 0.9568431382175138, + "learning_rate": 0.00015958580709108332, + "loss": 1.0848, + "step": 253 + }, + { + "epoch": 0.649616368286445, + "grad_norm": 1.1129808719393837, + "learning_rate": 0.00015957115963519244, + "loss": 1.136, + "step": 254 + }, + { + "epoch": 0.6521739130434783, + "grad_norm": 1.3963252311082919, + "learning_rate": 0.00015955625836058815, + "loss": 1.0952, + "step": 255 + }, + { + "epoch": 0.6547314578005116, + "grad_norm": 0.9298685363556572, + "learning_rate": 0.00015954110331480302, + "loss": 1.0809, + "step": 256 + }, + { + "epoch": 0.6572890025575447, + "grad_norm": 0.7001103257159264, + "learning_rate": 0.00015952569454617916, + "loss": 1.116, + "step": 257 + }, + { + "epoch": 0.659846547314578, + "grad_norm": 0.9441648189630093, + "learning_rate": 0.00015951003210386793, + "loss": 1.0784, + "step": 258 + }, 
+ { + "epoch": 0.6624040920716112, + "grad_norm": 1.4002615649377306, + "learning_rate": 0.0001594941160378299, + "loss": 1.1071, + "step": 259 + }, + { + "epoch": 0.6649616368286445, + "grad_norm": 0.8178386113146091, + "learning_rate": 0.00015947794639883473, + "loss": 1.087, + "step": 260 + }, + { + "epoch": 0.6675191815856778, + "grad_norm": 1.452979203118016, + "learning_rate": 0.0001594615232384608, + "loss": 1.0604, + "step": 261 + }, + { + "epoch": 0.670076726342711, + "grad_norm": 0.6774046196617319, + "learning_rate": 0.00015944484660909523, + "loss": 1.076, + "step": 262 + }, + { + "epoch": 0.6726342710997443, + "grad_norm": 0.7670969521082094, + "learning_rate": 0.00015942791656393376, + "loss": 1.1204, + "step": 263 + }, + { + "epoch": 0.6751918158567775, + "grad_norm": 1.0850513811767653, + "learning_rate": 0.00015941073315698035, + "loss": 1.0986, + "step": 264 + }, + { + "epoch": 0.6777493606138107, + "grad_norm": 1.472017968872445, + "learning_rate": 0.00015939329644304724, + "loss": 1.1274, + "step": 265 + }, + { + "epoch": 0.680306905370844, + "grad_norm": 0.9702787550395545, + "learning_rate": 0.0001593756064777546, + "loss": 1.0934, + "step": 266 + }, + { + "epoch": 0.6828644501278772, + "grad_norm": 1.0584827946044062, + "learning_rate": 0.00015935766331753049, + "loss": 1.0471, + "step": 267 + }, + { + "epoch": 0.6854219948849105, + "grad_norm": 0.8089889110807604, + "learning_rate": 0.00015933946701961055, + "loss": 1.0887, + "step": 268 + }, + { + "epoch": 0.6879795396419437, + "grad_norm": 1.0320882417148256, + "learning_rate": 0.000159321017642038, + "loss": 1.0667, + "step": 269 + }, + { + "epoch": 0.690537084398977, + "grad_norm": 1.4674982303373638, + "learning_rate": 0.00015930231524366326, + "loss": 1.1073, + "step": 270 + }, + { + "epoch": 0.6930946291560103, + "grad_norm": 0.7320918729382444, + "learning_rate": 0.0001592833598841438, + "loss": 1.1053, + "step": 271 + }, + { + "epoch": 0.6956521739130435, + "grad_norm": 
0.8289503109780553, + "learning_rate": 0.00015926415162394414, + "loss": 1.0707, + "step": 272 + }, + { + "epoch": 0.6982097186700768, + "grad_norm": 1.130825151382903, + "learning_rate": 0.00015924469052433534, + "loss": 1.0878, + "step": 273 + }, + { + "epoch": 0.7007672634271099, + "grad_norm": 0.9816938036576663, + "learning_rate": 0.00015922497664739508, + "loss": 1.1036, + "step": 274 + }, + { + "epoch": 0.7033248081841432, + "grad_norm": 1.1744231549177595, + "learning_rate": 0.0001592050100560074, + "loss": 1.0826, + "step": 275 + }, + { + "epoch": 0.7058823529411765, + "grad_norm": 1.1244228971801966, + "learning_rate": 0.0001591847908138623, + "loss": 1.0992, + "step": 276 + }, + { + "epoch": 0.7084398976982097, + "grad_norm": 1.0273673884618308, + "learning_rate": 0.00015916431898545583, + "loss": 1.1122, + "step": 277 + }, + { + "epoch": 0.710997442455243, + "grad_norm": 1.3019719478481941, + "learning_rate": 0.0001591435946360897, + "loss": 1.0797, + "step": 278 + }, + { + "epoch": 0.7135549872122762, + "grad_norm": 0.9179007336169464, + "learning_rate": 0.00015912261783187113, + "loss": 1.1083, + "step": 279 + }, + { + "epoch": 0.7161125319693095, + "grad_norm": 1.3938652199122237, + "learning_rate": 0.00015910138863971265, + "loss": 1.0768, + "step": 280 + }, + { + "epoch": 0.7186700767263428, + "grad_norm": 0.8460589876687793, + "learning_rate": 0.00015907990712733176, + "loss": 1.0675, + "step": 281 + }, + { + "epoch": 0.7212276214833759, + "grad_norm": 1.2311027949600852, + "learning_rate": 0.00015905817336325098, + "loss": 1.095, + "step": 282 + }, + { + "epoch": 0.7237851662404092, + "grad_norm": 0.5637046057878358, + "learning_rate": 0.00015903618741679735, + "loss": 1.0227, + "step": 283 + }, + { + "epoch": 0.7263427109974424, + "grad_norm": 0.8864195638565602, + "learning_rate": 0.00015901394935810236, + "loss": 1.0894, + "step": 284 + }, + { + "epoch": 0.7289002557544757, + "grad_norm": 1.118154448385255, + "learning_rate": 
0.00015899145925810172, + "loss": 1.0708, + "step": 285 + }, + { + "epoch": 0.731457800511509, + "grad_norm": 0.8797417608904688, + "learning_rate": 0.0001589687171885351, + "loss": 1.0973, + "step": 286 + }, + { + "epoch": 0.7340153452685422, + "grad_norm": 1.2417892204976435, + "learning_rate": 0.0001589457232219459, + "loss": 1.0959, + "step": 287 + }, + { + "epoch": 0.7365728900255755, + "grad_norm": 1.3823792436001885, + "learning_rate": 0.000158922477431681, + "loss": 1.0588, + "step": 288 + }, + { + "epoch": 0.7391304347826086, + "grad_norm": 0.5914973374896305, + "learning_rate": 0.00015889897989189065, + "loss": 1.0877, + "step": 289 + }, + { + "epoch": 0.7416879795396419, + "grad_norm": 0.6894697219091279, + "learning_rate": 0.00015887523067752805, + "loss": 1.0987, + "step": 290 + }, + { + "epoch": 0.7442455242966752, + "grad_norm": 0.9378104999898202, + "learning_rate": 0.0001588512298643492, + "loss": 1.0813, + "step": 291 + }, + { + "epoch": 0.7468030690537084, + "grad_norm": 1.5924222953617497, + "learning_rate": 0.00015882697752891273, + "loss": 1.0493, + "step": 292 + }, + { + "epoch": 0.7493606138107417, + "grad_norm": 0.8644236985398326, + "learning_rate": 0.0001588024737485795, + "loss": 1.0745, + "step": 293 + }, + { + "epoch": 0.7519181585677749, + "grad_norm": 1.2617771174370838, + "learning_rate": 0.00015877771860151255, + "loss": 1.0756, + "step": 294 + }, + { + "epoch": 0.7544757033248082, + "grad_norm": 0.6053221801377883, + "learning_rate": 0.00015875271216667658, + "loss": 1.0624, + "step": 295 + }, + { + "epoch": 0.7570332480818415, + "grad_norm": 0.8733719684486176, + "learning_rate": 0.00015872745452383797, + "loss": 1.0713, + "step": 296 + }, + { + "epoch": 0.7595907928388747, + "grad_norm": 1.0570673007983702, + "learning_rate": 0.00015870194575356444, + "loss": 1.1115, + "step": 297 + }, + { + "epoch": 0.7621483375959079, + "grad_norm": 0.7325728255149376, + "learning_rate": 0.00015867618593722464, + "loss": 1.0871, + "step": 298 
+ }, + { + "epoch": 0.7647058823529411, + "grad_norm": 0.7340524897043603, + "learning_rate": 0.00015865017515698807, + "loss": 1.0979, + "step": 299 + }, + { + "epoch": 0.7672634271099744, + "grad_norm": 1.1656279626023016, + "learning_rate": 0.00015862391349582484, + "loss": 1.0597, + "step": 300 + }, + { + "epoch": 0.7698209718670077, + "grad_norm": 0.9978239568565908, + "learning_rate": 0.00015859740103750522, + "loss": 1.0932, + "step": 301 + }, + { + "epoch": 0.7723785166240409, + "grad_norm": 1.878442480743071, + "learning_rate": 0.00015857063786659954, + "loss": 1.0938, + "step": 302 + }, + { + "epoch": 0.7749360613810742, + "grad_norm": 0.6117011045915516, + "learning_rate": 0.00015854362406847786, + "loss": 1.0623, + "step": 303 + }, + { + "epoch": 0.7774936061381074, + "grad_norm": 1.8420720325784072, + "learning_rate": 0.00015851635972930967, + "loss": 1.0699, + "step": 304 + }, + { + "epoch": 0.7800511508951407, + "grad_norm": 1.002131752478182, + "learning_rate": 0.00015848884493606367, + "loss": 1.0826, + "step": 305 + }, + { + "epoch": 0.782608695652174, + "grad_norm": 1.2471718061674597, + "learning_rate": 0.00015846107977650743, + "loss": 1.0755, + "step": 306 + }, + { + "epoch": 0.7851662404092071, + "grad_norm": 0.9634733361160541, + "learning_rate": 0.0001584330643392072, + "loss": 1.0416, + "step": 307 + }, + { + "epoch": 0.7877237851662404, + "grad_norm": 1.790526532103535, + "learning_rate": 0.00015840479871352754, + "loss": 1.0754, + "step": 308 + }, + { + "epoch": 0.7902813299232737, + "grad_norm": 0.8667875735812341, + "learning_rate": 0.00015837628298963105, + "loss": 1.0934, + "step": 309 + }, + { + "epoch": 0.7928388746803069, + "grad_norm": 1.4536288271279978, + "learning_rate": 0.00015834751725847816, + "loss": 1.0632, + "step": 310 + }, + { + "epoch": 0.7953964194373402, + "grad_norm": 1.3777516183353187, + "learning_rate": 0.00015831850161182677, + "loss": 1.0956, + "step": 311 + }, + { + "epoch": 0.7979539641943734, + "grad_norm": 
0.7721449298753891, + "learning_rate": 0.0001582892361422319, + "loss": 1.1069, + "step": 312 + }, + { + "epoch": 0.8005115089514067, + "grad_norm": 1.174156872017157, + "learning_rate": 0.00015825972094304555, + "loss": 1.0728, + "step": 313 + }, + { + "epoch": 0.80306905370844, + "grad_norm": 1.2588808228888746, + "learning_rate": 0.00015822995610841623, + "loss": 1.0772, + "step": 314 + }, + { + "epoch": 0.8056265984654731, + "grad_norm": 0.8720000426242472, + "learning_rate": 0.00015819994173328885, + "loss": 1.0654, + "step": 315 + }, + { + "epoch": 0.8081841432225064, + "grad_norm": 0.923631788770043, + "learning_rate": 0.00015816967791340417, + "loss": 1.0668, + "step": 316 + }, + { + "epoch": 0.8107416879795396, + "grad_norm": 1.1357229877804957, + "learning_rate": 0.00015813916474529885, + "loss": 1.0911, + "step": 317 + }, + { + "epoch": 0.8132992327365729, + "grad_norm": 0.8907121901474587, + "learning_rate": 0.0001581084023263047, + "loss": 1.0826, + "step": 318 + }, + { + "epoch": 0.8158567774936062, + "grad_norm": 1.0350783431396418, + "learning_rate": 0.00015807739075454874, + "loss": 1.0426, + "step": 319 + }, + { + "epoch": 0.8184143222506394, + "grad_norm": 1.2795269410097496, + "learning_rate": 0.00015804613012895268, + "loss": 1.0731, + "step": 320 + }, + { + "epoch": 0.8209718670076727, + "grad_norm": 0.8440033467786482, + "learning_rate": 0.0001580146205492327, + "loss": 1.0491, + "step": 321 + }, + { + "epoch": 0.8235294117647058, + "grad_norm": 0.9336906509179427, + "learning_rate": 0.00015798286211589916, + "loss": 1.0796, + "step": 322 + }, + { + "epoch": 0.8260869565217391, + "grad_norm": 1.243210147279451, + "learning_rate": 0.00015795085493025608, + "loss": 1.0998, + "step": 323 + }, + { + "epoch": 0.8286445012787724, + "grad_norm": 0.985781736568132, + "learning_rate": 0.00015791859909440107, + "loss": 1.097, + "step": 324 + }, + { + "epoch": 0.8312020460358056, + "grad_norm": 1.115722030381177, + "learning_rate": 
0.00015788609471122485, + "loss": 1.0594, + "step": 325 + }, + { + "epoch": 0.8337595907928389, + "grad_norm": 0.6317177707367972, + "learning_rate": 0.000157853341884411, + "loss": 1.0672, + "step": 326 + }, + { + "epoch": 0.8363171355498721, + "grad_norm": 0.7614994384747567, + "learning_rate": 0.00015782034071843557, + "loss": 1.1076, + "step": 327 + }, + { + "epoch": 0.8388746803069054, + "grad_norm": 0.6788203373242645, + "learning_rate": 0.00015778709131856675, + "loss": 1.0794, + "step": 328 + }, + { + "epoch": 0.8414322250639387, + "grad_norm": 0.6573621171258895, + "learning_rate": 0.00015775359379086455, + "loss": 1.1175, + "step": 329 + }, + { + "epoch": 0.8439897698209718, + "grad_norm": 0.865009547315977, + "learning_rate": 0.00015771984824218053, + "loss": 1.0893, + "step": 330 + }, + { + "epoch": 0.8465473145780051, + "grad_norm": 1.0982989183876286, + "learning_rate": 0.00015768585478015732, + "loss": 1.0628, + "step": 331 + }, + { + "epoch": 0.8491048593350383, + "grad_norm": 1.5816845014682415, + "learning_rate": 0.00015765161351322845, + "loss": 1.0553, + "step": 332 + }, + { + "epoch": 0.8516624040920716, + "grad_norm": 0.5583122236625028, + "learning_rate": 0.0001576171245506178, + "loss": 1.1007, + "step": 333 + }, + { + "epoch": 0.8542199488491049, + "grad_norm": 1.4589646002026686, + "learning_rate": 0.00015758238800233937, + "loss": 1.0354, + "step": 334 + }, + { + "epoch": 0.8567774936061381, + "grad_norm": 1.1988373358126654, + "learning_rate": 0.00015754740397919703, + "loss": 1.0609, + "step": 335 + }, + { + "epoch": 0.8593350383631714, + "grad_norm": 0.7798431918437426, + "learning_rate": 0.0001575121725927839, + "loss": 1.0599, + "step": 336 + }, + { + "epoch": 0.8618925831202046, + "grad_norm": 0.8001399476748517, + "learning_rate": 0.00015747669395548228, + "loss": 1.0825, + "step": 337 + }, + { + "epoch": 0.8644501278772379, + "grad_norm": 0.9268381518772149, + "learning_rate": 0.00015744096818046306, + "loss": 1.0867, + "step": 
338 + }, + { + "epoch": 0.8670076726342711, + "grad_norm": 0.8482506857320948, + "learning_rate": 0.00015740499538168548, + "loss": 1.0519, + "step": 339 + }, + { + "epoch": 0.8695652173913043, + "grad_norm": 1.1051027320167537, + "learning_rate": 0.00015736877567389682, + "loss": 1.0926, + "step": 340 + }, + { + "epoch": 0.8721227621483376, + "grad_norm": 1.1295814345497992, + "learning_rate": 0.00015733230917263182, + "loss": 1.0485, + "step": 341 + }, + { + "epoch": 0.8746803069053708, + "grad_norm": 0.8381578992561258, + "learning_rate": 0.00015729559599421262, + "loss": 1.0742, + "step": 342 + }, + { + "epoch": 0.8772378516624041, + "grad_norm": 1.1355285501553987, + "learning_rate": 0.00015725863625574808, + "loss": 1.0731, + "step": 343 + }, + { + "epoch": 0.8797953964194374, + "grad_norm": 1.2716344612482289, + "learning_rate": 0.0001572214300751336, + "loss": 1.0818, + "step": 344 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 0.7977797928903454, + "learning_rate": 0.00015718397757105072, + "loss": 1.0592, + "step": 345 + }, + { + "epoch": 0.8849104859335039, + "grad_norm": 0.5888833117266756, + "learning_rate": 0.0001571462788629666, + "loss": 1.124, + "step": 346 + }, + { + "epoch": 0.887468030690537, + "grad_norm": 0.7277724084604381, + "learning_rate": 0.00015710833407113386, + "loss": 1.0076, + "step": 347 + }, + { + "epoch": 0.8900255754475703, + "grad_norm": 0.7175876926395411, + "learning_rate": 0.00015707014331659008, + "loss": 1.0735, + "step": 348 + }, + { + "epoch": 0.8925831202046036, + "grad_norm": 0.8127426786215441, + "learning_rate": 0.00015703170672115737, + "loss": 1.0582, + "step": 349 + }, + { + "epoch": 0.8951406649616368, + "grad_norm": 1.0648976192629485, + "learning_rate": 0.00015699302440744202, + "loss": 1.0788, + "step": 350 + }, + { + "epoch": 0.8976982097186701, + "grad_norm": 1.2133128800930093, + "learning_rate": 0.00015695409649883418, + "loss": 1.0986, + "step": 351 + }, + { + "epoch": 0.9002557544757033, + 
"grad_norm": 0.946491692276404, + "learning_rate": 0.0001569149231195074, + "loss": 1.0522, + "step": 352 + }, + { + "epoch": 0.9028132992327366, + "grad_norm": 1.2375939940771874, + "learning_rate": 0.0001568755043944182, + "loss": 1.077, + "step": 353 + }, + { + "epoch": 0.9053708439897699, + "grad_norm": 0.7734830655451521, + "learning_rate": 0.00015683584044930572, + "loss": 1.0659, + "step": 354 + }, + { + "epoch": 0.907928388746803, + "grad_norm": 0.6097683019560797, + "learning_rate": 0.00015679593141069132, + "loss": 1.0446, + "step": 355 + }, + { + "epoch": 0.9104859335038363, + "grad_norm": 0.5759587093662797, + "learning_rate": 0.0001567557774058782, + "loss": 1.0577, + "step": 356 + }, + { + "epoch": 0.9130434782608695, + "grad_norm": 0.5878753626840652, + "learning_rate": 0.0001567153785629509, + "loss": 1.0675, + "step": 357 + }, + { + "epoch": 0.9156010230179028, + "grad_norm": 0.6653732754348032, + "learning_rate": 0.000156674735010775, + "loss": 1.0891, + "step": 358 + }, + { + "epoch": 0.9181585677749361, + "grad_norm": 0.768263015413779, + "learning_rate": 0.00015663384687899663, + "loss": 1.0715, + "step": 359 + }, + { + "epoch": 0.9207161125319693, + "grad_norm": 0.9765055577703315, + "learning_rate": 0.00015659271429804215, + "loss": 1.0396, + "step": 360 + }, + { + "epoch": 0.9232736572890026, + "grad_norm": 1.4554265699809417, + "learning_rate": 0.00015655133739911757, + "loss": 1.0919, + "step": 361 + }, + { + "epoch": 0.9258312020460358, + "grad_norm": 0.7208280463855818, + "learning_rate": 0.0001565097163142083, + "loss": 1.0151, + "step": 362 + }, + { + "epoch": 0.928388746803069, + "grad_norm": 0.8611710190483517, + "learning_rate": 0.00015646785117607865, + "loss": 1.0796, + "step": 363 + }, + { + "epoch": 0.9309462915601023, + "grad_norm": 1.1291766944081427, + "learning_rate": 0.00015642574211827142, + "loss": 1.0651, + "step": 364 + }, + { + "epoch": 0.9335038363171355, + "grad_norm": 1.0023408896760695, + "learning_rate": 
0.00015638338927510752, + "loss": 1.0785, + "step": 365 + }, + { + "epoch": 0.9360613810741688, + "grad_norm": 1.2325468393537922, + "learning_rate": 0.00015634079278168542, + "loss": 1.1032, + "step": 366 + }, + { + "epoch": 0.9386189258312021, + "grad_norm": 0.8116887550297889, + "learning_rate": 0.00015629795277388077, + "loss": 1.0784, + "step": 367 + }, + { + "epoch": 0.9411764705882353, + "grad_norm": 0.8465793191190484, + "learning_rate": 0.00015625486938834613, + "loss": 1.0729, + "step": 368 + }, + { + "epoch": 0.9437340153452686, + "grad_norm": 0.8630348039771475, + "learning_rate": 0.00015621154276251024, + "loss": 1.0676, + "step": 369 + }, + { + "epoch": 0.9462915601023018, + "grad_norm": 0.8909789093135501, + "learning_rate": 0.00015616797303457782, + "loss": 1.0626, + "step": 370 + }, + { + "epoch": 0.948849104859335, + "grad_norm": 1.3639686895279477, + "learning_rate": 0.00015612416034352906, + "loss": 1.0935, + "step": 371 + }, + { + "epoch": 0.9514066496163683, + "grad_norm": 0.7547937680438821, + "learning_rate": 0.00015608010482911908, + "loss": 1.0714, + "step": 372 + }, + { + "epoch": 0.9539641943734015, + "grad_norm": 0.6097577881338234, + "learning_rate": 0.00015603580663187765, + "loss": 1.0757, + "step": 373 + }, + { + "epoch": 0.9565217391304348, + "grad_norm": 0.7408592240149442, + "learning_rate": 0.00015599126589310857, + "loss": 1.0762, + "step": 374 + }, + { + "epoch": 0.959079283887468, + "grad_norm": 0.8123009573402776, + "learning_rate": 0.00015594648275488944, + "loss": 1.0991, + "step": 375 + }, + { + "epoch": 0.9616368286445013, + "grad_norm": 0.8997010834862542, + "learning_rate": 0.00015590145736007091, + "loss": 1.0493, + "step": 376 + }, + { + "epoch": 0.9641943734015346, + "grad_norm": 1.211365253216414, + "learning_rate": 0.00015585618985227657, + "loss": 1.0845, + "step": 377 + }, + { + "epoch": 0.9667519181585678, + "grad_norm": 1.1546641796621098, + "learning_rate": 0.00015581068037590212, + "loss": 1.0851, + "step": 
378 + }, + { + "epoch": 0.969309462915601, + "grad_norm": 1.1673337321688009, + "learning_rate": 0.00015576492907611524, + "loss": 1.054, + "step": 379 + }, + { + "epoch": 0.9718670076726342, + "grad_norm": 0.6737544031199463, + "learning_rate": 0.00015571893609885493, + "loss": 1.0377, + "step": 380 + }, + { + "epoch": 0.9744245524296675, + "grad_norm": 0.8151328439701532, + "learning_rate": 0.00015567270159083107, + "loss": 1.0698, + "step": 381 + }, + { + "epoch": 0.9769820971867008, + "grad_norm": 0.9445758081131683, + "learning_rate": 0.00015562622569952408, + "loss": 1.0723, + "step": 382 + }, + { + "epoch": 0.979539641943734, + "grad_norm": 1.0143687259241263, + "learning_rate": 0.00015557950857318425, + "loss": 1.0753, + "step": 383 + }, + { + "epoch": 0.9820971867007673, + "grad_norm": 1.0909144236610384, + "learning_rate": 0.00015553255036083145, + "loss": 1.0301, + "step": 384 + }, + { + "epoch": 0.9846547314578005, + "grad_norm": 1.2562026829762518, + "learning_rate": 0.0001554853512122545, + "loss": 1.1103, + "step": 385 + }, + { + "epoch": 0.9872122762148338, + "grad_norm": 0.7752538678352305, + "learning_rate": 0.00015543791127801084, + "loss": 1.0633, + "step": 386 + }, + { + "epoch": 0.989769820971867, + "grad_norm": 0.6480828071883595, + "learning_rate": 0.0001553902307094259, + "loss": 1.0769, + "step": 387 + }, + { + "epoch": 0.9923273657289002, + "grad_norm": 0.8764236095011647, + "learning_rate": 0.00015534230965859276, + "loss": 1.0905, + "step": 388 + }, + { + "epoch": 0.9948849104859335, + "grad_norm": 1.1982183014384076, + "learning_rate": 0.00015529414827837156, + "loss": 1.0737, + "step": 389 + }, + { + "epoch": 0.9974424552429667, + "grad_norm": 1.0015924584874194, + "learning_rate": 0.00015524574672238906, + "loss": 1.0539, + "step": 390 + }, + { + "epoch": 1.0, + "grad_norm": 1.3714997731388885, + "learning_rate": 0.00015519710514503814, + "loss": 1.0846, + "step": 391 + }, + { + "epoch": 1.0025575447570332, + "grad_norm": 
0.5566435857743947, + "learning_rate": 0.00015514822370147732, + "loss": 1.0432, + "step": 392 + }, + { + "epoch": 1.0051150895140666, + "grad_norm": 0.7918387632633654, + "learning_rate": 0.00015509910254763023, + "loss": 1.0578, + "step": 393 + }, + { + "epoch": 1.0076726342710998, + "grad_norm": 1.256938009132569, + "learning_rate": 0.0001550497418401852, + "loss": 1.0306, + "step": 394 + }, + { + "epoch": 1.010230179028133, + "grad_norm": 1.2314520681198668, + "learning_rate": 0.00015500014173659457, + "loss": 1.0383, + "step": 395 + }, + { + "epoch": 1.0127877237851663, + "grad_norm": 0.923069995672888, + "learning_rate": 0.00015495030239507442, + "loss": 1.0573, + "step": 396 + }, + { + "epoch": 1.0153452685421995, + "grad_norm": 0.936236903889318, + "learning_rate": 0.00015490022397460392, + "loss": 1.0573, + "step": 397 + }, + { + "epoch": 1.0179028132992327, + "grad_norm": 0.6628420746065794, + "learning_rate": 0.0001548499066349249, + "loss": 1.0474, + "step": 398 + }, + { + "epoch": 1.020460358056266, + "grad_norm": 0.47759016557709666, + "learning_rate": 0.00015479935053654126, + "loss": 1.0175, + "step": 399 + }, + { + "epoch": 1.0230179028132993, + "grad_norm": 0.61072929455943, + "learning_rate": 0.00015474855584071847, + "loss": 1.0724, + "step": 400 + }, + { + "epoch": 1.0255754475703325, + "grad_norm": 0.607075351205747, + "learning_rate": 0.0001546975227094832, + "loss": 1.0527, + "step": 401 + }, + { + "epoch": 1.0281329923273657, + "grad_norm": 0.5993295243529821, + "learning_rate": 0.00015464625130562256, + "loss": 1.0695, + "step": 402 + }, + { + "epoch": 1.030690537084399, + "grad_norm": 0.9177173231285568, + "learning_rate": 0.0001545947417926838, + "loss": 1.0344, + "step": 403 + }, + { + "epoch": 1.0332480818414322, + "grad_norm": 1.4911897806007488, + "learning_rate": 0.00015454299433497362, + "loss": 1.0443, + "step": 404 + }, + { + "epoch": 1.0358056265984654, + "grad_norm": 0.6069008914687445, + "learning_rate": 
0.00015449100909755784, + "loss": 1.0393, + "step": 405 + }, + { + "epoch": 1.0383631713554988, + "grad_norm": 0.9163856494121054, + "learning_rate": 0.00015443878624626066, + "loss": 1.0737, + "step": 406 + }, + { + "epoch": 1.040920716112532, + "grad_norm": 1.369010227838881, + "learning_rate": 0.0001543863259476642, + "loss": 1.0106, + "step": 407 + }, + { + "epoch": 1.0434782608695652, + "grad_norm": 0.8651156065397383, + "learning_rate": 0.00015433362836910817, + "loss": 1.0399, + "step": 408 + }, + { + "epoch": 1.0460358056265984, + "grad_norm": 0.8527058058258006, + "learning_rate": 0.00015428069367868892, + "loss": 1.0222, + "step": 409 + }, + { + "epoch": 1.0485933503836318, + "grad_norm": 0.7680613356197566, + "learning_rate": 0.00015422752204525937, + "loss": 1.0161, + "step": 410 + }, + { + "epoch": 1.051150895140665, + "grad_norm": 1.0745283772693792, + "learning_rate": 0.0001541741136384281, + "loss": 1.0446, + "step": 411 + }, + { + "epoch": 1.0537084398976981, + "grad_norm": 1.0936408809378098, + "learning_rate": 0.00015412046862855902, + "loss": 1.0245, + "step": 412 + }, + { + "epoch": 1.0562659846547315, + "grad_norm": 0.9926125079651018, + "learning_rate": 0.00015406658718677076, + "loss": 1.0308, + "step": 413 + }, + { + "epoch": 1.0588235294117647, + "grad_norm": 1.1175953083121093, + "learning_rate": 0.00015401246948493612, + "loss": 1.0768, + "step": 414 + }, + { + "epoch": 1.061381074168798, + "grad_norm": 0.8210085027845057, + "learning_rate": 0.00015395811569568154, + "loss": 1.0473, + "step": 415 + }, + { + "epoch": 1.0639386189258313, + "grad_norm": 0.9226634652720442, + "learning_rate": 0.00015390352599238655, + "loss": 1.0299, + "step": 416 + }, + { + "epoch": 1.0664961636828645, + "grad_norm": 1.2471786951586945, + "learning_rate": 0.00015384870054918314, + "loss": 1.0139, + "step": 417 + }, + { + "epoch": 1.0690537084398977, + "grad_norm": 0.8806851237766041, + "learning_rate": 0.00015379363954095535, + "loss": 1.0237, + "step": 418 
+ }, + { + "epoch": 1.0716112531969308, + "grad_norm": 0.727069173053958, + "learning_rate": 0.0001537383431433386, + "loss": 1.0786, + "step": 419 + }, + { + "epoch": 1.0741687979539642, + "grad_norm": 0.6337579771769642, + "learning_rate": 0.00015368281153271918, + "loss": 1.0264, + "step": 420 + }, + { + "epoch": 1.0767263427109974, + "grad_norm": 0.8868138217653037, + "learning_rate": 0.0001536270448862336, + "loss": 1.0413, + "step": 421 + }, + { + "epoch": 1.0792838874680306, + "grad_norm": 0.8013668539540468, + "learning_rate": 0.00015357104338176823, + "loss": 1.0305, + "step": 422 + }, + { + "epoch": 1.081841432225064, + "grad_norm": 1.0111414586274687, + "learning_rate": 0.00015351480719795845, + "loss": 1.0177, + "step": 423 + }, + { + "epoch": 1.0843989769820972, + "grad_norm": 1.3128642093201517, + "learning_rate": 0.00015345833651418835, + "loss": 1.0663, + "step": 424 + }, + { + "epoch": 1.0869565217391304, + "grad_norm": 0.7074818377117421, + "learning_rate": 0.00015340163151058997, + "loss": 1.0262, + "step": 425 + }, + { + "epoch": 1.0895140664961638, + "grad_norm": 0.7476417982075203, + "learning_rate": 0.00015334469236804278, + "loss": 1.0166, + "step": 426 + }, + { + "epoch": 1.092071611253197, + "grad_norm": 0.7163607115802371, + "learning_rate": 0.00015328751926817314, + "loss": 1.041, + "step": 427 + }, + { + "epoch": 1.0946291560102301, + "grad_norm": 1.0614664295591614, + "learning_rate": 0.0001532301123933537, + "loss": 1.0236, + "step": 428 + }, + { + "epoch": 1.0971867007672633, + "grad_norm": 1.265439568931787, + "learning_rate": 0.00015317247192670282, + "loss": 1.0528, + "step": 429 + }, + { + "epoch": 1.0997442455242967, + "grad_norm": 0.7025263297795912, + "learning_rate": 0.00015311459805208397, + "loss": 1.0277, + "step": 430 + }, + { + "epoch": 1.10230179028133, + "grad_norm": 0.8167641509021383, + "learning_rate": 0.0001530564909541051, + "loss": 1.0582, + "step": 431 + }, + { + "epoch": 1.104859335038363, + "grad_norm": 
0.8716549745993203, + "learning_rate": 0.0001529981508181182, + "loss": 1.077, + "step": 432 + }, + { + "epoch": 1.1074168797953965, + "grad_norm": 0.7246028123611893, + "learning_rate": 0.00015293957783021854, + "loss": 1.0542, + "step": 433 + }, + { + "epoch": 1.1099744245524297, + "grad_norm": 0.6784199036145839, + "learning_rate": 0.0001528807721772442, + "loss": 1.0418, + "step": 434 + }, + { + "epoch": 1.1125319693094629, + "grad_norm": 0.8506075875171634, + "learning_rate": 0.00015282173404677533, + "loss": 1.0343, + "step": 435 + }, + { + "epoch": 1.1150895140664963, + "grad_norm": 0.8375757880980345, + "learning_rate": 0.00015276246362713375, + "loss": 1.0341, + "step": 436 + }, + { + "epoch": 1.1176470588235294, + "grad_norm": 0.7540319449850698, + "learning_rate": 0.00015270296110738221, + "loss": 1.014, + "step": 437 + }, + { + "epoch": 1.1202046035805626, + "grad_norm": 0.9166441931706429, + "learning_rate": 0.0001526432266773238, + "loss": 1.0269, + "step": 438 + }, + { + "epoch": 1.1227621483375958, + "grad_norm": 1.0822305273066126, + "learning_rate": 0.0001525832605275014, + "loss": 1.0472, + "step": 439 + }, + { + "epoch": 1.1253196930946292, + "grad_norm": 0.9450917972251209, + "learning_rate": 0.000152523062849197, + "loss": 1.024, + "step": 440 + }, + { + "epoch": 1.1278772378516624, + "grad_norm": 1.1333566165350994, + "learning_rate": 0.0001524626338344311, + "loss": 1.0448, + "step": 441 + }, + { + "epoch": 1.1304347826086956, + "grad_norm": 1.177581998734778, + "learning_rate": 0.00015240197367596226, + "loss": 1.0244, + "step": 442 + }, + { + "epoch": 1.132992327365729, + "grad_norm": 0.8866480092962395, + "learning_rate": 0.00015234108256728616, + "loss": 1.0499, + "step": 443 + }, + { + "epoch": 1.1355498721227621, + "grad_norm": 0.6882160288370965, + "learning_rate": 0.00015227996070263535, + "loss": 1.0151, + "step": 444 + }, + { + "epoch": 1.1381074168797953, + "grad_norm": 0.7419397568748587, + "learning_rate": 
0.00015221860827697832, + "loss": 1.0345, + "step": 445 + }, + { + "epoch": 1.1406649616368287, + "grad_norm": 0.854881931061872, + "learning_rate": 0.00015215702548601907, + "loss": 1.008, + "step": 446 + }, + { + "epoch": 1.143222506393862, + "grad_norm": 0.8138274292487687, + "learning_rate": 0.00015209521252619644, + "loss": 0.9962, + "step": 447 + }, + { + "epoch": 1.145780051150895, + "grad_norm": 0.7536271031473499, + "learning_rate": 0.00015203316959468344, + "loss": 1.0299, + "step": 448 + }, + { + "epoch": 1.1483375959079285, + "grad_norm": 0.9110426205382722, + "learning_rate": 0.0001519708968893867, + "loss": 1.019, + "step": 449 + }, + { + "epoch": 1.1508951406649617, + "grad_norm": 1.2088991550402766, + "learning_rate": 0.00015190839460894567, + "loss": 1.0708, + "step": 450 + }, + { + "epoch": 1.1534526854219949, + "grad_norm": 0.8573913285400658, + "learning_rate": 0.00015184566295273227, + "loss": 1.0417, + "step": 451 + }, + { + "epoch": 1.156010230179028, + "grad_norm": 0.6951469442919158, + "learning_rate": 0.00015178270212084995, + "loss": 1.0464, + "step": 452 + }, + { + "epoch": 1.1585677749360614, + "grad_norm": 0.6419948195410027, + "learning_rate": 0.00015171951231413328, + "loss": 1.0612, + "step": 453 + }, + { + "epoch": 1.1611253196930946, + "grad_norm": 0.6841619518854335, + "learning_rate": 0.00015165609373414722, + "loss": 1.0325, + "step": 454 + }, + { + "epoch": 1.1636828644501278, + "grad_norm": 0.8037291566188051, + "learning_rate": 0.0001515924465831864, + "loss": 1.0295, + "step": 455 + }, + { + "epoch": 1.1662404092071612, + "grad_norm": 1.1795212959071533, + "learning_rate": 0.00015152857106427462, + "loss": 1.0231, + "step": 456 + }, + { + "epoch": 1.1687979539641944, + "grad_norm": 1.1007425485117117, + "learning_rate": 0.00015146446738116412, + "loss": 1.015, + "step": 457 + }, + { + "epoch": 1.1713554987212276, + "grad_norm": 1.072656472389329, + "learning_rate": 0.00015140013573833498, + "loss": 1.0195, + "step": 458 + 
}, + { + "epoch": 1.1739130434782608, + "grad_norm": 0.9339605123999745, + "learning_rate": 0.00015133557634099435, + "loss": 1.026, + "step": 459 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.8580962355846978, + "learning_rate": 0.00015127078939507595, + "loss": 1.055, + "step": 460 + }, + { + "epoch": 1.1790281329923273, + "grad_norm": 1.028703820245517, + "learning_rate": 0.00015120577510723934, + "loss": 1.0768, + "step": 461 + }, + { + "epoch": 1.1815856777493605, + "grad_norm": 1.1535909770008528, + "learning_rate": 0.00015114053368486919, + "loss": 1.0227, + "step": 462 + }, + { + "epoch": 1.184143222506394, + "grad_norm": 0.7549525724152655, + "learning_rate": 0.0001510750653360748, + "loss": 1.0101, + "step": 463 + }, + { + "epoch": 1.186700767263427, + "grad_norm": 0.6560485854233202, + "learning_rate": 0.00015100937026968922, + "loss": 1.0372, + "step": 464 + }, + { + "epoch": 1.1892583120204603, + "grad_norm": 0.5946694031246916, + "learning_rate": 0.0001509434486952688, + "loss": 1.0471, + "step": 465 + }, + { + "epoch": 1.1918158567774937, + "grad_norm": 0.5311919492244818, + "learning_rate": 0.00015087730082309232, + "loss": 1.0431, + "step": 466 + }, + { + "epoch": 1.1943734015345269, + "grad_norm": 0.5154174371307244, + "learning_rate": 0.00015081092686416043, + "loss": 1.0199, + "step": 467 + }, + { + "epoch": 1.19693094629156, + "grad_norm": 0.505383670902881, + "learning_rate": 0.00015074432703019504, + "loss": 1.0706, + "step": 468 + }, + { + "epoch": 1.1994884910485935, + "grad_norm": 0.4907682209551291, + "learning_rate": 0.00015067750153363845, + "loss": 1.0346, + "step": 469 + }, + { + "epoch": 1.2020460358056266, + "grad_norm": 0.39066205442828883, + "learning_rate": 0.00015061045058765282, + "loss": 1.0554, + "step": 470 + }, + { + "epoch": 1.2046035805626598, + "grad_norm": 0.34420579713251814, + "learning_rate": 0.0001505431744061195, + "loss": 1.0279, + "step": 471 + }, + { + "epoch": 1.207161125319693, + "grad_norm": 
0.43688810183174753, + "learning_rate": 0.0001504756732036383, + "loss": 0.9885, + "step": 472 + }, + { + "epoch": 1.2097186700767264, + "grad_norm": 0.4751633909038584, + "learning_rate": 0.00015040794719552676, + "loss": 1.0432, + "step": 473 + }, + { + "epoch": 1.2122762148337596, + "grad_norm": 0.5269656781598262, + "learning_rate": 0.00015033999659781953, + "loss": 1.027, + "step": 474 + }, + { + "epoch": 1.2148337595907928, + "grad_norm": 0.5712060191776948, + "learning_rate": 0.00015027182162726769, + "loss": 1.0421, + "step": 475 + }, + { + "epoch": 1.2173913043478262, + "grad_norm": 0.6411090148779058, + "learning_rate": 0.000150203422501338, + "loss": 1.013, + "step": 476 + }, + { + "epoch": 1.2199488491048593, + "grad_norm": 0.922985318540642, + "learning_rate": 0.00015013479943821225, + "loss": 1.0671, + "step": 477 + }, + { + "epoch": 1.2225063938618925, + "grad_norm": 1.411342942170953, + "learning_rate": 0.00015006595265678655, + "loss": 1.0506, + "step": 478 + }, + { + "epoch": 1.2250639386189257, + "grad_norm": 0.7044934707287243, + "learning_rate": 0.00014999688237667065, + "loss": 1.058, + "step": 479 + }, + { + "epoch": 1.227621483375959, + "grad_norm": 0.844446069080729, + "learning_rate": 0.00014992758881818722, + "loss": 1.0112, + "step": 480 + }, + { + "epoch": 1.2301790281329923, + "grad_norm": 0.863795773273135, + "learning_rate": 0.00014985807220237112, + "loss": 1.0223, + "step": 481 + }, + { + "epoch": 1.2327365728900257, + "grad_norm": 1.1955253111068895, + "learning_rate": 0.00014978833275096872, + "loss": 1.0437, + "step": 482 + }, + { + "epoch": 1.2352941176470589, + "grad_norm": 0.9710436321082059, + "learning_rate": 0.00014971837068643732, + "loss": 1.0331, + "step": 483 + }, + { + "epoch": 1.237851662404092, + "grad_norm": 0.9838152365395039, + "learning_rate": 0.00014964818623194412, + "loss": 1.0503, + "step": 484 + }, + { + "epoch": 1.2404092071611252, + "grad_norm": 1.3111101164937617, + "learning_rate": 
0.00014957777961136588, + "loss": 1.0536, + "step": 485 + }, + { + "epoch": 1.2429667519181586, + "grad_norm": 0.9426881648292104, + "learning_rate": 0.00014950715104928794, + "loss": 1.0452, + "step": 486 + }, + { + "epoch": 1.2455242966751918, + "grad_norm": 0.9708865131907598, + "learning_rate": 0.0001494363007710036, + "loss": 1.0205, + "step": 487 + }, + { + "epoch": 1.248081841432225, + "grad_norm": 0.735118260321914, + "learning_rate": 0.00014936522900251348, + "loss": 1.0355, + "step": 488 + }, + { + "epoch": 1.2506393861892584, + "grad_norm": 0.8962772386972064, + "learning_rate": 0.00014929393597052458, + "loss": 1.0455, + "step": 489 + }, + { + "epoch": 1.2531969309462916, + "grad_norm": 0.6546912235303116, + "learning_rate": 0.00014922242190244981, + "loss": 1.0625, + "step": 490 + }, + { + "epoch": 1.2557544757033248, + "grad_norm": 0.5383201135001036, + "learning_rate": 0.0001491506870264071, + "loss": 1.0346, + "step": 491 + }, + { + "epoch": 1.258312020460358, + "grad_norm": 0.8097960021561659, + "learning_rate": 0.00014907873157121875, + "loss": 1.0605, + "step": 492 + }, + { + "epoch": 1.2608695652173914, + "grad_norm": 0.670808763781411, + "learning_rate": 0.00014900655576641057, + "loss": 1.0282, + "step": 493 + }, + { + "epoch": 1.2634271099744245, + "grad_norm": 0.7979394762122887, + "learning_rate": 0.00014893415984221141, + "loss": 1.0264, + "step": 494 + }, + { + "epoch": 1.265984654731458, + "grad_norm": 1.026770422301297, + "learning_rate": 0.00014886154402955217, + "loss": 1.0514, + "step": 495 + }, + { + "epoch": 1.2685421994884911, + "grad_norm": 1.032280976957703, + "learning_rate": 0.00014878870856006513, + "loss": 1.0408, + "step": 496 + }, + { + "epoch": 1.2710997442455243, + "grad_norm": 1.1296018012465836, + "learning_rate": 0.00014871565366608329, + "loss": 1.0338, + "step": 497 + }, + { + "epoch": 1.2736572890025575, + "grad_norm": 0.9749313409863054, + "learning_rate": 0.0001486423795806396, + "loss": 1.0193, + "step": 498 + 
}, + { + "epoch": 1.2762148337595907, + "grad_norm": 0.8177048634676223, + "learning_rate": 0.00014856888653746607, + "loss": 1.0324, + "step": 499 + }, + { + "epoch": 1.278772378516624, + "grad_norm": 0.7747012524305006, + "learning_rate": 0.00014849517477099334, + "loss": 1.0076, + "step": 500 + }, + { + "epoch": 1.2813299232736572, + "grad_norm": 0.8429034680075405, + "learning_rate": 0.00014842124451634956, + "loss": 1.0266, + "step": 501 + }, + { + "epoch": 1.2838874680306906, + "grad_norm": 1.0704964042478793, + "learning_rate": 0.00014834709600935995, + "loss": 1.033, + "step": 502 + }, + { + "epoch": 1.2864450127877238, + "grad_norm": 1.1030823411998563, + "learning_rate": 0.00014827272948654584, + "loss": 1.0519, + "step": 503 + }, + { + "epoch": 1.289002557544757, + "grad_norm": 0.7099638951621647, + "learning_rate": 0.00014819814518512403, + "loss": 1.0258, + "step": 504 + }, + { + "epoch": 1.2915601023017902, + "grad_norm": 0.5286675820388321, + "learning_rate": 0.000148123343343006, + "loss": 1.0398, + "step": 505 + }, + { + "epoch": 1.2941176470588236, + "grad_norm": 0.5306607233732565, + "learning_rate": 0.0001480483241987971, + "loss": 1.0155, + "step": 506 + }, + { + "epoch": 1.2966751918158568, + "grad_norm": 0.6060078277369222, + "learning_rate": 0.0001479730879917959, + "loss": 1.0486, + "step": 507 + }, + { + "epoch": 1.29923273657289, + "grad_norm": 0.8537119327365599, + "learning_rate": 0.00014789763496199335, + "loss": 1.0115, + "step": 508 + }, + { + "epoch": 1.3017902813299234, + "grad_norm": 1.0701098672995177, + "learning_rate": 0.00014782196535007198, + "loss": 1.0449, + "step": 509 + }, + { + "epoch": 1.3043478260869565, + "grad_norm": 1.0452113870678157, + "learning_rate": 0.00014774607939740524, + "loss": 1.0132, + "step": 510 + }, + { + "epoch": 1.3069053708439897, + "grad_norm": 1.0085703377598065, + "learning_rate": 0.0001476699773460567, + "loss": 1.0229, + "step": 511 + }, + { + "epoch": 1.309462915601023, + "grad_norm": 
0.8918712650363909, + "learning_rate": 0.00014759365943877906, + "loss": 1.0509, + "step": 512 + }, + { + "epoch": 1.3120204603580563, + "grad_norm": 0.839691736422046, + "learning_rate": 0.00014751712591901385, + "loss": 1.0078, + "step": 513 + }, + { + "epoch": 1.3145780051150895, + "grad_norm": 0.7023292683764998, + "learning_rate": 0.00014744037703089014, + "loss": 1.0289, + "step": 514 + }, + { + "epoch": 1.317135549872123, + "grad_norm": 0.686332323144994, + "learning_rate": 0.00014736341301922406, + "loss": 1.0213, + "step": 515 + }, + { + "epoch": 1.319693094629156, + "grad_norm": 0.5991056794621004, + "learning_rate": 0.00014728623412951802, + "loss": 1.0164, + "step": 516 + }, + { + "epoch": 1.3222506393861893, + "grad_norm": 0.7507696949786656, + "learning_rate": 0.00014720884060795975, + "loss": 1.0119, + "step": 517 + }, + { + "epoch": 1.3248081841432224, + "grad_norm": 0.8658712614342154, + "learning_rate": 0.00014713123270142163, + "loss": 1.0295, + "step": 518 + }, + { + "epoch": 1.3273657289002558, + "grad_norm": 0.6119299788578647, + "learning_rate": 0.00014705341065745999, + "loss": 1.0197, + "step": 519 + }, + { + "epoch": 1.329923273657289, + "grad_norm": 0.4927851179899278, + "learning_rate": 0.00014697537472431411, + "loss": 1.0624, + "step": 520 + }, + { + "epoch": 1.3324808184143222, + "grad_norm": 0.4167468121183674, + "learning_rate": 0.0001468971251509056, + "loss": 1.0647, + "step": 521 + }, + { + "epoch": 1.3350383631713556, + "grad_norm": 0.47586787480372, + "learning_rate": 0.00014681866218683757, + "loss": 1.0402, + "step": 522 + }, + { + "epoch": 1.3375959079283888, + "grad_norm": 0.5745122439927115, + "learning_rate": 0.0001467399860823937, + "loss": 1.0304, + "step": 523 + }, + { + "epoch": 1.340153452685422, + "grad_norm": 0.7552655303578069, + "learning_rate": 0.00014666109708853767, + "loss": 1.0548, + "step": 524 + }, + { + "epoch": 1.3427109974424551, + "grad_norm": 1.06908823148847, + "learning_rate": 
0.00014658199545691222, + "loss": 1.0287, + "step": 525 + }, + { + "epoch": 1.3452685421994885, + "grad_norm": 1.1444185918054413, + "learning_rate": 0.0001465026814398383, + "loss": 1.0539, + "step": 526 + }, + { + "epoch": 1.3478260869565217, + "grad_norm": 0.7989998085879703, + "learning_rate": 0.00014642315529031442, + "loss": 1.0035, + "step": 527 + }, + { + "epoch": 1.350383631713555, + "grad_norm": 0.6352155319789643, + "learning_rate": 0.00014634341726201572, + "loss": 1.0659, + "step": 528 + }, + { + "epoch": 1.3529411764705883, + "grad_norm": 0.5614215368601074, + "learning_rate": 0.00014626346760929316, + "loss": 1.0282, + "step": 529 + }, + { + "epoch": 1.3554987212276215, + "grad_norm": 0.5422618777488837, + "learning_rate": 0.00014618330658717278, + "loss": 1.0002, + "step": 530 + }, + { + "epoch": 1.3580562659846547, + "grad_norm": 0.4783637133302247, + "learning_rate": 0.00014610293445135492, + "loss": 1.0377, + "step": 531 + }, + { + "epoch": 1.3606138107416879, + "grad_norm": 0.4390483950197236, + "learning_rate": 0.00014602235145821322, + "loss": 1.023, + "step": 532 + }, + { + "epoch": 1.3631713554987213, + "grad_norm": 0.4768466306371761, + "learning_rate": 0.00014594155786479398, + "loss": 1.0601, + "step": 533 + }, + { + "epoch": 1.3657289002557544, + "grad_norm": 0.7582418871164014, + "learning_rate": 0.00014586055392881527, + "loss": 1.0292, + "step": 534 + }, + { + "epoch": 1.3682864450127878, + "grad_norm": 1.0430189228296438, + "learning_rate": 0.00014577933990866617, + "loss": 1.0397, + "step": 535 + }, + { + "epoch": 1.370843989769821, + "grad_norm": 1.2646327577842662, + "learning_rate": 0.00014569791606340577, + "loss": 1.0749, + "step": 536 + }, + { + "epoch": 1.3734015345268542, + "grad_norm": 0.6922891659849906, + "learning_rate": 0.00014561628265276257, + "loss": 1.0293, + "step": 537 + }, + { + "epoch": 1.3759590792838874, + "grad_norm": 0.44386889614919295, + "learning_rate": 0.00014553443993713355, + "loss": 1.0398, + "step": 
538 + }, + { + "epoch": 1.3785166240409208, + "grad_norm": 0.5439717030086442, + "learning_rate": 0.00014545238817758327, + "loss": 1.0268, + "step": 539 + }, + { + "epoch": 1.381074168797954, + "grad_norm": 0.8373630963710572, + "learning_rate": 0.00014537012763584316, + "loss": 1.0354, + "step": 540 + }, + { + "epoch": 1.3836317135549872, + "grad_norm": 1.3266757684220118, + "learning_rate": 0.0001452876585743106, + "loss": 1.0642, + "step": 541 + }, + { + "epoch": 1.3861892583120206, + "grad_norm": 0.7488029622406787, + "learning_rate": 0.00014520498125604814, + "loss": 1.0534, + "step": 542 + }, + { + "epoch": 1.3887468030690537, + "grad_norm": 0.7282698103684015, + "learning_rate": 0.00014512209594478263, + "loss": 1.01, + "step": 543 + }, + { + "epoch": 1.391304347826087, + "grad_norm": 0.7969771518742094, + "learning_rate": 0.00014503900290490436, + "loss": 1.0307, + "step": 544 + }, + { + "epoch": 1.39386189258312, + "grad_norm": 0.9263524028660353, + "learning_rate": 0.00014495570240146625, + "loss": 1.0211, + "step": 545 + }, + { + "epoch": 1.3964194373401535, + "grad_norm": 1.1608361715103017, + "learning_rate": 0.000144872194700183, + "loss": 1.0005, + "step": 546 + }, + { + "epoch": 1.3989769820971867, + "grad_norm": 0.836914057851843, + "learning_rate": 0.00014478848006743022, + "loss": 1.0387, + "step": 547 + }, + { + "epoch": 1.40153452685422, + "grad_norm": 0.6826412525653701, + "learning_rate": 0.00014470455877024365, + "loss": 1.0292, + "step": 548 + }, + { + "epoch": 1.4040920716112533, + "grad_norm": 0.48703773893723834, + "learning_rate": 0.00014462043107631818, + "loss": 1.0511, + "step": 549 + }, + { + "epoch": 1.4066496163682864, + "grad_norm": 0.6223475644721191, + "learning_rate": 0.00014453609725400713, + "loss": 0.9925, + "step": 550 + }, + { + "epoch": 1.4092071611253196, + "grad_norm": 0.8882232962821335, + "learning_rate": 0.0001444515575723213, + "loss": 1.0061, + "step": 551 + }, + { + "epoch": 1.4117647058823528, + "grad_norm": 
1.1304081971561695, + "learning_rate": 0.00014436681230092815, + "loss": 1.0488, + "step": 552 + }, + { + "epoch": 1.4143222506393862, + "grad_norm": 0.8848381914341709, + "learning_rate": 0.00014428186171015097, + "loss": 1.0324, + "step": 553 + }, + { + "epoch": 1.4168797953964194, + "grad_norm": 0.7483522323458203, + "learning_rate": 0.00014419670607096791, + "loss": 1.0422, + "step": 554 + }, + { + "epoch": 1.4194373401534528, + "grad_norm": 0.7721209602826212, + "learning_rate": 0.00014411134565501133, + "loss": 1.056, + "step": 555 + }, + { + "epoch": 1.421994884910486, + "grad_norm": 0.8535777213626637, + "learning_rate": 0.00014402578073456661, + "loss": 1.0408, + "step": 556 + }, + { + "epoch": 1.4245524296675192, + "grad_norm": 0.6959036355749549, + "learning_rate": 0.00014394001158257163, + "loss": 1.0271, + "step": 557 + }, + { + "epoch": 1.4271099744245523, + "grad_norm": 0.6014343484373971, + "learning_rate": 0.00014385403847261562, + "loss": 1.0193, + "step": 558 + }, + { + "epoch": 1.4296675191815857, + "grad_norm": 0.7106873814775013, + "learning_rate": 0.00014376786167893846, + "loss": 1.0122, + "step": 559 + }, + { + "epoch": 1.432225063938619, + "grad_norm": 0.8444210941994957, + "learning_rate": 0.00014368148147642974, + "loss": 1.0045, + "step": 560 + }, + { + "epoch": 1.434782608695652, + "grad_norm": 0.8805969266684864, + "learning_rate": 0.00014359489814062788, + "loss": 1.0144, + "step": 561 + }, + { + "epoch": 1.4373401534526855, + "grad_norm": 1.009450224204603, + "learning_rate": 0.00014350811194771928, + "loss": 1.0287, + "step": 562 + }, + { + "epoch": 1.4398976982097187, + "grad_norm": 1.2351992837125931, + "learning_rate": 0.00014342112317453738, + "loss": 1.0566, + "step": 563 + }, + { + "epoch": 1.4424552429667519, + "grad_norm": 0.6573457770192163, + "learning_rate": 0.00014333393209856182, + "loss": 1.052, + "step": 564 + }, + { + "epoch": 1.445012787723785, + "grad_norm": 0.5070847718255479, + "learning_rate": 
0.00014324653899791765, + "loss": 1.0608, + "step": 565 + }, + { + "epoch": 1.4475703324808185, + "grad_norm": 0.6935855951791632, + "learning_rate": 0.00014315894415137416, + "loss": 1.0234, + "step": 566 + }, + { + "epoch": 1.4501278772378516, + "grad_norm": 0.7956146938043426, + "learning_rate": 0.00014307114783834442, + "loss": 1.0048, + "step": 567 + }, + { + "epoch": 1.452685421994885, + "grad_norm": 0.9003410836319078, + "learning_rate": 0.0001429831503388839, + "loss": 1.0363, + "step": 568 + }, + { + "epoch": 1.4552429667519182, + "grad_norm": 1.0643618726104027, + "learning_rate": 0.00014289495193368996, + "loss": 1.0269, + "step": 569 + }, + { + "epoch": 1.4578005115089514, + "grad_norm": 0.9080907950888324, + "learning_rate": 0.0001428065529041008, + "loss": 1.017, + "step": 570 + }, + { + "epoch": 1.4603580562659846, + "grad_norm": 0.8536436997073572, + "learning_rate": 0.00014271795353209456, + "loss": 1.0375, + "step": 571 + }, + { + "epoch": 1.4629156010230178, + "grad_norm": 0.9398461282489688, + "learning_rate": 0.00014262915410028848, + "loss": 1.0434, + "step": 572 + }, + { + "epoch": 1.4654731457800512, + "grad_norm": 0.9631928132083718, + "learning_rate": 0.00014254015489193782, + "loss": 1.0292, + "step": 573 + }, + { + "epoch": 1.4680306905370843, + "grad_norm": 0.9076791954370104, + "learning_rate": 0.00014245095619093532, + "loss": 1.0159, + "step": 574 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.9587339014454659, + "learning_rate": 0.00014236155828180983, + "loss": 1.0484, + "step": 575 + }, + { + "epoch": 1.473145780051151, + "grad_norm": 0.8891566782622077, + "learning_rate": 0.00014227196144972582, + "loss": 1.0508, + "step": 576 + }, + { + "epoch": 1.4757033248081841, + "grad_norm": 0.6581614104684226, + "learning_rate": 0.0001421821659804822, + "loss": 1.0403, + "step": 577 + }, + { + "epoch": 1.4782608695652173, + "grad_norm": 0.5861192400584929, + "learning_rate": 0.00014209217216051156, + "loss": 1.0304, + "step": 578 
+ }, + { + "epoch": 1.4808184143222507, + "grad_norm": 0.5774127863656433, + "learning_rate": 0.00014200198027687912, + "loss": 1.0102, + "step": 579 + }, + { + "epoch": 1.4833759590792839, + "grad_norm": 0.6502157171768282, + "learning_rate": 0.00014191159061728193, + "loss": 1.0253, + "step": 580 + }, + { + "epoch": 1.485933503836317, + "grad_norm": 0.5386614139768452, + "learning_rate": 0.00014182100347004793, + "loss": 1.044, + "step": 581 + }, + { + "epoch": 1.4884910485933505, + "grad_norm": 0.4786011997004328, + "learning_rate": 0.000141730219124135, + "loss": 1.0322, + "step": 582 + }, + { + "epoch": 1.4910485933503836, + "grad_norm": 0.5755235187273994, + "learning_rate": 0.00014163923786913004, + "loss": 1.0572, + "step": 583 + }, + { + "epoch": 1.4936061381074168, + "grad_norm": 0.641263771557679, + "learning_rate": 0.00014154805999524802, + "loss": 1.0627, + "step": 584 + }, + { + "epoch": 1.49616368286445, + "grad_norm": 0.798665776000645, + "learning_rate": 0.0001414566857933312, + "loss": 1.0017, + "step": 585 + }, + { + "epoch": 1.4987212276214834, + "grad_norm": 0.8759678129527348, + "learning_rate": 0.00014136511555484798, + "loss": 1.0168, + "step": 586 + }, + { + "epoch": 1.5012787723785166, + "grad_norm": 0.7904395533793586, + "learning_rate": 0.00014127334957189219, + "loss": 1.0253, + "step": 587 + }, + { + "epoch": 1.50383631713555, + "grad_norm": 0.6451046472087583, + "learning_rate": 0.00014118138813718192, + "loss": 1.0523, + "step": 588 + }, + { + "epoch": 1.5063938618925832, + "grad_norm": 0.5705461372803496, + "learning_rate": 0.0001410892315440588, + "loss": 0.9921, + "step": 589 + }, + { + "epoch": 1.5089514066496164, + "grad_norm": 0.6000400371240294, + "learning_rate": 0.00014099688008648703, + "loss": 1.0219, + "step": 590 + }, + { + "epoch": 1.5115089514066495, + "grad_norm": 0.6112952152068515, + "learning_rate": 0.0001409043340590523, + "loss": 0.9963, + "step": 591 + }, + { + "epoch": 1.5140664961636827, + "grad_norm": 
0.5886324573188866, + "learning_rate": 0.00014081159375696102, + "loss": 1.0484, + "step": 592 + }, + { + "epoch": 1.5166240409207161, + "grad_norm": 0.5048817308801855, + "learning_rate": 0.00014071865947603922, + "loss": 0.978, + "step": 593 + }, + { + "epoch": 1.5191815856777495, + "grad_norm": 0.5000111304078102, + "learning_rate": 0.00014062553151273177, + "loss": 1.0431, + "step": 594 + }, + { + "epoch": 1.5217391304347827, + "grad_norm": 0.47701322805085783, + "learning_rate": 0.0001405322101641013, + "loss": 1.0157, + "step": 595 + }, + { + "epoch": 1.5242966751918159, + "grad_norm": 0.45047959305759844, + "learning_rate": 0.00014043869572782737, + "loss": 1.026, + "step": 596 + }, + { + "epoch": 1.526854219948849, + "grad_norm": 0.37562193605886857, + "learning_rate": 0.00014034498850220537, + "loss": 1.0334, + "step": 597 + }, + { + "epoch": 1.5294117647058822, + "grad_norm": 0.44055163797782626, + "learning_rate": 0.00014025108878614576, + "loss": 1.0353, + "step": 598 + }, + { + "epoch": 1.5319693094629157, + "grad_norm": 0.39725606847915634, + "learning_rate": 0.0001401569968791729, + "loss": 1.0115, + "step": 599 + }, + { + "epoch": 1.5345268542199488, + "grad_norm": 0.39650786805208904, + "learning_rate": 0.00014006271308142433, + "loss": 1.0604, + "step": 600 + }, + { + "epoch": 1.5370843989769822, + "grad_norm": 0.32569926641458746, + "learning_rate": 0.0001399682376936495, + "loss": 1.0096, + "step": 601 + }, + { + "epoch": 1.5396419437340154, + "grad_norm": 0.43543100187257516, + "learning_rate": 0.00013987357101720929, + "loss": 1.0059, + "step": 602 + }, + { + "epoch": 1.5421994884910486, + "grad_norm": 0.458695174168892, + "learning_rate": 0.00013977871335407445, + "loss": 1.0197, + "step": 603 + }, + { + "epoch": 1.5447570332480818, + "grad_norm": 0.43690410697330667, + "learning_rate": 0.00013968366500682514, + "loss": 1.0302, + "step": 604 + }, + { + "epoch": 1.547314578005115, + "grad_norm": 0.4143725631119223, + "learning_rate": 
0.00013958842627864975, + "loss": 1.0167, + "step": 605 + }, + { + "epoch": 1.5498721227621484, + "grad_norm": 0.36509470245988934, + "learning_rate": 0.00013949299747334387, + "loss": 0.994, + "step": 606 + }, + { + "epoch": 1.5524296675191815, + "grad_norm": 0.42997115738098735, + "learning_rate": 0.00013939737889530948, + "loss": 1.0182, + "step": 607 + }, + { + "epoch": 1.554987212276215, + "grad_norm": 0.519737904298238, + "learning_rate": 0.00013930157084955387, + "loss": 1.0432, + "step": 608 + }, + { + "epoch": 1.5575447570332481, + "grad_norm": 0.5413718715320616, + "learning_rate": 0.00013920557364168872, + "loss": 1.0392, + "step": 609 + }, + { + "epoch": 1.5601023017902813, + "grad_norm": 0.4622784565390988, + "learning_rate": 0.00013910938757792911, + "loss": 1.0089, + "step": 610 + }, + { + "epoch": 1.5626598465473145, + "grad_norm": 0.517572135003303, + "learning_rate": 0.00013901301296509247, + "loss": 1.0433, + "step": 611 + }, + { + "epoch": 1.5652173913043477, + "grad_norm": 0.6472771877158792, + "learning_rate": 0.00013891645011059774, + "loss": 1.033, + "step": 612 + }, + { + "epoch": 1.567774936061381, + "grad_norm": 0.73777975779115, + "learning_rate": 0.00013881969932246434, + "loss": 1.0233, + "step": 613 + }, + { + "epoch": 1.5703324808184145, + "grad_norm": 0.6556752106938734, + "learning_rate": 0.00013872276090931112, + "loss": 1.0283, + "step": 614 + }, + { + "epoch": 1.5728900255754477, + "grad_norm": 0.647001672639268, + "learning_rate": 0.0001386256351803554, + "loss": 1.0449, + "step": 615 + }, + { + "epoch": 1.5754475703324808, + "grad_norm": 0.755466796600313, + "learning_rate": 0.00013852832244541207, + "loss": 1.0005, + "step": 616 + }, + { + "epoch": 1.578005115089514, + "grad_norm": 0.9067726592525303, + "learning_rate": 0.00013843082301489247, + "loss": 1.034, + "step": 617 + }, + { + "epoch": 1.5805626598465472, + "grad_norm": 1.205016289595881, + "learning_rate": 0.00013833313719980358, + "loss": 1.0292, + "step": 618 + }, 
+ { + "epoch": 1.5831202046035806, + "grad_norm": 0.8478168612376876, + "learning_rate": 0.00013823526531174675, + "loss": 1.0142, + "step": 619 + }, + { + "epoch": 1.5856777493606138, + "grad_norm": 0.7403592560784086, + "learning_rate": 0.000138137207662917, + "loss": 1.0019, + "step": 620 + }, + { + "epoch": 1.5882352941176472, + "grad_norm": 0.6403376151233803, + "learning_rate": 0.00013803896456610187, + "loss": 1.0308, + "step": 621 + }, + { + "epoch": 1.5907928388746804, + "grad_norm": 0.712308710605845, + "learning_rate": 0.0001379405363346804, + "loss": 1.0455, + "step": 622 + }, + { + "epoch": 1.5933503836317136, + "grad_norm": 0.6512025986675177, + "learning_rate": 0.00013784192328262227, + "loss": 1.018, + "step": 623 + }, + { + "epoch": 1.5959079283887467, + "grad_norm": 0.6467882755688008, + "learning_rate": 0.00013774312572448658, + "loss": 1.0566, + "step": 624 + }, + { + "epoch": 1.59846547314578, + "grad_norm": 0.7409770827879977, + "learning_rate": 0.00013764414397542113, + "loss": 1.0759, + "step": 625 + }, + { + "epoch": 1.6010230179028133, + "grad_norm": 0.8147656835217053, + "learning_rate": 0.0001375449783511611, + "loss": 1.0041, + "step": 626 + }, + { + "epoch": 1.6035805626598465, + "grad_norm": 0.9034624506464588, + "learning_rate": 0.0001374456291680283, + "loss": 1.0141, + "step": 627 + }, + { + "epoch": 1.60613810741688, + "grad_norm": 1.0050570938199166, + "learning_rate": 0.00013734609674293001, + "loss": 1.0532, + "step": 628 + }, + { + "epoch": 1.608695652173913, + "grad_norm": 0.9807521253903259, + "learning_rate": 0.00013724638139335808, + "loss": 1.0079, + "step": 629 + }, + { + "epoch": 1.6112531969309463, + "grad_norm": 1.0251289878636651, + "learning_rate": 0.00013714648343738785, + "loss": 1.014, + "step": 630 + }, + { + "epoch": 1.6138107416879794, + "grad_norm": 1.1145588268761022, + "learning_rate": 0.00013704640319367706, + "loss": 1.0217, + "step": 631 + }, + { + "epoch": 1.6163682864450126, + "grad_norm": 
0.9024588644594059, + "learning_rate": 0.000136946140981465, + "loss": 1.0151, + "step": 632 + }, + { + "epoch": 1.618925831202046, + "grad_norm": 0.7164435145214515, + "learning_rate": 0.00013684569712057141, + "loss": 0.9972, + "step": 633 + }, + { + "epoch": 1.6214833759590794, + "grad_norm": 0.40989603024156007, + "learning_rate": 0.0001367450719313954, + "loss": 1.0438, + "step": 634 + }, + { + "epoch": 1.6240409207161126, + "grad_norm": 0.4621187072292993, + "learning_rate": 0.00013664426573491454, + "loss": 0.9964, + "step": 635 + }, + { + "epoch": 1.6265984654731458, + "grad_norm": 0.7796243265332405, + "learning_rate": 0.0001365432788526838, + "loss": 1.0428, + "step": 636 + }, + { + "epoch": 1.629156010230179, + "grad_norm": 0.9807118313427811, + "learning_rate": 0.0001364421116068344, + "loss": 1.0374, + "step": 637 + }, + { + "epoch": 1.6317135549872122, + "grad_norm": 1.0521751456854462, + "learning_rate": 0.00013634076432007298, + "loss": 1.022, + "step": 638 + }, + { + "epoch": 1.6342710997442456, + "grad_norm": 1.014819808376515, + "learning_rate": 0.00013623923731568053, + "loss": 1.0555, + "step": 639 + }, + { + "epoch": 1.6368286445012787, + "grad_norm": 0.8908217824529507, + "learning_rate": 0.00013613753091751117, + "loss": 0.9896, + "step": 640 + }, + { + "epoch": 1.6393861892583121, + "grad_norm": 0.7338590542416318, + "learning_rate": 0.00013603564544999134, + "loss": 1.0104, + "step": 641 + }, + { + "epoch": 1.6419437340153453, + "grad_norm": 0.4947515917010355, + "learning_rate": 0.00013593358123811873, + "loss": 1.013, + "step": 642 + }, + { + "epoch": 1.6445012787723785, + "grad_norm": 0.3613565103885808, + "learning_rate": 0.00013583133860746102, + "loss": 1.0285, + "step": 643 + }, + { + "epoch": 1.6470588235294117, + "grad_norm": 0.44918465574622884, + "learning_rate": 0.00013572891788415526, + "loss": 1.0735, + "step": 644 + }, + { + "epoch": 1.6496163682864449, + "grad_norm": 0.6919277753013154, + "learning_rate": 
0.00013562631939490638, + "loss": 0.9838, + "step": 645 + }, + { + "epoch": 1.6521739130434783, + "grad_norm": 0.998596135317296, + "learning_rate": 0.00013552354346698644, + "loss": 1.0407, + "step": 646 + }, + { + "epoch": 1.6547314578005117, + "grad_norm": 1.1274200277350097, + "learning_rate": 0.0001354205904282335, + "loss": 0.9994, + "step": 647 + }, + { + "epoch": 1.6572890025575449, + "grad_norm": 0.7298162047765786, + "learning_rate": 0.0001353174606070505, + "loss": 1.0158, + "step": 648 + }, + { + "epoch": 1.659846547314578, + "grad_norm": 0.4959923867676345, + "learning_rate": 0.00013521415433240448, + "loss": 1.0223, + "step": 649 + }, + { + "epoch": 1.6624040920716112, + "grad_norm": 0.4028073795408234, + "learning_rate": 0.0001351106719338251, + "loss": 1.0048, + "step": 650 + }, + { + "epoch": 1.6649616368286444, + "grad_norm": 0.4151895967851957, + "learning_rate": 0.000135007013741404, + "loss": 1.031, + "step": 651 + }, + { + "epoch": 1.6675191815856778, + "grad_norm": 0.493296338959119, + "learning_rate": 0.0001349031800857934, + "loss": 1.0551, + "step": 652 + }, + { + "epoch": 1.670076726342711, + "grad_norm": 0.5474927271625798, + "learning_rate": 0.00013479917129820547, + "loss": 1.0296, + "step": 653 + }, + { + "epoch": 1.6726342710997444, + "grad_norm": 0.6314250125042725, + "learning_rate": 0.00013469498771041078, + "loss": 1.0355, + "step": 654 + }, + { + "epoch": 1.6751918158567776, + "grad_norm": 0.7183033795455095, + "learning_rate": 0.0001345906296547376, + "loss": 1.0239, + "step": 655 + }, + { + "epoch": 1.6777493606138107, + "grad_norm": 0.6627049343116693, + "learning_rate": 0.00013448609746407076, + "loss": 1.0107, + "step": 656 + }, + { + "epoch": 1.680306905370844, + "grad_norm": 0.8323267890128159, + "learning_rate": 0.0001343813914718504, + "loss": 1.0132, + "step": 657 + }, + { + "epoch": 1.682864450127877, + "grad_norm": 1.0100396544553614, + "learning_rate": 0.0001342765120120712, + "loss": 1.034, + "step": 658 + }, + { + 
"epoch": 1.6854219948849105, + "grad_norm": 0.9397586944756832, + "learning_rate": 0.0001341714594192811, + "loss": 1.0359, + "step": 659 + }, + { + "epoch": 1.6879795396419437, + "grad_norm": 0.60948367814948, + "learning_rate": 0.00013406623402858038, + "loss": 1.0515, + "step": 660 + }, + { + "epoch": 1.690537084398977, + "grad_norm": 0.4064851961480879, + "learning_rate": 0.00013396083617562041, + "loss": 1.0295, + "step": 661 + }, + { + "epoch": 1.6930946291560103, + "grad_norm": 0.4835321670487211, + "learning_rate": 0.0001338552661966028, + "loss": 1.0218, + "step": 662 + }, + { + "epoch": 1.6956521739130435, + "grad_norm": 0.5087590456762057, + "learning_rate": 0.00013374952442827813, + "loss": 1.0438, + "step": 663 + }, + { + "epoch": 1.6982097186700766, + "grad_norm": 0.487251739240553, + "learning_rate": 0.00013364361120794495, + "loss": 1.0293, + "step": 664 + }, + { + "epoch": 1.7007672634271098, + "grad_norm": 0.5712982739684782, + "learning_rate": 0.00013353752687344882, + "loss": 1.0332, + "step": 665 + }, + { + "epoch": 1.7033248081841432, + "grad_norm": 0.7033661782388088, + "learning_rate": 0.000133431271763181, + "loss": 1.0053, + "step": 666 + }, + { + "epoch": 1.7058823529411766, + "grad_norm": 0.6935444307133046, + "learning_rate": 0.00013332484621607758, + "loss": 1.0262, + "step": 667 + }, + { + "epoch": 1.7084398976982098, + "grad_norm": 0.7341105705188075, + "learning_rate": 0.00013321825057161825, + "loss": 1.0156, + "step": 668 + }, + { + "epoch": 1.710997442455243, + "grad_norm": 0.7907280681410083, + "learning_rate": 0.00013311148516982534, + "loss": 1.0413, + "step": 669 + }, + { + "epoch": 1.7135549872122762, + "grad_norm": 0.7112672488330658, + "learning_rate": 0.00013300455035126268, + "loss": 1.0199, + "step": 670 + }, + { + "epoch": 1.7161125319693094, + "grad_norm": 0.5766576717286938, + "learning_rate": 0.00013289744645703444, + "loss": 1.0361, + "step": 671 + }, + { + "epoch": 1.7186700767263428, + "grad_norm": 
0.5059688666618373, + "learning_rate": 0.0001327901738287842, + "loss": 1.0385, + "step": 672 + }, + { + "epoch": 1.721227621483376, + "grad_norm": 0.45263501963427877, + "learning_rate": 0.0001326827328086937, + "loss": 1.0163, + "step": 673 + }, + { + "epoch": 1.7237851662404093, + "grad_norm": 0.5156404930129397, + "learning_rate": 0.00013257512373948186, + "loss": 1.0592, + "step": 674 + }, + { + "epoch": 1.7263427109974425, + "grad_norm": 0.6373966994332245, + "learning_rate": 0.00013246734696440368, + "loss": 1.0303, + "step": 675 + }, + { + "epoch": 1.7289002557544757, + "grad_norm": 0.6497706378399105, + "learning_rate": 0.000132359402827249, + "loss": 0.9963, + "step": 676 + }, + { + "epoch": 1.7314578005115089, + "grad_norm": 0.6649205635237081, + "learning_rate": 0.0001322512916723417, + "loss": 1.0133, + "step": 677 + }, + { + "epoch": 1.734015345268542, + "grad_norm": 0.7302337459964975, + "learning_rate": 0.00013214301384453824, + "loss": 1.0143, + "step": 678 + }, + { + "epoch": 1.7365728900255755, + "grad_norm": 0.7742690150052379, + "learning_rate": 0.00013203456968922684, + "loss": 1.0164, + "step": 679 + }, + { + "epoch": 1.7391304347826086, + "grad_norm": 0.6798309822233196, + "learning_rate": 0.0001319259595523262, + "loss": 1.0172, + "step": 680 + }, + { + "epoch": 1.741687979539642, + "grad_norm": 0.5208733748449712, + "learning_rate": 0.0001318171837802846, + "loss": 1.0048, + "step": 681 + }, + { + "epoch": 1.7442455242966752, + "grad_norm": 0.41856841228081965, + "learning_rate": 0.00013170824272007854, + "loss": 1.0508, + "step": 682 + }, + { + "epoch": 1.7468030690537084, + "grad_norm": 0.41744052183195546, + "learning_rate": 0.00013159913671921184, + "loss": 1.0433, + "step": 683 + }, + { + "epoch": 1.7493606138107416, + "grad_norm": 0.45034351237029546, + "learning_rate": 0.00013148986612571438, + "loss": 1.0281, + "step": 684 + }, + { + "epoch": 1.7519181585677748, + "grad_norm": 0.5021896906440644, + "learning_rate": 
0.00013138043128814114, + "loss": 1.0207, + "step": 685 + }, + { + "epoch": 1.7544757033248082, + "grad_norm": 0.6367316434278153, + "learning_rate": 0.000131270832555571, + "loss": 1.0509, + "step": 686 + }, + { + "epoch": 1.7570332480818416, + "grad_norm": 0.9449450079946309, + "learning_rate": 0.00013116107027760557, + "loss": 1.0263, + "step": 687 + }, + { + "epoch": 1.7595907928388748, + "grad_norm": 1.2671861813793404, + "learning_rate": 0.00013105114480436823, + "loss": 1.015, + "step": 688 + }, + { + "epoch": 1.762148337595908, + "grad_norm": 0.6133472053088566, + "learning_rate": 0.00013094105648650285, + "loss": 0.9964, + "step": 689 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.5563333895443464, + "learning_rate": 0.00013083080567517284, + "loss": 1.0221, + "step": 690 + }, + { + "epoch": 1.7672634271099743, + "grad_norm": 0.8984060988722041, + "learning_rate": 0.0001307203927220598, + "loss": 1.0333, + "step": 691 + }, + { + "epoch": 1.7698209718670077, + "grad_norm": 1.1600459077736829, + "learning_rate": 0.0001306098179793627, + "loss": 1.0281, + "step": 692 + }, + { + "epoch": 1.772378516624041, + "grad_norm": 0.8749748158295617, + "learning_rate": 0.00013049908179979644, + "loss": 1.0414, + "step": 693 + }, + { + "epoch": 1.7749360613810743, + "grad_norm": 0.6456013771393564, + "learning_rate": 0.00013038818453659098, + "loss": 0.9934, + "step": 694 + }, + { + "epoch": 1.7774936061381075, + "grad_norm": 0.4834000513881869, + "learning_rate": 0.00013027712654349003, + "loss": 1.0077, + "step": 695 + }, + { + "epoch": 1.7800511508951407, + "grad_norm": 0.46969762642929197, + "learning_rate": 0.0001301659081747501, + "loss": 1.0408, + "step": 696 + }, + { + "epoch": 1.7826086956521738, + "grad_norm": 0.5147779689056563, + "learning_rate": 0.0001300545297851392, + "loss": 1.0186, + "step": 697 + }, + { + "epoch": 1.785166240409207, + "grad_norm": 0.55729153001615, + "learning_rate": 0.0001299429917299358, + "loss": 1.0329, + "step": 698 + }, 
+ { + "epoch": 1.7877237851662404, + "grad_norm": 0.5260414108398854, + "learning_rate": 0.00012983129436492763, + "loss": 1.0233, + "step": 699 + }, + { + "epoch": 1.7902813299232738, + "grad_norm": 0.5427361149590243, + "learning_rate": 0.00012971943804641068, + "loss": 1.0409, + "step": 700 + }, + { + "epoch": 1.792838874680307, + "grad_norm": 0.5405520825559765, + "learning_rate": 0.0001296074231311879, + "loss": 1.0066, + "step": 701 + }, + { + "epoch": 1.7953964194373402, + "grad_norm": 0.6297890907155308, + "learning_rate": 0.0001294952499765682, + "loss": 1.0254, + "step": 702 + }, + { + "epoch": 1.7979539641943734, + "grad_norm": 0.6644546067252105, + "learning_rate": 0.00012938291894036522, + "loss": 1.0285, + "step": 703 + }, + { + "epoch": 1.8005115089514065, + "grad_norm": 0.683427488866508, + "learning_rate": 0.00012927043038089616, + "loss": 1.0091, + "step": 704 + }, + { + "epoch": 1.80306905370844, + "grad_norm": 0.6319295334248269, + "learning_rate": 0.00012915778465698077, + "loss": 1.0397, + "step": 705 + }, + { + "epoch": 1.8056265984654731, + "grad_norm": 0.5438735087695892, + "learning_rate": 0.00012904498212794007, + "loss": 0.991, + "step": 706 + }, + { + "epoch": 1.8081841432225065, + "grad_norm": 0.5047705166677889, + "learning_rate": 0.00012893202315359537, + "loss": 0.9944, + "step": 707 + }, + { + "epoch": 1.8107416879795397, + "grad_norm": 0.5361496724146492, + "learning_rate": 0.00012881890809426688, + "loss": 1.0212, + "step": 708 + }, + { + "epoch": 1.813299232736573, + "grad_norm": 0.4758891777297796, + "learning_rate": 0.00012870563731077277, + "loss": 0.9717, + "step": 709 + }, + { + "epoch": 1.815856777493606, + "grad_norm": 0.41562952895729655, + "learning_rate": 0.0001285922111644279, + "loss": 1.0162, + "step": 710 + }, + { + "epoch": 1.8184143222506393, + "grad_norm": 0.4923656957788762, + "learning_rate": 0.00012847863001704278, + "loss": 1.0685, + "step": 711 + }, + { + "epoch": 1.8209718670076727, + "grad_norm": 
0.43817036243213936, + "learning_rate": 0.00012836489423092225, + "loss": 1.0166, + "step": 712 + }, + { + "epoch": 1.8235294117647058, + "grad_norm": 0.36194875273904087, + "learning_rate": 0.00012825100416886454, + "loss": 1.0255, + "step": 713 + }, + { + "epoch": 1.8260869565217392, + "grad_norm": 0.5507986270387409, + "learning_rate": 0.0001281369601941599, + "loss": 1.0135, + "step": 714 + }, + { + "epoch": 1.8286445012787724, + "grad_norm": 0.685338916623197, + "learning_rate": 0.00012802276267058957, + "loss": 0.999, + "step": 715 + }, + { + "epoch": 1.8312020460358056, + "grad_norm": 0.5568312967518175, + "learning_rate": 0.00012790841196242458, + "loss": 1.0153, + "step": 716 + }, + { + "epoch": 1.8337595907928388, + "grad_norm": 0.4401729278401454, + "learning_rate": 0.00012779390843442462, + "loss": 0.9855, + "step": 717 + }, + { + "epoch": 1.836317135549872, + "grad_norm": 0.4249893778808539, + "learning_rate": 0.00012767925245183676, + "loss": 1.0351, + "step": 718 + }, + { + "epoch": 1.8388746803069054, + "grad_norm": 0.47539299147834413, + "learning_rate": 0.00012756444438039453, + "loss": 1.035, + "step": 719 + }, + { + "epoch": 1.8414322250639388, + "grad_norm": 0.5475741371560751, + "learning_rate": 0.00012744948458631646, + "loss": 1.0412, + "step": 720 + }, + { + "epoch": 1.843989769820972, + "grad_norm": 0.5751955332609484, + "learning_rate": 0.0001273343734363051, + "loss": 1.0419, + "step": 721 + }, + { + "epoch": 1.8465473145780051, + "grad_norm": 0.5673429560849089, + "learning_rate": 0.00012721911129754578, + "loss": 0.9993, + "step": 722 + }, + { + "epoch": 1.8491048593350383, + "grad_norm": 0.475786389030356, + "learning_rate": 0.0001271036985377055, + "loss": 1.0255, + "step": 723 + }, + { + "epoch": 1.8516624040920715, + "grad_norm": 0.4435215042959613, + "learning_rate": 0.00012698813552493174, + "loss": 1.0159, + "step": 724 + }, + { + "epoch": 1.854219948849105, + "grad_norm": 0.6384652673350472, + "learning_rate": 
0.00012687242262785116, + "loss": 1.0468, + "step": 725 + }, + { + "epoch": 1.856777493606138, + "grad_norm": 0.660707948092585, + "learning_rate": 0.00012675656021556855, + "loss": 0.9702, + "step": 726 + }, + { + "epoch": 1.8593350383631715, + "grad_norm": 0.5190779530078301, + "learning_rate": 0.00012664054865766573, + "loss": 0.9959, + "step": 727 + }, + { + "epoch": 1.8618925831202047, + "grad_norm": 0.59002541889049, + "learning_rate": 0.00012652438832420017, + "loss": 1.0009, + "step": 728 + }, + { + "epoch": 1.8644501278772379, + "grad_norm": 0.724406502768554, + "learning_rate": 0.00012640807958570394, + "loss": 1.0572, + "step": 729 + }, + { + "epoch": 1.867007672634271, + "grad_norm": 0.606082979636232, + "learning_rate": 0.00012629162281318248, + "loss": 1.0123, + "step": 730 + }, + { + "epoch": 1.8695652173913042, + "grad_norm": 0.3890444487309348, + "learning_rate": 0.00012617501837811347, + "loss": 0.9835, + "step": 731 + }, + { + "epoch": 1.8721227621483376, + "grad_norm": 0.4748189131220067, + "learning_rate": 0.00012605826665244559, + "loss": 1.0206, + "step": 732 + }, + { + "epoch": 1.8746803069053708, + "grad_norm": 0.5894024279814004, + "learning_rate": 0.00012594136800859733, + "loss": 1.0312, + "step": 733 + }, + { + "epoch": 1.8772378516624042, + "grad_norm": 0.8812294314944346, + "learning_rate": 0.00012582432281945587, + "loss": 0.9929, + "step": 734 + }, + { + "epoch": 1.8797953964194374, + "grad_norm": 1.2695722544281176, + "learning_rate": 0.0001257071314583758, + "loss": 1.0232, + "step": 735 + }, + { + "epoch": 1.8823529411764706, + "grad_norm": 0.7877721338048511, + "learning_rate": 0.00012558979429917803, + "loss": 1.0528, + "step": 736 + }, + { + "epoch": 1.8849104859335037, + "grad_norm": 0.6479567586178989, + "learning_rate": 0.00012547231171614845, + "loss": 1.0262, + "step": 737 + }, + { + "epoch": 1.887468030690537, + "grad_norm": 0.6844520570754378, + "learning_rate": 0.00012535468408403697, + "loss": 1.0333, + "step": 738 + 
}, + { + "epoch": 1.8900255754475703, + "grad_norm": 0.6085957966970293, + "learning_rate": 0.00012523691177805597, + "loss": 1.0168, + "step": 739 + }, + { + "epoch": 1.8925831202046037, + "grad_norm": 0.5254572324853038, + "learning_rate": 0.00012511899517387955, + "loss": 0.9883, + "step": 740 + }, + { + "epoch": 1.895140664961637, + "grad_norm": 0.6139364866532532, + "learning_rate": 0.00012500093464764197, + "loss": 0.9977, + "step": 741 + }, + { + "epoch": 1.89769820971867, + "grad_norm": 0.6998963267481692, + "learning_rate": 0.00012488273057593654, + "loss": 1.0044, + "step": 742 + }, + { + "epoch": 1.9002557544757033, + "grad_norm": 0.5270554785542413, + "learning_rate": 0.00012476438333581456, + "loss": 1.0412, + "step": 743 + }, + { + "epoch": 1.9028132992327365, + "grad_norm": 0.5157043265448235, + "learning_rate": 0.00012464589330478398, + "loss": 0.9978, + "step": 744 + }, + { + "epoch": 1.9053708439897699, + "grad_norm": 0.5631065206891138, + "learning_rate": 0.0001245272608608082, + "loss": 0.9944, + "step": 745 + }, + { + "epoch": 1.907928388746803, + "grad_norm": 0.4807212257749526, + "learning_rate": 0.00012440848638230485, + "loss": 1.0184, + "step": 746 + }, + { + "epoch": 1.9104859335038364, + "grad_norm": 0.42670701279562534, + "learning_rate": 0.00012428957024814477, + "loss": 1.0105, + "step": 747 + }, + { + "epoch": 1.9130434782608696, + "grad_norm": 0.41188284810782877, + "learning_rate": 0.00012417051283765055, + "loss": 1.0256, + "step": 748 + }, + { + "epoch": 1.9156010230179028, + "grad_norm": 0.39912216267661754, + "learning_rate": 0.0001240513145305954, + "loss": 1.0479, + "step": 749 + }, + { + "epoch": 1.918158567774936, + "grad_norm": 0.40181896505552256, + "learning_rate": 0.00012393197570720208, + "loss": 1.0006, + "step": 750 + }, + { + "epoch": 1.9207161125319692, + "grad_norm": 0.4686514718132313, + "learning_rate": 0.0001238124967481415, + "loss": 1.0527, + "step": 751 + }, + { + "epoch": 1.9232736572890026, + "grad_norm": 
0.4847458570755899, + "learning_rate": 0.00012369287803453156, + "loss": 1.0039, + "step": 752 + }, + { + "epoch": 1.9258312020460358, + "grad_norm": 0.5873940841619928, + "learning_rate": 0.00012357311994793603, + "loss": 1.0191, + "step": 753 + }, + { + "epoch": 1.9283887468030692, + "grad_norm": 0.6710549953392281, + "learning_rate": 0.00012345322287036315, + "loss": 1.014, + "step": 754 + }, + { + "epoch": 1.9309462915601023, + "grad_norm": 0.7897611598340533, + "learning_rate": 0.0001233331871842646, + "loss": 0.9853, + "step": 755 + }, + { + "epoch": 1.9335038363171355, + "grad_norm": 0.870069888372245, + "learning_rate": 0.0001232130132725342, + "loss": 1.022, + "step": 756 + }, + { + "epoch": 1.9360613810741687, + "grad_norm": 1.0698935466826593, + "learning_rate": 0.00012309270151850666, + "loss": 1.0199, + "step": 757 + }, + { + "epoch": 1.938618925831202, + "grad_norm": 1.0318153691478889, + "learning_rate": 0.00012297225230595637, + "loss": 1.0008, + "step": 758 + }, + { + "epoch": 1.9411764705882353, + "grad_norm": 0.8031059628622865, + "learning_rate": 0.0001228516660190962, + "loss": 1.0464, + "step": 759 + }, + { + "epoch": 1.9437340153452687, + "grad_norm": 0.4432470641559668, + "learning_rate": 0.00012273094304257633, + "loss": 1.0486, + "step": 760 + }, + { + "epoch": 1.9462915601023019, + "grad_norm": 0.4413834236432169, + "learning_rate": 0.00012261008376148282, + "loss": 1.0483, + "step": 761 + }, + { + "epoch": 1.948849104859335, + "grad_norm": 0.5753204802658383, + "learning_rate": 0.0001224890885613366, + "loss": 1.026, + "step": 762 + }, + { + "epoch": 1.9514066496163682, + "grad_norm": 0.6330964706251369, + "learning_rate": 0.00012236795782809225, + "loss": 1.017, + "step": 763 + }, + { + "epoch": 1.9539641943734014, + "grad_norm": 0.6869010778127252, + "learning_rate": 0.00012224669194813647, + "loss": 1.031, + "step": 764 + }, + { + "epoch": 1.9565217391304348, + "grad_norm": 0.7455335150670086, + "learning_rate": 
0.00012212529130828725, + "loss": 0.9639, + "step": 765 + }, + { + "epoch": 1.959079283887468, + "grad_norm": 0.6598851148094896, + "learning_rate": 0.00012200375629579234, + "loss": 1.0298, + "step": 766 + }, + { + "epoch": 1.9616368286445014, + "grad_norm": 0.44847708135640946, + "learning_rate": 0.0001218820872983281, + "loss": 0.9979, + "step": 767 + }, + { + "epoch": 1.9641943734015346, + "grad_norm": 0.4421542384496395, + "learning_rate": 0.00012176028470399836, + "loss": 1.0219, + "step": 768 + }, + { + "epoch": 1.9667519181585678, + "grad_norm": 0.5551681283301225, + "learning_rate": 0.00012163834890133303, + "loss": 1.0321, + "step": 769 + }, + { + "epoch": 1.969309462915601, + "grad_norm": 0.5433680138372817, + "learning_rate": 0.000121516280279287, + "loss": 1.0152, + "step": 770 + }, + { + "epoch": 1.9718670076726341, + "grad_norm": 0.3927534411279976, + "learning_rate": 0.00012139407922723875, + "loss": 1.0056, + "step": 771 + }, + { + "epoch": 1.9744245524296675, + "grad_norm": 0.3504638375301521, + "learning_rate": 0.00012127174613498925, + "loss": 1.0211, + "step": 772 + }, + { + "epoch": 1.976982097186701, + "grad_norm": 0.5235226714465111, + "learning_rate": 0.00012114928139276064, + "loss": 1.0298, + "step": 773 + }, + { + "epoch": 1.979539641943734, + "grad_norm": 0.47218634270204046, + "learning_rate": 0.00012102668539119501, + "loss": 0.997, + "step": 774 + }, + { + "epoch": 1.9820971867007673, + "grad_norm": 0.3909468495312419, + "learning_rate": 0.00012090395852135314, + "loss": 1.008, + "step": 775 + }, + { + "epoch": 1.9846547314578005, + "grad_norm": 0.3354579546285365, + "learning_rate": 0.0001207811011747132, + "loss": 1.0247, + "step": 776 + }, + { + "epoch": 1.9872122762148337, + "grad_norm": 0.3467079716757078, + "learning_rate": 0.00012065811374316966, + "loss": 1.0049, + "step": 777 + }, + { + "epoch": 1.989769820971867, + "grad_norm": 0.3407603167118022, + "learning_rate": 0.0001205349966190319, + "loss": 1.0454, + "step": 778 + 
}, + { + "epoch": 1.9923273657289002, + "grad_norm": 0.3172074392515775, + "learning_rate": 0.00012041175019502295, + "loss": 1.0269, + "step": 779 + }, + { + "epoch": 1.9948849104859336, + "grad_norm": 0.38289682905322714, + "learning_rate": 0.00012028837486427837, + "loss": 1.0085, + "step": 780 + }, + { + "epoch": 1.9974424552429668, + "grad_norm": 0.3409699287203162, + "learning_rate": 0.00012016487102034482, + "loss": 1.0151, + "step": 781 + }, + { + "epoch": 2.0, + "grad_norm": 0.4841721621140613, + "learning_rate": 0.00012004123905717898, + "loss": 0.9888, + "step": 782 + }, + { + "epoch": 2.002557544757033, + "grad_norm": 0.5947034995797379, + "learning_rate": 0.00011991747936914614, + "loss": 0.98, + "step": 783 + }, + { + "epoch": 2.0051150895140664, + "grad_norm": 0.5314717777356649, + "learning_rate": 0.00011979359235101906, + "loss": 0.966, + "step": 784 + }, + { + "epoch": 2.0076726342710995, + "grad_norm": 0.4148615363763489, + "learning_rate": 0.00011966957839797664, + "loss": 0.9695, + "step": 785 + }, + { + "epoch": 2.010230179028133, + "grad_norm": 0.4001599305252567, + "learning_rate": 0.00011954543790560267, + "loss": 1.0493, + "step": 786 + }, + { + "epoch": 2.0127877237851663, + "grad_norm": 0.43752065357850173, + "learning_rate": 0.00011942117126988461, + "loss": 0.9883, + "step": 787 + }, + { + "epoch": 2.0153452685421995, + "grad_norm": 0.5092717368916159, + "learning_rate": 0.00011929677888721227, + "loss": 0.9984, + "step": 788 + }, + { + "epoch": 2.0179028132992327, + "grad_norm": 0.5840375290444557, + "learning_rate": 0.00011917226115437656, + "loss": 0.9833, + "step": 789 + }, + { + "epoch": 2.020460358056266, + "grad_norm": 0.573138093028074, + "learning_rate": 0.00011904761846856831, + "loss": 0.9724, + "step": 790 + }, + { + "epoch": 2.023017902813299, + "grad_norm": 0.5890770850578259, + "learning_rate": 0.00011892285122737683, + "loss": 0.9699, + "step": 791 + }, + { + "epoch": 2.0255754475703327, + "grad_norm": 
0.5692021165096304, + "learning_rate": 0.00011879795982878883, + "loss": 0.9741, + "step": 792 + }, + { + "epoch": 2.028132992327366, + "grad_norm": 0.6399550167383995, + "learning_rate": 0.00011867294467118698, + "loss": 0.9682, + "step": 793 + }, + { + "epoch": 2.030690537084399, + "grad_norm": 0.7338640869363395, + "learning_rate": 0.00011854780615334875, + "loss": 0.9683, + "step": 794 + }, + { + "epoch": 2.0332480818414322, + "grad_norm": 0.806906500405086, + "learning_rate": 0.00011842254467444517, + "loss": 0.9756, + "step": 795 + }, + { + "epoch": 2.0358056265984654, + "grad_norm": 0.7925351913713344, + "learning_rate": 0.0001182971606340394, + "loss": 0.9853, + "step": 796 + }, + { + "epoch": 2.0383631713554986, + "grad_norm": 0.6258347835444797, + "learning_rate": 0.00011817165443208562, + "loss": 1.0054, + "step": 797 + }, + { + "epoch": 2.040920716112532, + "grad_norm": 0.4512585898690294, + "learning_rate": 0.00011804602646892762, + "loss": 0.9792, + "step": 798 + }, + { + "epoch": 2.0434782608695654, + "grad_norm": 0.3681772077619349, + "learning_rate": 0.00011792027714529767, + "loss": 0.9788, + "step": 799 + }, + { + "epoch": 2.0460358056265986, + "grad_norm": 0.4769785686846811, + "learning_rate": 0.0001177944068623151, + "loss": 1.023, + "step": 800 + }, + { + "epoch": 2.0485933503836318, + "grad_norm": 0.5513670753501893, + "learning_rate": 0.00011766841602148507, + "loss": 0.9758, + "step": 801 + }, + { + "epoch": 2.051150895140665, + "grad_norm": 0.5343242524485008, + "learning_rate": 0.00011754230502469739, + "loss": 0.9828, + "step": 802 + }, + { + "epoch": 2.053708439897698, + "grad_norm": 0.3790786798266737, + "learning_rate": 0.00011741607427422502, + "loss": 0.9891, + "step": 803 + }, + { + "epoch": 2.0562659846547313, + "grad_norm": 0.3356594047836669, + "learning_rate": 0.000117289724172723, + "loss": 1.0182, + "step": 804 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.4979916614188739, + "learning_rate": 
0.00011716325512322707, + "loss": 0.9653, + "step": 805 + }, + { + "epoch": 2.061381074168798, + "grad_norm": 0.5917115439040083, + "learning_rate": 0.00011703666752915235, + "loss": 0.9779, + "step": 806 + }, + { + "epoch": 2.0639386189258313, + "grad_norm": 0.7711282568070231, + "learning_rate": 0.00011690996179429219, + "loss": 1.0192, + "step": 807 + }, + { + "epoch": 2.0664961636828645, + "grad_norm": 0.9738458712850159, + "learning_rate": 0.00011678313832281664, + "loss": 0.9929, + "step": 808 + }, + { + "epoch": 2.0690537084398977, + "grad_norm": 1.0543246508556696, + "learning_rate": 0.00011665619751927146, + "loss": 0.9711, + "step": 809 + }, + { + "epoch": 2.071611253196931, + "grad_norm": 0.7273546848221022, + "learning_rate": 0.00011652913978857664, + "loss": 0.9732, + "step": 810 + }, + { + "epoch": 2.074168797953964, + "grad_norm": 0.5119256334998138, + "learning_rate": 0.00011640196553602505, + "loss": 0.9955, + "step": 811 + }, + { + "epoch": 2.0767263427109977, + "grad_norm": 0.36268273560962566, + "learning_rate": 0.00011627467516728138, + "loss": 0.9706, + "step": 812 + }, + { + "epoch": 2.079283887468031, + "grad_norm": 0.40355937427082544, + "learning_rate": 0.00011614726908838063, + "loss": 0.9712, + "step": 813 + }, + { + "epoch": 2.081841432225064, + "grad_norm": 0.5018343946579583, + "learning_rate": 0.00011601974770572692, + "loss": 1.0314, + "step": 814 + }, + { + "epoch": 2.084398976982097, + "grad_norm": 0.49570234160885446, + "learning_rate": 0.0001158921114260922, + "loss": 0.961, + "step": 815 + }, + { + "epoch": 2.0869565217391304, + "grad_norm": 0.5836483164644858, + "learning_rate": 0.00011576436065661484, + "loss": 0.9732, + "step": 816 + }, + { + "epoch": 2.0895140664961636, + "grad_norm": 0.562651886144191, + "learning_rate": 0.00011563649580479848, + "loss": 0.9827, + "step": 817 + }, + { + "epoch": 2.0920716112531967, + "grad_norm": 0.3634053027085326, + "learning_rate": 0.00011550851727851067, + "loss": 0.9634, + "step": 818 
+ }, + { + "epoch": 2.0946291560102304, + "grad_norm": 0.35421206748470696, + "learning_rate": 0.00011538042548598154, + "loss": 0.9674, + "step": 819 + }, + { + "epoch": 2.0971867007672635, + "grad_norm": 0.34410099266933664, + "learning_rate": 0.00011525222083580247, + "loss": 0.9682, + "step": 820 + }, + { + "epoch": 2.0997442455242967, + "grad_norm": 0.36019738429870557, + "learning_rate": 0.00011512390373692495, + "loss": 0.98, + "step": 821 + }, + { + "epoch": 2.10230179028133, + "grad_norm": 0.4497160405180852, + "learning_rate": 0.00011499547459865908, + "loss": 0.9658, + "step": 822 + }, + { + "epoch": 2.104859335038363, + "grad_norm": 0.48924052145081715, + "learning_rate": 0.00011486693383067234, + "loss": 0.9961, + "step": 823 + }, + { + "epoch": 2.1074168797953963, + "grad_norm": 0.51728675513698, + "learning_rate": 0.0001147382818429884, + "loss": 0.9886, + "step": 824 + }, + { + "epoch": 2.10997442455243, + "grad_norm": 0.48298534091718054, + "learning_rate": 0.0001146095190459855, + "loss": 0.99, + "step": 825 + }, + { + "epoch": 2.112531969309463, + "grad_norm": 0.3873329201691133, + "learning_rate": 0.00011448064585039555, + "loss": 0.9855, + "step": 826 + }, + { + "epoch": 2.1150895140664963, + "grad_norm": 0.36617676835976043, + "learning_rate": 0.0001143516626673025, + "loss": 0.9784, + "step": 827 + }, + { + "epoch": 2.1176470588235294, + "grad_norm": 0.39303542839485295, + "learning_rate": 0.00011422256990814115, + "loss": 0.9884, + "step": 828 + }, + { + "epoch": 2.1202046035805626, + "grad_norm": 0.5159106405133932, + "learning_rate": 0.0001140933679846959, + "loss": 0.9926, + "step": 829 + }, + { + "epoch": 2.122762148337596, + "grad_norm": 0.7469560811887815, + "learning_rate": 0.00011396405730909925, + "loss": 1.0183, + "step": 830 + }, + { + "epoch": 2.125319693094629, + "grad_norm": 0.7327464479712988, + "learning_rate": 0.00011383463829383071, + "loss": 1.0098, + "step": 831 + }, + { + "epoch": 2.1278772378516626, + "grad_norm": 
0.5977082749289835, + "learning_rate": 0.00011370511135171532, + "loss": 1.0071, + "step": 832 + }, + { + "epoch": 2.130434782608696, + "grad_norm": 0.4052295767189102, + "learning_rate": 0.00011357547689592237, + "loss": 1.0049, + "step": 833 + }, + { + "epoch": 2.132992327365729, + "grad_norm": 0.5292207555015371, + "learning_rate": 0.00011344573533996417, + "loss": 0.9656, + "step": 834 + }, + { + "epoch": 2.135549872122762, + "grad_norm": 0.4549224765225602, + "learning_rate": 0.0001133158870976946, + "loss": 0.9968, + "step": 835 + }, + { + "epoch": 2.1381074168797953, + "grad_norm": 0.4460508304219039, + "learning_rate": 0.00011318593258330785, + "loss": 1.0134, + "step": 836 + }, + { + "epoch": 2.1406649616368285, + "grad_norm": 0.46592246024671363, + "learning_rate": 0.00011305587221133718, + "loss": 0.9522, + "step": 837 + }, + { + "epoch": 2.1432225063938617, + "grad_norm": 0.4489945484428353, + "learning_rate": 0.00011292570639665342, + "loss": 1.0104, + "step": 838 + }, + { + "epoch": 2.1457800511508953, + "grad_norm": 0.46784938019320965, + "learning_rate": 0.00011279543555446379, + "loss": 0.988, + "step": 839 + }, + { + "epoch": 2.1483375959079285, + "grad_norm": 0.4200222134898951, + "learning_rate": 0.00011266506010031052, + "loss": 1.0119, + "step": 840 + }, + { + "epoch": 2.1508951406649617, + "grad_norm": 0.3655050664603677, + "learning_rate": 0.00011253458045006955, + "loss": 0.9895, + "step": 841 + }, + { + "epoch": 2.153452685421995, + "grad_norm": 0.3022642865356664, + "learning_rate": 0.00011240399701994919, + "loss": 1.001, + "step": 842 + }, + { + "epoch": 2.156010230179028, + "grad_norm": 0.3188747440198214, + "learning_rate": 0.00011227331022648877, + "loss": 0.9773, + "step": 843 + }, + { + "epoch": 2.1585677749360612, + "grad_norm": 0.41190200456297044, + "learning_rate": 0.00011214252048655733, + "loss": 1.024, + "step": 844 + }, + { + "epoch": 2.1611253196930944, + "grad_norm": 0.33803198230453474, + "learning_rate": 
0.00011201162821735228, + "loss": 0.9843, + "step": 845 + }, + { + "epoch": 2.163682864450128, + "grad_norm": 0.36583158073668925, + "learning_rate": 0.00011188063383639817, + "loss": 0.9809, + "step": 846 + }, + { + "epoch": 2.166240409207161, + "grad_norm": 0.39675634848639996, + "learning_rate": 0.00011174953776154516, + "loss": 0.942, + "step": 847 + }, + { + "epoch": 2.1687979539641944, + "grad_norm": 0.4164372273567332, + "learning_rate": 0.00011161834041096782, + "loss": 1.0337, + "step": 848 + }, + { + "epoch": 2.1713554987212276, + "grad_norm": 0.42306948681428896, + "learning_rate": 0.00011148704220316387, + "loss": 0.9913, + "step": 849 + }, + { + "epoch": 2.1739130434782608, + "grad_norm": 0.374454297267049, + "learning_rate": 0.0001113556435569526, + "loss": 0.9928, + "step": 850 + }, + { + "epoch": 2.176470588235294, + "grad_norm": 0.31767286286037444, + "learning_rate": 0.00011122414489147376, + "loss": 0.9972, + "step": 851 + }, + { + "epoch": 2.1790281329923276, + "grad_norm": 0.36673595005863613, + "learning_rate": 0.00011109254662618616, + "loss": 1.0105, + "step": 852 + }, + { + "epoch": 2.1815856777493607, + "grad_norm": 0.5025085408193712, + "learning_rate": 0.00011096084918086626, + "loss": 0.9508, + "step": 853 + }, + { + "epoch": 2.184143222506394, + "grad_norm": 0.5453118752197188, + "learning_rate": 0.00011082905297560697, + "loss": 0.9354, + "step": 854 + }, + { + "epoch": 2.186700767263427, + "grad_norm": 0.535508310533172, + "learning_rate": 0.00011069715843081613, + "loss": 0.986, + "step": 855 + }, + { + "epoch": 2.1892583120204603, + "grad_norm": 0.5550105153386212, + "learning_rate": 0.00011056516596721534, + "loss": 1.0047, + "step": 856 + }, + { + "epoch": 2.1918158567774935, + "grad_norm": 0.5522958050937595, + "learning_rate": 0.00011043307600583854, + "loss": 1.0204, + "step": 857 + }, + { + "epoch": 2.1943734015345266, + "grad_norm": 0.514732209947304, + "learning_rate": 0.0001103008889680306, + "loss": 1.0137, + "step": 858 
+ }, + { + "epoch": 2.1969309462915603, + "grad_norm": 0.5281211410564769, + "learning_rate": 0.00011016860527544616, + "loss": 1.0085, + "step": 859 + }, + { + "epoch": 2.1994884910485935, + "grad_norm": 0.46959816689384604, + "learning_rate": 0.00011003622535004806, + "loss": 1.0058, + "step": 860 + }, + { + "epoch": 2.2020460358056266, + "grad_norm": 0.3407338275520536, + "learning_rate": 0.0001099037496141062, + "loss": 0.9986, + "step": 861 + }, + { + "epoch": 2.20460358056266, + "grad_norm": 0.47884582066611536, + "learning_rate": 0.00010977117849019604, + "loss": 0.9707, + "step": 862 + }, + { + "epoch": 2.207161125319693, + "grad_norm": 0.6169099163617163, + "learning_rate": 0.00010963851240119731, + "loss": 0.9957, + "step": 863 + }, + { + "epoch": 2.209718670076726, + "grad_norm": 0.5842777084702644, + "learning_rate": 0.00010950575177029271, + "loss": 0.9971, + "step": 864 + }, + { + "epoch": 2.21227621483376, + "grad_norm": 0.5415512252484223, + "learning_rate": 0.00010937289702096648, + "loss": 0.955, + "step": 865 + }, + { + "epoch": 2.214833759590793, + "grad_norm": 0.5584987591506012, + "learning_rate": 0.00010923994857700308, + "loss": 0.9858, + "step": 866 + }, + { + "epoch": 2.217391304347826, + "grad_norm": 0.5438681169787357, + "learning_rate": 0.00010910690686248587, + "loss": 1.0272, + "step": 867 + }, + { + "epoch": 2.2199488491048593, + "grad_norm": 0.45923876211266634, + "learning_rate": 0.00010897377230179568, + "loss": 0.9689, + "step": 868 + }, + { + "epoch": 2.2225063938618925, + "grad_norm": 0.344989298275585, + "learning_rate": 0.00010884054531960956, + "loss": 1.005, + "step": 869 + }, + { + "epoch": 2.2250639386189257, + "grad_norm": 0.3203832886307522, + "learning_rate": 0.00010870722634089927, + "loss": 0.9904, + "step": 870 + }, + { + "epoch": 2.227621483375959, + "grad_norm": 0.4050058894119621, + "learning_rate": 0.0001085738157909302, + "loss": 0.9716, + "step": 871 + }, + { + "epoch": 2.2301790281329925, + "grad_norm": 
0.5042105083367587, + "learning_rate": 0.00010844031409525962, + "loss": 0.9921, + "step": 872 + }, + { + "epoch": 2.2327365728900257, + "grad_norm": 0.5771976233792036, + "learning_rate": 0.00010830672167973572, + "loss": 1.0081, + "step": 873 + }, + { + "epoch": 2.235294117647059, + "grad_norm": 0.6444239077326948, + "learning_rate": 0.00010817303897049597, + "loss": 0.9961, + "step": 874 + }, + { + "epoch": 2.237851662404092, + "grad_norm": 0.6303091061510789, + "learning_rate": 0.0001080392663939659, + "loss": 0.9648, + "step": 875 + }, + { + "epoch": 2.2404092071611252, + "grad_norm": 0.5383211537711221, + "learning_rate": 0.00010790540437685771, + "loss": 0.9835, + "step": 876 + }, + { + "epoch": 2.2429667519181584, + "grad_norm": 0.4021404516007495, + "learning_rate": 0.00010777145334616884, + "loss": 0.9732, + "step": 877 + }, + { + "epoch": 2.2455242966751916, + "grad_norm": 0.31439318271272565, + "learning_rate": 0.00010763741372918076, + "loss": 0.9799, + "step": 878 + }, + { + "epoch": 2.2480818414322252, + "grad_norm": 0.4404091457741591, + "learning_rate": 0.00010750328595345744, + "loss": 0.9798, + "step": 879 + }, + { + "epoch": 2.2506393861892584, + "grad_norm": 0.5676899676174939, + "learning_rate": 0.00010736907044684409, + "loss": 0.956, + "step": 880 + }, + { + "epoch": 2.2531969309462916, + "grad_norm": 0.6251515987816799, + "learning_rate": 0.00010723476763746578, + "loss": 0.9766, + "step": 881 + }, + { + "epoch": 2.2557544757033248, + "grad_norm": 0.6188152066667294, + "learning_rate": 0.00010710037795372604, + "loss": 0.9436, + "step": 882 + }, + { + "epoch": 2.258312020460358, + "grad_norm": 0.561619175816319, + "learning_rate": 0.00010696590182430552, + "loss": 0.9829, + "step": 883 + }, + { + "epoch": 2.260869565217391, + "grad_norm": 0.42915411587906266, + "learning_rate": 0.00010683133967816062, + "loss": 0.9776, + "step": 884 + }, + { + "epoch": 2.2634271099744243, + "grad_norm": 0.3524127037006637, + "learning_rate": 
0.00010669669194452213, + "loss": 0.9966, + "step": 885 + }, + { + "epoch": 2.265984654731458, + "grad_norm": 0.3537805903644639, + "learning_rate": 0.00010656195905289382, + "loss": 1.0042, + "step": 886 + }, + { + "epoch": 2.268542199488491, + "grad_norm": 0.38907067845530163, + "learning_rate": 0.00010642714143305115, + "loss": 0.9591, + "step": 887 + }, + { + "epoch": 2.2710997442455243, + "grad_norm": 0.4388187336605131, + "learning_rate": 0.00010629223951503975, + "loss": 0.9657, + "step": 888 + }, + { + "epoch": 2.2736572890025575, + "grad_norm": 0.5259226887120563, + "learning_rate": 0.00010615725372917429, + "loss": 0.9902, + "step": 889 + }, + { + "epoch": 2.2762148337595907, + "grad_norm": 0.5228861897572435, + "learning_rate": 0.00010602218450603687, + "loss": 1.0222, + "step": 890 + }, + { + "epoch": 2.2787723785166243, + "grad_norm": 0.5036534202887699, + "learning_rate": 0.00010588703227647573, + "loss": 1.0003, + "step": 891 + }, + { + "epoch": 2.2813299232736575, + "grad_norm": 0.3581923819862395, + "learning_rate": 0.00010575179747160391, + "loss": 0.9834, + "step": 892 + }, + { + "epoch": 2.2838874680306906, + "grad_norm": 0.3410033765731837, + "learning_rate": 0.00010561648052279792, + "loss": 0.9893, + "step": 893 + }, + { + "epoch": 2.286445012787724, + "grad_norm": 0.48497621648344247, + "learning_rate": 0.00010548108186169619, + "loss": 1.0097, + "step": 894 + }, + { + "epoch": 2.289002557544757, + "grad_norm": 0.4811056602507645, + "learning_rate": 0.00010534560192019784, + "loss": 0.9987, + "step": 895 + }, + { + "epoch": 2.29156010230179, + "grad_norm": 0.5430558900686754, + "learning_rate": 0.00010521004113046126, + "loss": 0.9863, + "step": 896 + }, + { + "epoch": 2.2941176470588234, + "grad_norm": 0.5520225619306299, + "learning_rate": 0.00010507439992490274, + "loss": 0.9854, + "step": 897 + }, + { + "epoch": 2.296675191815857, + "grad_norm": 0.5368891057768155, + "learning_rate": 0.00010493867873619509, + "loss": 0.962, + "step": 898 
+ }, + { + "epoch": 2.29923273657289, + "grad_norm": 0.45785580350946786, + "learning_rate": 0.00010480287799726624, + "loss": 0.9951, + "step": 899 + }, + { + "epoch": 2.3017902813299234, + "grad_norm": 0.3134044741551554, + "learning_rate": 0.00010466699814129784, + "loss": 0.9808, + "step": 900 + }, + { + "epoch": 2.3043478260869565, + "grad_norm": 0.3718160522616458, + "learning_rate": 0.00010453103960172399, + "loss": 0.9722, + "step": 901 + }, + { + "epoch": 2.3069053708439897, + "grad_norm": 0.42777708592376057, + "learning_rate": 0.0001043950028122297, + "loss": 0.9778, + "step": 902 + }, + { + "epoch": 2.309462915601023, + "grad_norm": 0.5114598924445181, + "learning_rate": 0.00010425888820674964, + "loss": 0.9999, + "step": 903 + }, + { + "epoch": 2.312020460358056, + "grad_norm": 0.42665599355653705, + "learning_rate": 0.00010412269621946664, + "loss": 0.9277, + "step": 904 + }, + { + "epoch": 2.3145780051150897, + "grad_norm": 0.32425667546420855, + "learning_rate": 0.0001039864272848104, + "loss": 0.9623, + "step": 905 + }, + { + "epoch": 2.317135549872123, + "grad_norm": 0.278767997134977, + "learning_rate": 0.00010385008183745614, + "loss": 0.9709, + "step": 906 + }, + { + "epoch": 2.319693094629156, + "grad_norm": 0.2973268406415685, + "learning_rate": 0.00010371366031232298, + "loss": 0.9752, + "step": 907 + }, + { + "epoch": 2.3222506393861893, + "grad_norm": 0.32805655210523665, + "learning_rate": 0.00010357716314457286, + "loss": 1.0151, + "step": 908 + }, + { + "epoch": 2.3248081841432224, + "grad_norm": 0.3136457006720511, + "learning_rate": 0.00010344059076960893, + "loss": 0.9525, + "step": 909 + }, + { + "epoch": 2.3273657289002556, + "grad_norm": 0.36706796314794027, + "learning_rate": 0.00010330394362307426, + "loss": 1.0263, + "step": 910 + }, + { + "epoch": 2.329923273657289, + "grad_norm": 0.3628334304816528, + "learning_rate": 0.00010316722214085048, + "loss": 1.0032, + "step": 911 + }, + { + "epoch": 2.3324808184143224, + 
"grad_norm": 0.4614008122870428, + "learning_rate": 0.00010303042675905623, + "loss": 0.9655, + "step": 912 + }, + { + "epoch": 2.3350383631713556, + "grad_norm": 0.5091780040539386, + "learning_rate": 0.00010289355791404597, + "loss": 0.9963, + "step": 913 + }, + { + "epoch": 2.337595907928389, + "grad_norm": 0.4886959522852251, + "learning_rate": 0.00010275661604240844, + "loss": 0.9959, + "step": 914 + }, + { + "epoch": 2.340153452685422, + "grad_norm": 0.3477812096500851, + "learning_rate": 0.00010261960158096538, + "loss": 0.9923, + "step": 915 + }, + { + "epoch": 2.342710997442455, + "grad_norm": 0.3003617995320152, + "learning_rate": 0.00010248251496677002, + "loss": 1.0133, + "step": 916 + }, + { + "epoch": 2.3452685421994883, + "grad_norm": 0.3907656568645366, + "learning_rate": 0.00010234535663710578, + "loss": 0.9559, + "step": 917 + }, + { + "epoch": 2.3478260869565215, + "grad_norm": 0.44450800877616453, + "learning_rate": 0.00010220812702948483, + "loss": 0.9839, + "step": 918 + }, + { + "epoch": 2.350383631713555, + "grad_norm": 0.41444476133681435, + "learning_rate": 0.00010207082658164668, + "loss": 0.9695, + "step": 919 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.3486015741078046, + "learning_rate": 0.00010193345573155686, + "loss": 0.9699, + "step": 920 + }, + { + "epoch": 2.3554987212276215, + "grad_norm": 0.305313779906682, + "learning_rate": 0.00010179601491740546, + "loss": 0.9737, + "step": 921 + }, + { + "epoch": 2.3580562659846547, + "grad_norm": 0.3210944860271877, + "learning_rate": 0.00010165850457760569, + "loss": 0.9734, + "step": 922 + }, + { + "epoch": 2.360613810741688, + "grad_norm": 0.33354001864174027, + "learning_rate": 0.00010152092515079263, + "loss": 0.9758, + "step": 923 + }, + { + "epoch": 2.363171355498721, + "grad_norm": 0.3630435985390137, + "learning_rate": 0.00010138327707582161, + "loss": 0.9843, + "step": 924 + }, + { + "epoch": 2.3657289002557547, + "grad_norm": 0.3068154551503405, + "learning_rate": 
0.00010124556079176705, + "loss": 0.9718, + "step": 925 + }, + { + "epoch": 2.368286445012788, + "grad_norm": 0.3145375023118287, + "learning_rate": 0.0001011077767379209, + "loss": 0.9485, + "step": 926 + }, + { + "epoch": 2.370843989769821, + "grad_norm": 0.4562062846091247, + "learning_rate": 0.00010096992535379125, + "loss": 1.0041, + "step": 927 + }, + { + "epoch": 2.373401534526854, + "grad_norm": 0.4613854636034836, + "learning_rate": 0.00010083200707910109, + "loss": 1.0095, + "step": 928 + }, + { + "epoch": 2.3759590792838874, + "grad_norm": 0.5020460478647006, + "learning_rate": 0.00010069402235378657, + "loss": 0.9793, + "step": 929 + }, + { + "epoch": 2.3785166240409206, + "grad_norm": 0.47032502181209285, + "learning_rate": 0.000100555971617996, + "loss": 1.003, + "step": 930 + }, + { + "epoch": 2.381074168797954, + "grad_norm": 0.37153265133623853, + "learning_rate": 0.00010041785531208813, + "loss": 0.9707, + "step": 931 + }, + { + "epoch": 2.3836317135549874, + "grad_norm": 0.2954908430723523, + "learning_rate": 0.00010027967387663098, + "loss": 0.9943, + "step": 932 + }, + { + "epoch": 2.3861892583120206, + "grad_norm": 0.2860326087524264, + "learning_rate": 0.00010014142775240018, + "loss": 0.978, + "step": 933 + }, + { + "epoch": 2.3887468030690537, + "grad_norm": 0.36670864980970264, + "learning_rate": 0.00010000311738037786, + "loss": 0.9654, + "step": 934 + }, + { + "epoch": 2.391304347826087, + "grad_norm": 0.39639852002586273, + "learning_rate": 9.986474320175097e-05, + "loss": 0.964, + "step": 935 + }, + { + "epoch": 2.39386189258312, + "grad_norm": 0.3585981520256939, + "learning_rate": 9.972630565791003e-05, + "loss": 0.9825, + "step": 936 + }, + { + "epoch": 2.3964194373401533, + "grad_norm": 0.3189834091257556, + "learning_rate": 9.958780519044772e-05, + "loss": 0.9851, + "step": 937 + }, + { + "epoch": 2.398976982097187, + "grad_norm": 0.3049358905004256, + "learning_rate": 9.944924224115737e-05, + "loss": 0.9939, + "step": 938 + }, + 
{ + "epoch": 2.40153452685422, + "grad_norm": 0.2622458924767327, + "learning_rate": 9.931061725203167e-05, + "loss": 0.9781, + "step": 939 + }, + { + "epoch": 2.4040920716112533, + "grad_norm": 0.2924257759631161, + "learning_rate": 9.917193066526122e-05, + "loss": 0.9868, + "step": 940 + }, + { + "epoch": 2.4066496163682864, + "grad_norm": 0.3604978006726876, + "learning_rate": 9.903318292323301e-05, + "loss": 0.9754, + "step": 941 + }, + { + "epoch": 2.4092071611253196, + "grad_norm": 0.29745498369836404, + "learning_rate": 9.889437446852923e-05, + "loss": 0.9859, + "step": 942 + }, + { + "epoch": 2.411764705882353, + "grad_norm": 0.37371862497237623, + "learning_rate": 9.875550574392565e-05, + "loss": 0.9896, + "step": 943 + }, + { + "epoch": 2.414322250639386, + "grad_norm": 0.38638295584959187, + "learning_rate": 9.86165771923903e-05, + "loss": 0.9881, + "step": 944 + }, + { + "epoch": 2.4168797953964196, + "grad_norm": 0.4041126989806797, + "learning_rate": 9.84775892570821e-05, + "loss": 0.9428, + "step": 945 + }, + { + "epoch": 2.419437340153453, + "grad_norm": 0.395096912214402, + "learning_rate": 9.833854238134931e-05, + "loss": 0.9622, + "step": 946 + }, + { + "epoch": 2.421994884910486, + "grad_norm": 0.3464290247147215, + "learning_rate": 9.819943700872828e-05, + "loss": 1.0125, + "step": 947 + }, + { + "epoch": 2.424552429667519, + "grad_norm": 0.28843985739584715, + "learning_rate": 9.806027358294195e-05, + "loss": 0.9712, + "step": 948 + }, + { + "epoch": 2.4271099744245523, + "grad_norm": 0.38051542261971155, + "learning_rate": 9.792105254789834e-05, + "loss": 0.9851, + "step": 949 + }, + { + "epoch": 2.4296675191815855, + "grad_norm": 0.4466310758086544, + "learning_rate": 9.778177434768935e-05, + "loss": 0.9683, + "step": 950 + }, + { + "epoch": 2.4322250639386187, + "grad_norm": 0.4692147641165216, + "learning_rate": 9.764243942658919e-05, + "loss": 0.9841, + "step": 951 + }, + { + "epoch": 2.4347826086956523, + "grad_norm": 
0.35373867138680226, + "learning_rate": 9.750304822905297e-05, + "loss": 0.9492, + "step": 952 + }, + { + "epoch": 2.4373401534526855, + "grad_norm": 0.28385300113252654, + "learning_rate": 9.736360119971537e-05, + "loss": 0.9996, + "step": 953 + }, + { + "epoch": 2.4398976982097187, + "grad_norm": 0.2937003946020655, + "learning_rate": 9.722409878338908e-05, + "loss": 1.0015, + "step": 954 + }, + { + "epoch": 2.442455242966752, + "grad_norm": 0.3969860787197417, + "learning_rate": 9.708454142506354e-05, + "loss": 0.9774, + "step": 955 + }, + { + "epoch": 2.445012787723785, + "grad_norm": 0.5498839614052679, + "learning_rate": 9.694492956990345e-05, + "loss": 0.9847, + "step": 956 + }, + { + "epoch": 2.4475703324808182, + "grad_norm": 0.5513989094448135, + "learning_rate": 9.680526366324726e-05, + "loss": 0.9565, + "step": 957 + }, + { + "epoch": 2.4501278772378514, + "grad_norm": 0.506905247181652, + "learning_rate": 9.666554415060596e-05, + "loss": 0.9517, + "step": 958 + }, + { + "epoch": 2.452685421994885, + "grad_norm": 0.44474310752723095, + "learning_rate": 9.652577147766142e-05, + "loss": 0.9743, + "step": 959 + }, + { + "epoch": 2.455242966751918, + "grad_norm": 0.37097475676427244, + "learning_rate": 9.638594609026515e-05, + "loss": 0.9506, + "step": 960 + }, + { + "epoch": 2.4578005115089514, + "grad_norm": 0.2734924283931777, + "learning_rate": 9.624606843443675e-05, + "loss": 1.0158, + "step": 961 + }, + { + "epoch": 2.4603580562659846, + "grad_norm": 0.31804819233085263, + "learning_rate": 9.610613895636263e-05, + "loss": 0.992, + "step": 962 + }, + { + "epoch": 2.4629156010230178, + "grad_norm": 0.41664714320663915, + "learning_rate": 9.596615810239445e-05, + "loss": 0.999, + "step": 963 + }, + { + "epoch": 2.4654731457800514, + "grad_norm": 0.5523065515247985, + "learning_rate": 9.582612631904779e-05, + "loss": 1.0055, + "step": 964 + }, + { + "epoch": 2.4680306905370846, + "grad_norm": 0.4671305490762141, + "learning_rate": 9.568604405300062e-05, + 
"loss": 0.9579, + "step": 965 + }, + { + "epoch": 2.4705882352941178, + "grad_norm": 0.3279722497396409, + "learning_rate": 9.554591175109194e-05, + "loss": 0.9731, + "step": 966 + }, + { + "epoch": 2.473145780051151, + "grad_norm": 0.25846610901040445, + "learning_rate": 9.54057298603205e-05, + "loss": 0.9817, + "step": 967 + }, + { + "epoch": 2.475703324808184, + "grad_norm": 0.3730225408971352, + "learning_rate": 9.526549882784305e-05, + "loss": 0.9874, + "step": 968 + }, + { + "epoch": 2.4782608695652173, + "grad_norm": 0.7271461728885226, + "learning_rate": 9.512521910097316e-05, + "loss": 1.0348, + "step": 969 + }, + { + "epoch": 2.4808184143222505, + "grad_norm": 0.32875046425746846, + "learning_rate": 9.49848911271798e-05, + "loss": 0.9565, + "step": 970 + }, + { + "epoch": 2.483375959079284, + "grad_norm": 0.3205410594330121, + "learning_rate": 9.484451535408572e-05, + "loss": 0.9784, + "step": 971 + }, + { + "epoch": 2.4859335038363173, + "grad_norm": 0.26205949445440796, + "learning_rate": 9.470409222946623e-05, + "loss": 0.9983, + "step": 972 + }, + { + "epoch": 2.4884910485933505, + "grad_norm": 0.3237027571460551, + "learning_rate": 9.456362220124766e-05, + "loss": 0.98, + "step": 973 + }, + { + "epoch": 2.4910485933503836, + "grad_norm": 0.35272232039199597, + "learning_rate": 9.442310571750588e-05, + "loss": 0.9779, + "step": 974 + }, + { + "epoch": 2.493606138107417, + "grad_norm": 0.305939353717968, + "learning_rate": 9.42825432264651e-05, + "loss": 0.9581, + "step": 975 + }, + { + "epoch": 2.49616368286445, + "grad_norm": 0.2932577303248136, + "learning_rate": 9.414193517649614e-05, + "loss": 0.9855, + "step": 976 + }, + { + "epoch": 2.498721227621483, + "grad_norm": 0.30059710492898495, + "learning_rate": 9.400128201611521e-05, + "loss": 0.9754, + "step": 977 + }, + { + "epoch": 2.501278772378517, + "grad_norm": 0.2973031341519278, + "learning_rate": 9.386058419398243e-05, + "loss": 0.9909, + "step": 978 + }, + { + "epoch": 2.50383631713555, + 
"grad_norm": 0.3722883437832787, + "learning_rate": 9.371984215890032e-05, + "loss": 0.9946, + "step": 979 + }, + { + "epoch": 2.506393861892583, + "grad_norm": 0.3473263838445932, + "learning_rate": 9.357905635981251e-05, + "loss": 0.9543, + "step": 980 + }, + { + "epoch": 2.5089514066496164, + "grad_norm": 0.2867570028047222, + "learning_rate": 9.34382272458022e-05, + "loss": 0.9638, + "step": 981 + }, + { + "epoch": 2.5115089514066495, + "grad_norm": 0.30564756429493334, + "learning_rate": 9.329735526609071e-05, + "loss": 0.9464, + "step": 982 + }, + { + "epoch": 2.5140664961636827, + "grad_norm": 0.277493802953859, + "learning_rate": 9.315644087003614e-05, + "loss": 0.9565, + "step": 983 + }, + { + "epoch": 2.516624040920716, + "grad_norm": 0.32107200459340096, + "learning_rate": 9.301548450713193e-05, + "loss": 0.987, + "step": 984 + }, + { + "epoch": 2.5191815856777495, + "grad_norm": 0.34282165398687586, + "learning_rate": 9.28744866270053e-05, + "loss": 0.985, + "step": 985 + }, + { + "epoch": 2.5217391304347827, + "grad_norm": 0.32220988156237623, + "learning_rate": 9.273344767941595e-05, + "loss": 0.958, + "step": 986 + }, + { + "epoch": 2.524296675191816, + "grad_norm": 0.2659763342921004, + "learning_rate": 9.259236811425458e-05, + "loss": 0.9693, + "step": 987 + }, + { + "epoch": 2.526854219948849, + "grad_norm": 0.31738841820079255, + "learning_rate": 9.245124838154145e-05, + "loss": 0.9938, + "step": 988 + }, + { + "epoch": 2.5294117647058822, + "grad_norm": 0.32830918791297703, + "learning_rate": 9.231008893142496e-05, + "loss": 0.9934, + "step": 989 + }, + { + "epoch": 2.531969309462916, + "grad_norm": 0.3402708856013208, + "learning_rate": 9.216889021418015e-05, + "loss": 1.0013, + "step": 990 + }, + { + "epoch": 2.5345268542199486, + "grad_norm": 0.4044102426145664, + "learning_rate": 9.202765268020734e-05, + "loss": 0.9831, + "step": 991 + }, + { + "epoch": 2.5370843989769822, + "grad_norm": 0.42862262278596586, + "learning_rate": 
9.188637678003078e-05, + "loss": 0.9997, + "step": 992 + }, + { + "epoch": 2.5396419437340154, + "grad_norm": 0.4484266743548927, + "learning_rate": 9.17450629642969e-05, + "loss": 0.9828, + "step": 993 + }, + { + "epoch": 2.5421994884910486, + "grad_norm": 0.3265912580211292, + "learning_rate": 9.160371168377322e-05, + "loss": 0.9643, + "step": 994 + }, + { + "epoch": 2.544757033248082, + "grad_norm": 0.32534751123207517, + "learning_rate": 9.146232338934671e-05, + "loss": 0.9582, + "step": 995 + }, + { + "epoch": 2.547314578005115, + "grad_norm": 0.38239024918470127, + "learning_rate": 9.132089853202243e-05, + "loss": 0.9744, + "step": 996 + }, + { + "epoch": 2.5498721227621486, + "grad_norm": 0.46563347602108834, + "learning_rate": 9.117943756292208e-05, + "loss": 0.9792, + "step": 997 + }, + { + "epoch": 2.5524296675191813, + "grad_norm": 0.39461054417861174, + "learning_rate": 9.103794093328248e-05, + "loss": 0.9755, + "step": 998 + }, + { + "epoch": 2.554987212276215, + "grad_norm": 0.3125908044097884, + "learning_rate": 9.089640909445431e-05, + "loss": 0.9716, + "step": 999 + }, + { + "epoch": 2.557544757033248, + "grad_norm": 0.2684368877044592, + "learning_rate": 9.075484249790048e-05, + "loss": 0.9747, + "step": 1000 + }, + { + "epoch": 2.5601023017902813, + "grad_norm": 0.28891578856074146, + "learning_rate": 9.061324159519476e-05, + "loss": 0.9762, + "step": 1001 + }, + { + "epoch": 2.5626598465473145, + "grad_norm": 0.3034677475712927, + "learning_rate": 9.047160683802046e-05, + "loss": 0.9674, + "step": 1002 + }, + { + "epoch": 2.5652173913043477, + "grad_norm": 0.31908253316340884, + "learning_rate": 9.032993867816876e-05, + "loss": 0.9942, + "step": 1003 + }, + { + "epoch": 2.5677749360613813, + "grad_norm": 0.2544491678916064, + "learning_rate": 9.018823756753746e-05, + "loss": 1.0001, + "step": 1004 + }, + { + "epoch": 2.5703324808184145, + "grad_norm": 0.2995352776229395, + "learning_rate": 9.00465039581294e-05, + "loss": 0.9929, + "step": 1005 + 
}, + { + "epoch": 2.5728900255754477, + "grad_norm": 0.35913882534331126, + "learning_rate": 8.990473830205118e-05, + "loss": 0.9318, + "step": 1006 + }, + { + "epoch": 2.575447570332481, + "grad_norm": 0.37010668314829087, + "learning_rate": 8.976294105151154e-05, + "loss": 1.0079, + "step": 1007 + }, + { + "epoch": 2.578005115089514, + "grad_norm": 0.2570784147501355, + "learning_rate": 8.962111265882006e-05, + "loss": 0.9952, + "step": 1008 + }, + { + "epoch": 2.580562659846547, + "grad_norm": 0.3149539278736431, + "learning_rate": 8.947925357638561e-05, + "loss": 0.9941, + "step": 1009 + }, + { + "epoch": 2.5831202046035804, + "grad_norm": 0.2855340149405739, + "learning_rate": 8.933736425671495e-05, + "loss": 0.9816, + "step": 1010 + }, + { + "epoch": 2.585677749360614, + "grad_norm": 0.25345884892793763, + "learning_rate": 8.91954451524114e-05, + "loss": 0.9818, + "step": 1011 + }, + { + "epoch": 2.588235294117647, + "grad_norm": 0.29694516426804485, + "learning_rate": 8.905349671617313e-05, + "loss": 0.9876, + "step": 1012 + }, + { + "epoch": 2.5907928388746804, + "grad_norm": 0.3052840810260173, + "learning_rate": 8.891151940079198e-05, + "loss": 0.9702, + "step": 1013 + }, + { + "epoch": 2.5933503836317136, + "grad_norm": 0.2661838830871243, + "learning_rate": 8.87695136591519e-05, + "loss": 0.9877, + "step": 1014 + }, + { + "epoch": 2.5959079283887467, + "grad_norm": 0.2986390559549456, + "learning_rate": 8.862747994422744e-05, + "loss": 0.9707, + "step": 1015 + }, + { + "epoch": 2.59846547314578, + "grad_norm": 0.3613476612681819, + "learning_rate": 8.848541870908248e-05, + "loss": 0.9703, + "step": 1016 + }, + { + "epoch": 2.601023017902813, + "grad_norm": 0.33024018130732985, + "learning_rate": 8.834333040686867e-05, + "loss": 0.979, + "step": 1017 + }, + { + "epoch": 2.6035805626598467, + "grad_norm": 0.31187166502347763, + "learning_rate": 8.820121549082389e-05, + "loss": 0.9829, + "step": 1018 + }, + { + "epoch": 2.60613810741688, + "grad_norm": 
0.3469288630004611, + "learning_rate": 8.805907441427107e-05, + "loss": 0.9558, + "step": 1019 + }, + { + "epoch": 2.608695652173913, + "grad_norm": 0.3134454892157028, + "learning_rate": 8.791690763061646e-05, + "loss": 0.9644, + "step": 1020 + }, + { + "epoch": 2.6112531969309463, + "grad_norm": 0.30922058220600745, + "learning_rate": 8.777471559334835e-05, + "loss": 0.9769, + "step": 1021 + }, + { + "epoch": 2.6138107416879794, + "grad_norm": 0.3164613704707754, + "learning_rate": 8.763249875603568e-05, + "loss": 0.9699, + "step": 1022 + }, + { + "epoch": 2.6163682864450126, + "grad_norm": 0.3937696035168064, + "learning_rate": 8.74902575723263e-05, + "loss": 0.9913, + "step": 1023 + }, + { + "epoch": 2.618925831202046, + "grad_norm": 0.3269757525342128, + "learning_rate": 8.734799249594593e-05, + "loss": 0.9714, + "step": 1024 + }, + { + "epoch": 2.6214833759590794, + "grad_norm": 0.3137372841061025, + "learning_rate": 8.720570398069639e-05, + "loss": 0.9667, + "step": 1025 + }, + { + "epoch": 2.6240409207161126, + "grad_norm": 0.296905098424126, + "learning_rate": 8.706339248045425e-05, + "loss": 0.9748, + "step": 1026 + }, + { + "epoch": 2.626598465473146, + "grad_norm": 0.3341447796223413, + "learning_rate": 8.692105844916946e-05, + "loss": 0.9813, + "step": 1027 + }, + { + "epoch": 2.629156010230179, + "grad_norm": 0.3756191138022281, + "learning_rate": 8.677870234086383e-05, + "loss": 0.9908, + "step": 1028 + }, + { + "epoch": 2.631713554987212, + "grad_norm": 0.3559465468948902, + "learning_rate": 8.663632460962956e-05, + "loss": 0.9936, + "step": 1029 + }, + { + "epoch": 2.634271099744246, + "grad_norm": 0.300711572823478, + "learning_rate": 8.649392570962781e-05, + "loss": 0.9795, + "step": 1030 + }, + { + "epoch": 2.6368286445012785, + "grad_norm": 0.3320572865051935, + "learning_rate": 8.635150609508733e-05, + "loss": 0.984, + "step": 1031 + }, + { + "epoch": 2.639386189258312, + "grad_norm": 0.3635828441982571, + "learning_rate": 
8.620906622030292e-05, + "loss": 0.9536, + "step": 1032 + }, + { + "epoch": 2.6419437340153453, + "grad_norm": 0.3278411915419061, + "learning_rate": 8.6066606539634e-05, + "loss": 1.0088, + "step": 1033 + }, + { + "epoch": 2.6445012787723785, + "grad_norm": 0.32767767702958833, + "learning_rate": 8.592412750750312e-05, + "loss": 0.9876, + "step": 1034 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 0.35097964529502185, + "learning_rate": 8.578162957839462e-05, + "loss": 0.9915, + "step": 1035 + }, + { + "epoch": 2.649616368286445, + "grad_norm": 0.31991735732581283, + "learning_rate": 8.563911320685312e-05, + "loss": 0.9638, + "step": 1036 + }, + { + "epoch": 2.6521739130434785, + "grad_norm": 0.23787926653601094, + "learning_rate": 8.549657884748205e-05, + "loss": 0.9713, + "step": 1037 + }, + { + "epoch": 2.6547314578005117, + "grad_norm": 0.32244485030641373, + "learning_rate": 8.535402695494221e-05, + "loss": 0.9772, + "step": 1038 + }, + { + "epoch": 2.657289002557545, + "grad_norm": 0.312950136510117, + "learning_rate": 8.521145798395035e-05, + "loss": 0.9841, + "step": 1039 + }, + { + "epoch": 2.659846547314578, + "grad_norm": 0.26212781885375047, + "learning_rate": 8.506887238927764e-05, + "loss": 0.9955, + "step": 1040 + }, + { + "epoch": 2.662404092071611, + "grad_norm": 0.34105099182259796, + "learning_rate": 8.492627062574837e-05, + "loss": 0.9729, + "step": 1041 + }, + { + "epoch": 2.6649616368286444, + "grad_norm": 0.297943326170416, + "learning_rate": 8.478365314823831e-05, + "loss": 1.0041, + "step": 1042 + }, + { + "epoch": 2.6675191815856776, + "grad_norm": 0.23653735859455993, + "learning_rate": 8.464102041167343e-05, + "loss": 0.9385, + "step": 1043 + }, + { + "epoch": 2.670076726342711, + "grad_norm": 0.24103662980964566, + "learning_rate": 8.449837287102837e-05, + "loss": 0.9798, + "step": 1044 + }, + { + "epoch": 2.6726342710997444, + "grad_norm": 0.3266522540557997, + "learning_rate": 8.43557109813249e-05, + "loss": 0.9664, + "step": 
1045 + }, + { + "epoch": 2.6751918158567776, + "grad_norm": 0.34157505937073707, + "learning_rate": 8.421303519763067e-05, + "loss": 0.9512, + "step": 1046 + }, + { + "epoch": 2.6777493606138107, + "grad_norm": 0.32745487240393034, + "learning_rate": 8.407034597505762e-05, + "loss": 0.9847, + "step": 1047 + }, + { + "epoch": 2.680306905370844, + "grad_norm": 0.30390244215100753, + "learning_rate": 8.392764376876049e-05, + "loss": 0.9847, + "step": 1048 + }, + { + "epoch": 2.682864450127877, + "grad_norm": 0.28021611753279574, + "learning_rate": 8.378492903393555e-05, + "loss": 0.9592, + "step": 1049 + }, + { + "epoch": 2.6854219948849103, + "grad_norm": 0.3320556275827844, + "learning_rate": 8.364220222581896e-05, + "loss": 0.9846, + "step": 1050 + }, + { + "epoch": 2.687979539641944, + "grad_norm": 0.3136101711766941, + "learning_rate": 8.34994637996854e-05, + "loss": 0.9811, + "step": 1051 + }, + { + "epoch": 2.690537084398977, + "grad_norm": 0.2618192450012102, + "learning_rate": 8.335671421084661e-05, + "loss": 0.9744, + "step": 1052 + }, + { + "epoch": 2.6930946291560103, + "grad_norm": 0.3220025314640929, + "learning_rate": 8.321395391464995e-05, + "loss": 0.9868, + "step": 1053 + }, + { + "epoch": 2.6956521739130435, + "grad_norm": 0.3598315892247714, + "learning_rate": 8.307118336647694e-05, + "loss": 0.951, + "step": 1054 + }, + { + "epoch": 2.6982097186700766, + "grad_norm": 0.4106007096012368, + "learning_rate": 8.292840302174178e-05, + "loss": 0.9643, + "step": 1055 + }, + { + "epoch": 2.70076726342711, + "grad_norm": 0.2548097195613678, + "learning_rate": 8.278561333588993e-05, + "loss": 0.9841, + "step": 1056 + }, + { + "epoch": 2.703324808184143, + "grad_norm": 0.3371557483370203, + "learning_rate": 8.264281476439662e-05, + "loss": 0.984, + "step": 1057 + }, + { + "epoch": 2.7058823529411766, + "grad_norm": 0.38976688577634183, + "learning_rate": 8.250000776276551e-05, + "loss": 0.9731, + "step": 1058 + }, + { + "epoch": 2.70843989769821, + 
"grad_norm": 0.2695308176694805, + "learning_rate": 8.235719278652704e-05, + "loss": 1.0008, + "step": 1059 + }, + { + "epoch": 2.710997442455243, + "grad_norm": 0.2799834287903197, + "learning_rate": 8.221437029123715e-05, + "loss": 0.96, + "step": 1060 + }, + { + "epoch": 2.713554987212276, + "grad_norm": 0.3887662531222578, + "learning_rate": 8.20715407324758e-05, + "loss": 1.0134, + "step": 1061 + }, + { + "epoch": 2.7161125319693094, + "grad_norm": 0.36475843384332224, + "learning_rate": 8.192870456584536e-05, + "loss": 0.9869, + "step": 1062 + }, + { + "epoch": 2.718670076726343, + "grad_norm": 0.3842950619442295, + "learning_rate": 8.178586224696938e-05, + "loss": 1.0191, + "step": 1063 + }, + { + "epoch": 2.7212276214833757, + "grad_norm": 0.29521526511075435, + "learning_rate": 8.164301423149104e-05, + "loss": 0.9847, + "step": 1064 + }, + { + "epoch": 2.7237851662404093, + "grad_norm": 0.2510688717518455, + "learning_rate": 8.150016097507161e-05, + "loss": 0.9537, + "step": 1065 + }, + { + "epoch": 2.7263427109974425, + "grad_norm": 0.31175386208986516, + "learning_rate": 8.135730293338918e-05, + "loss": 0.9715, + "step": 1066 + }, + { + "epoch": 2.7289002557544757, + "grad_norm": 0.2969969026627777, + "learning_rate": 8.121444056213698e-05, + "loss": 0.9778, + "step": 1067 + }, + { + "epoch": 2.731457800511509, + "grad_norm": 0.316196872282454, + "learning_rate": 8.107157431702219e-05, + "loss": 0.9979, + "step": 1068 + }, + { + "epoch": 2.734015345268542, + "grad_norm": 0.2677096371345643, + "learning_rate": 8.092870465376422e-05, + "loss": 0.972, + "step": 1069 + }, + { + "epoch": 2.7365728900255757, + "grad_norm": 0.25111395109245066, + "learning_rate": 8.078583202809347e-05, + "loss": 1.0173, + "step": 1070 + }, + { + "epoch": 2.7391304347826084, + "grad_norm": 0.23618007037740435, + "learning_rate": 8.064295689574979e-05, + "loss": 0.9681, + "step": 1071 + }, + { + "epoch": 2.741687979539642, + "grad_norm": 0.2462154966468633, + "learning_rate": 
8.050007971248095e-05, + "loss": 0.9977, + "step": 1072 + }, + { + "epoch": 2.7442455242966752, + "grad_norm": 0.2396576027964869, + "learning_rate": 8.035720093404133e-05, + "loss": 0.9817, + "step": 1073 + }, + { + "epoch": 2.7468030690537084, + "grad_norm": 0.23288900252567163, + "learning_rate": 8.021432101619034e-05, + "loss": 0.9677, + "step": 1074 + }, + { + "epoch": 2.7493606138107416, + "grad_norm": 0.309943456329605, + "learning_rate": 8.007144041469111e-05, + "loss": 1.0198, + "step": 1075 + }, + { + "epoch": 2.7519181585677748, + "grad_norm": 0.2438257902275988, + "learning_rate": 7.992855958530893e-05, + "loss": 0.9774, + "step": 1076 + }, + { + "epoch": 2.7544757033248084, + "grad_norm": 0.24225939294568138, + "learning_rate": 7.978567898380968e-05, + "loss": 0.9975, + "step": 1077 + }, + { + "epoch": 2.7570332480818416, + "grad_norm": 0.2557453042666024, + "learning_rate": 7.96427990659587e-05, + "loss": 0.9601, + "step": 1078 + }, + { + "epoch": 2.7595907928388748, + "grad_norm": 0.25399744095479343, + "learning_rate": 7.949992028751908e-05, + "loss": 0.94, + "step": 1079 + }, + { + "epoch": 2.762148337595908, + "grad_norm": 0.25806395609838956, + "learning_rate": 7.935704310425022e-05, + "loss": 0.9856, + "step": 1080 + }, + { + "epoch": 2.764705882352941, + "grad_norm": 0.2778516319437345, + "learning_rate": 7.921416797190653e-05, + "loss": 0.9485, + "step": 1081 + }, + { + "epoch": 2.7672634271099743, + "grad_norm": 0.2652382709743763, + "learning_rate": 7.90712953462358e-05, + "loss": 0.9852, + "step": 1082 + }, + { + "epoch": 2.7698209718670075, + "grad_norm": 0.3078124836381294, + "learning_rate": 7.892842568297784e-05, + "loss": 0.9843, + "step": 1083 + }, + { + "epoch": 2.772378516624041, + "grad_norm": 0.2630029283693419, + "learning_rate": 7.878555943786304e-05, + "loss": 0.9866, + "step": 1084 + }, + { + "epoch": 2.7749360613810743, + "grad_norm": 0.3230772942242779, + "learning_rate": 7.864269706661084e-05, + "loss": 0.9617, + "step": 
1085 + }, + { + "epoch": 2.7774936061381075, + "grad_norm": 0.33688102829350425, + "learning_rate": 7.84998390249284e-05, + "loss": 1.0151, + "step": 1086 + }, + { + "epoch": 2.7800511508951407, + "grad_norm": 0.27010473360932136, + "learning_rate": 7.8356985768509e-05, + "loss": 0.9416, + "step": 1087 + }, + { + "epoch": 2.782608695652174, + "grad_norm": 0.3216032949279463, + "learning_rate": 7.821413775303063e-05, + "loss": 0.9677, + "step": 1088 + }, + { + "epoch": 2.785166240409207, + "grad_norm": 0.3184797598775921, + "learning_rate": 7.807129543415467e-05, + "loss": 0.9878, + "step": 1089 + }, + { + "epoch": 2.78772378516624, + "grad_norm": 0.26980179286312655, + "learning_rate": 7.792845926752422e-05, + "loss": 0.9559, + "step": 1090 + }, + { + "epoch": 2.790281329923274, + "grad_norm": 0.2788560924053536, + "learning_rate": 7.778562970876285e-05, + "loss": 0.9315, + "step": 1091 + }, + { + "epoch": 2.792838874680307, + "grad_norm": 0.34225351537345716, + "learning_rate": 7.764280721347296e-05, + "loss": 0.9905, + "step": 1092 + }, + { + "epoch": 2.79539641943734, + "grad_norm": 0.3181751957801659, + "learning_rate": 7.749999223723451e-05, + "loss": 0.992, + "step": 1093 + }, + { + "epoch": 2.7979539641943734, + "grad_norm": 0.2617895154207013, + "learning_rate": 7.73571852356034e-05, + "loss": 0.976, + "step": 1094 + }, + { + "epoch": 2.8005115089514065, + "grad_norm": 0.26160435542511723, + "learning_rate": 7.72143866641101e-05, + "loss": 0.9717, + "step": 1095 + }, + { + "epoch": 2.80306905370844, + "grad_norm": 0.3005466825228635, + "learning_rate": 7.707159697825824e-05, + "loss": 1.019, + "step": 1096 + }, + { + "epoch": 2.805626598465473, + "grad_norm": 0.2737567544420114, + "learning_rate": 7.692881663352306e-05, + "loss": 0.9877, + "step": 1097 + }, + { + "epoch": 2.8081841432225065, + "grad_norm": 0.25383083364525466, + "learning_rate": 7.678604608535007e-05, + "loss": 1.0, + "step": 1098 + }, + { + "epoch": 2.8107416879795397, + "grad_norm": 
0.24966621455789795, + "learning_rate": 7.664328578915341e-05, + "loss": 0.9913, + "step": 1099 + }, + { + "epoch": 2.813299232736573, + "grad_norm": 0.26731325577468995, + "learning_rate": 7.650053620031461e-05, + "loss": 0.9667, + "step": 1100 + }, + { + "epoch": 2.815856777493606, + "grad_norm": 0.24369512341274932, + "learning_rate": 7.635779777418105e-05, + "loss": 0.9941, + "step": 1101 + }, + { + "epoch": 2.8184143222506393, + "grad_norm": 0.22967457166848224, + "learning_rate": 7.621507096606445e-05, + "loss": 0.9755, + "step": 1102 + }, + { + "epoch": 2.820971867007673, + "grad_norm": 0.2571549233122558, + "learning_rate": 7.607235623123952e-05, + "loss": 0.9896, + "step": 1103 + }, + { + "epoch": 2.8235294117647056, + "grad_norm": 0.21308122874558627, + "learning_rate": 7.592965402494242e-05, + "loss": 0.9671, + "step": 1104 + }, + { + "epoch": 2.8260869565217392, + "grad_norm": 0.23965692093466115, + "learning_rate": 7.578696480236935e-05, + "loss": 0.9572, + "step": 1105 + }, + { + "epoch": 2.8286445012787724, + "grad_norm": 0.20206088609556147, + "learning_rate": 7.564428901867512e-05, + "loss": 0.9874, + "step": 1106 + }, + { + "epoch": 2.8312020460358056, + "grad_norm": 0.24456595967971878, + "learning_rate": 7.550162712897166e-05, + "loss": 0.9834, + "step": 1107 + }, + { + "epoch": 2.833759590792839, + "grad_norm": 0.2395628798306672, + "learning_rate": 7.535897958832657e-05, + "loss": 0.9932, + "step": 1108 + }, + { + "epoch": 2.836317135549872, + "grad_norm": 0.24488788117262922, + "learning_rate": 7.521634685176171e-05, + "loss": 0.9976, + "step": 1109 + }, + { + "epoch": 2.8388746803069056, + "grad_norm": 0.2475079536458042, + "learning_rate": 7.507372937425166e-05, + "loss": 0.979, + "step": 1110 + }, + { + "epoch": 2.8414322250639388, + "grad_norm": 0.25103418982918085, + "learning_rate": 7.493112761072238e-05, + "loss": 0.9784, + "step": 1111 + }, + { + "epoch": 2.843989769820972, + "grad_norm": 0.21080156526173952, + "learning_rate": 
7.478854201604967e-05, + "loss": 0.9861, + "step": 1112 + }, + { + "epoch": 2.846547314578005, + "grad_norm": 0.2636072879534979, + "learning_rate": 7.464597304505779e-05, + "loss": 0.9767, + "step": 1113 + }, + { + "epoch": 2.8491048593350383, + "grad_norm": 0.3447559742850428, + "learning_rate": 7.450342115251793e-05, + "loss": 0.9763, + "step": 1114 + }, + { + "epoch": 2.8516624040920715, + "grad_norm": 0.3554201272513753, + "learning_rate": 7.436088679314689e-05, + "loss": 0.9814, + "step": 1115 + }, + { + "epoch": 2.8542199488491047, + "grad_norm": 0.2338897866384284, + "learning_rate": 7.42183704216054e-05, + "loss": 0.9737, + "step": 1116 + }, + { + "epoch": 2.8567774936061383, + "grad_norm": 0.3005337593534035, + "learning_rate": 7.407587249249691e-05, + "loss": 0.9593, + "step": 1117 + }, + { + "epoch": 2.8593350383631715, + "grad_norm": 0.28306065139483866, + "learning_rate": 7.393339346036604e-05, + "loss": 0.9912, + "step": 1118 + }, + { + "epoch": 2.8618925831202047, + "grad_norm": 0.32462258403513267, + "learning_rate": 7.379093377969708e-05, + "loss": 0.9636, + "step": 1119 + }, + { + "epoch": 2.864450127877238, + "grad_norm": 0.23458466619854929, + "learning_rate": 7.364849390491269e-05, + "loss": 1.0179, + "step": 1120 + }, + { + "epoch": 2.867007672634271, + "grad_norm": 0.26599173050846503, + "learning_rate": 7.350607429037222e-05, + "loss": 0.9865, + "step": 1121 + }, + { + "epoch": 2.869565217391304, + "grad_norm": 0.28672176422376533, + "learning_rate": 7.336367539037047e-05, + "loss": 0.9697, + "step": 1122 + }, + { + "epoch": 2.8721227621483374, + "grad_norm": 0.38174167324236646, + "learning_rate": 7.32212976591362e-05, + "loss": 0.9394, + "step": 1123 + }, + { + "epoch": 2.874680306905371, + "grad_norm": 0.3008937451500426, + "learning_rate": 7.307894155083054e-05, + "loss": 1.0193, + "step": 1124 + }, + { + "epoch": 2.877237851662404, + "grad_norm": 0.2647744376072329, + "learning_rate": 7.293660751954576e-05, + "loss": 0.9959, + "step": 
1125 + }, + { + "epoch": 2.8797953964194374, + "grad_norm": 0.3361184185105208, + "learning_rate": 7.279429601930365e-05, + "loss": 0.9886, + "step": 1126 + }, + { + "epoch": 2.8823529411764706, + "grad_norm": 0.28703805124273124, + "learning_rate": 7.265200750405408e-05, + "loss": 0.9552, + "step": 1127 + }, + { + "epoch": 2.8849104859335037, + "grad_norm": 0.2282314607084684, + "learning_rate": 7.250974242767372e-05, + "loss": 0.9613, + "step": 1128 + }, + { + "epoch": 2.887468030690537, + "grad_norm": 0.2492748754541012, + "learning_rate": 7.236750124396435e-05, + "loss": 0.9668, + "step": 1129 + }, + { + "epoch": 2.89002557544757, + "grad_norm": 0.25888788395575085, + "learning_rate": 7.222528440665167e-05, + "loss": 0.9925, + "step": 1130 + }, + { + "epoch": 2.8925831202046037, + "grad_norm": 0.24496080625420605, + "learning_rate": 7.20830923693836e-05, + "loss": 1.0041, + "step": 1131 + }, + { + "epoch": 2.895140664961637, + "grad_norm": 0.23733176427430222, + "learning_rate": 7.194092558572897e-05, + "loss": 0.9425, + "step": 1132 + }, + { + "epoch": 2.89769820971867, + "grad_norm": 0.27037826071655174, + "learning_rate": 7.179878450917613e-05, + "loss": 0.9618, + "step": 1133 + }, + { + "epoch": 2.9002557544757033, + "grad_norm": 0.2110486047552461, + "learning_rate": 7.165666959313135e-05, + "loss": 0.9625, + "step": 1134 + }, + { + "epoch": 2.9028132992327365, + "grad_norm": 0.2356138250996952, + "learning_rate": 7.151458129091752e-05, + "loss": 0.9868, + "step": 1135 + }, + { + "epoch": 2.90537084398977, + "grad_norm": 0.2507648626394698, + "learning_rate": 7.137252005577256e-05, + "loss": 0.9579, + "step": 1136 + }, + { + "epoch": 2.907928388746803, + "grad_norm": 0.21729817798268314, + "learning_rate": 7.123048634084815e-05, + "loss": 1.0193, + "step": 1137 + }, + { + "epoch": 2.9104859335038364, + "grad_norm": 0.25511738825377567, + "learning_rate": 7.108848059920805e-05, + "loss": 0.9594, + "step": 1138 + }, + { + "epoch": 2.9130434782608696, + 
"grad_norm": 0.25447395942517514, + "learning_rate": 7.09465032838269e-05, + "loss": 0.9746, + "step": 1139 + }, + { + "epoch": 2.915601023017903, + "grad_norm": 0.24784365067022293, + "learning_rate": 7.080455484758863e-05, + "loss": 0.9659, + "step": 1140 + }, + { + "epoch": 2.918158567774936, + "grad_norm": 0.2730224277035152, + "learning_rate": 7.066263574328505e-05, + "loss": 0.9818, + "step": 1141 + }, + { + "epoch": 2.920716112531969, + "grad_norm": 0.30594100479026, + "learning_rate": 7.052074642361444e-05, + "loss": 0.9915, + "step": 1142 + }, + { + "epoch": 2.923273657289003, + "grad_norm": 0.32054932862442914, + "learning_rate": 7.037888734117998e-05, + "loss": 0.9882, + "step": 1143 + }, + { + "epoch": 2.9258312020460355, + "grad_norm": 0.23958919561701653, + "learning_rate": 7.023705894848848e-05, + "loss": 0.9666, + "step": 1144 + }, + { + "epoch": 2.928388746803069, + "grad_norm": 0.27076318118261017, + "learning_rate": 7.009526169794885e-05, + "loss": 0.9746, + "step": 1145 + }, + { + "epoch": 2.9309462915601023, + "grad_norm": 0.2729574133461879, + "learning_rate": 6.995349604187061e-05, + "loss": 0.9624, + "step": 1146 + }, + { + "epoch": 2.9335038363171355, + "grad_norm": 0.3259725455577868, + "learning_rate": 6.981176243246257e-05, + "loss": 0.9795, + "step": 1147 + }, + { + "epoch": 2.9360613810741687, + "grad_norm": 0.34256481150449963, + "learning_rate": 6.967006132183127e-05, + "loss": 0.977, + "step": 1148 + }, + { + "epoch": 2.938618925831202, + "grad_norm": 0.2828018012599345, + "learning_rate": 6.952839316197956e-05, + "loss": 0.9928, + "step": 1149 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.2397889702793678, + "learning_rate": 6.938675840480525e-05, + "loss": 0.9822, + "step": 1150 + }, + { + "epoch": 2.9437340153452687, + "grad_norm": 0.331164422112377, + "learning_rate": 6.924515750209954e-05, + "loss": 0.9973, + "step": 1151 + }, + { + "epoch": 2.946291560102302, + "grad_norm": 0.2704740780802998, + "learning_rate": 
6.910359090554572e-05, + "loss": 0.9685, + "step": 1152 + }, + { + "epoch": 2.948849104859335, + "grad_norm": 0.2437699512495755, + "learning_rate": 6.896205906671755e-05, + "loss": 0.9896, + "step": 1153 + }, + { + "epoch": 2.9514066496163682, + "grad_norm": 0.24008371878492457, + "learning_rate": 6.882056243707796e-05, + "loss": 0.9948, + "step": 1154 + }, + { + "epoch": 2.9539641943734014, + "grad_norm": 0.2714718735118312, + "learning_rate": 6.86791014679776e-05, + "loss": 1.0107, + "step": 1155 + }, + { + "epoch": 2.9565217391304346, + "grad_norm": 0.2689100345729253, + "learning_rate": 6.85376766106533e-05, + "loss": 0.9844, + "step": 1156 + }, + { + "epoch": 2.959079283887468, + "grad_norm": 0.217002318039709, + "learning_rate": 6.839628831622681e-05, + "loss": 0.9748, + "step": 1157 + }, + { + "epoch": 2.9616368286445014, + "grad_norm": 0.2919920400101465, + "learning_rate": 6.825493703570311e-05, + "loss": 0.9699, + "step": 1158 + }, + { + "epoch": 2.9641943734015346, + "grad_norm": 0.3490734108048557, + "learning_rate": 6.811362321996926e-05, + "loss": 0.9694, + "step": 1159 + }, + { + "epoch": 2.9667519181585678, + "grad_norm": 0.3103643754348234, + "learning_rate": 6.797234731979267e-05, + "loss": 0.991, + "step": 1160 + }, + { + "epoch": 2.969309462915601, + "grad_norm": 0.1939069857875497, + "learning_rate": 6.783110978581989e-05, + "loss": 0.9614, + "step": 1161 + }, + { + "epoch": 2.971867007672634, + "grad_norm": 0.2495187824732926, + "learning_rate": 6.768991106857508e-05, + "loss": 0.9656, + "step": 1162 + }, + { + "epoch": 2.9744245524296673, + "grad_norm": 0.3034345894428266, + "learning_rate": 6.754875161845855e-05, + "loss": 1.0069, + "step": 1163 + }, + { + "epoch": 2.976982097186701, + "grad_norm": 0.3567922857742952, + "learning_rate": 6.740763188574546e-05, + "loss": 0.9612, + "step": 1164 + }, + { + "epoch": 2.979539641943734, + "grad_norm": 0.25891106467169334, + "learning_rate": 6.726655232058409e-05, + "loss": 0.9696, + "step": 1165 + 
}, + { + "epoch": 2.9820971867007673, + "grad_norm": 0.25153156564503487, + "learning_rate": 6.712551337299473e-05, + "loss": 1.0014, + "step": 1166 + }, + { + "epoch": 2.9846547314578005, + "grad_norm": 0.32964252932862226, + "learning_rate": 6.69845154928681e-05, + "loss": 0.9773, + "step": 1167 + }, + { + "epoch": 2.9872122762148337, + "grad_norm": 0.2917177962042733, + "learning_rate": 6.684355912996386e-05, + "loss": 0.9911, + "step": 1168 + }, + { + "epoch": 2.9897698209718673, + "grad_norm": 0.2002913243087303, + "learning_rate": 6.670264473390931e-05, + "loss": 0.9683, + "step": 1169 + }, + { + "epoch": 2.9923273657289, + "grad_norm": 0.26813771266232983, + "learning_rate": 6.656177275419785e-05, + "loss": 0.967, + "step": 1170 + }, + { + "epoch": 2.9948849104859336, + "grad_norm": 0.2590485360645914, + "learning_rate": 6.64209436401875e-05, + "loss": 0.9638, + "step": 1171 + }, + { + "epoch": 2.997442455242967, + "grad_norm": 0.26357426110685056, + "learning_rate": 6.62801578410997e-05, + "loss": 1.0056, + "step": 1172 + }, + { + "epoch": 3.0, + "grad_norm": 0.22456837673610008, + "learning_rate": 6.61394158060176e-05, + "loss": 0.9933, + "step": 1173 + }, + { + "epoch": 3.002557544757033, + "grad_norm": 0.22123515970304183, + "learning_rate": 6.59987179838848e-05, + "loss": 0.9712, + "step": 1174 + }, + { + "epoch": 3.0051150895140664, + "grad_norm": 0.2497098271402969, + "learning_rate": 6.58580648235039e-05, + "loss": 0.9701, + "step": 1175 + }, + { + "epoch": 3.0076726342710995, + "grad_norm": 0.2264514281442564, + "learning_rate": 6.571745677353492e-05, + "loss": 0.9498, + "step": 1176 + }, + { + "epoch": 3.010230179028133, + "grad_norm": 0.24110920081950274, + "learning_rate": 6.557689428249414e-05, + "loss": 0.9841, + "step": 1177 + }, + { + "epoch": 3.0127877237851663, + "grad_norm": 0.28882150068726187, + "learning_rate": 6.543637779875237e-05, + "loss": 0.9728, + "step": 1178 + }, + { + "epoch": 3.0153452685421995, + "grad_norm": 
0.22165888817736834, + "learning_rate": 6.529590777053378e-05, + "loss": 0.9263, + "step": 1179 + }, + { + "epoch": 3.0179028132992327, + "grad_norm": 0.2715939791147568, + "learning_rate": 6.515548464591428e-05, + "loss": 0.9353, + "step": 1180 + }, + { + "epoch": 3.020460358056266, + "grad_norm": 0.3321798212445876, + "learning_rate": 6.501510887282024e-05, + "loss": 0.948, + "step": 1181 + }, + { + "epoch": 3.023017902813299, + "grad_norm": 0.2852631687681614, + "learning_rate": 6.487478089902685e-05, + "loss": 0.9406, + "step": 1182 + }, + { + "epoch": 3.0255754475703327, + "grad_norm": 0.23938138232215803, + "learning_rate": 6.473450117215699e-05, + "loss": 0.9612, + "step": 1183 + }, + { + "epoch": 3.028132992327366, + "grad_norm": 0.2897634546793638, + "learning_rate": 6.459427013967953e-05, + "loss": 0.93, + "step": 1184 + }, + { + "epoch": 3.030690537084399, + "grad_norm": 0.28668995967161215, + "learning_rate": 6.445408824890805e-05, + "loss": 0.943, + "step": 1185 + }, + { + "epoch": 3.0332480818414322, + "grad_norm": 0.23250708905243717, + "learning_rate": 6.431395594699943e-05, + "loss": 0.9264, + "step": 1186 + }, + { + "epoch": 3.0358056265984654, + "grad_norm": 0.3127461016723165, + "learning_rate": 6.417387368095225e-05, + "loss": 0.9492, + "step": 1187 + }, + { + "epoch": 3.0383631713554986, + "grad_norm": 0.26702473205124055, + "learning_rate": 6.403384189760556e-05, + "loss": 0.9173, + "step": 1188 + }, + { + "epoch": 3.040920716112532, + "grad_norm": 0.2692197582092417, + "learning_rate": 6.389386104363738e-05, + "loss": 0.9483, + "step": 1189 + }, + { + "epoch": 3.0434782608695654, + "grad_norm": 0.29389458281034464, + "learning_rate": 6.375393156556325e-05, + "loss": 0.938, + "step": 1190 + }, + { + "epoch": 3.0460358056265986, + "grad_norm": 0.24003231343808254, + "learning_rate": 6.361405390973489e-05, + "loss": 0.9174, + "step": 1191 + }, + { + "epoch": 3.0485933503836318, + "grad_norm": 0.25208756985944336, + "learning_rate": 
6.347422852233862e-05, + "loss": 0.9542, + "step": 1192 + }, + { + "epoch": 3.051150895140665, + "grad_norm": 0.24466794377181064, + "learning_rate": 6.333445584939407e-05, + "loss": 0.9617, + "step": 1193 + }, + { + "epoch": 3.053708439897698, + "grad_norm": 0.23317237737554486, + "learning_rate": 6.319473633675275e-05, + "loss": 0.9349, + "step": 1194 + }, + { + "epoch": 3.0562659846547313, + "grad_norm": 0.24590715837760968, + "learning_rate": 6.305507043009657e-05, + "loss": 0.9414, + "step": 1195 + }, + { + "epoch": 3.0588235294117645, + "grad_norm": 0.21035477411097228, + "learning_rate": 6.291545857493645e-05, + "loss": 0.9512, + "step": 1196 + }, + { + "epoch": 3.061381074168798, + "grad_norm": 0.2248505455887991, + "learning_rate": 6.277590121661098e-05, + "loss": 0.9522, + "step": 1197 + }, + { + "epoch": 3.0639386189258313, + "grad_norm": 0.2471462687532793, + "learning_rate": 6.263639880028468e-05, + "loss": 0.9493, + "step": 1198 + }, + { + "epoch": 3.0664961636828645, + "grad_norm": 0.22868376945738234, + "learning_rate": 6.249695177094707e-05, + "loss": 0.9668, + "step": 1199 + }, + { + "epoch": 3.0690537084398977, + "grad_norm": 0.23527194146680278, + "learning_rate": 6.235756057341084e-05, + "loss": 0.9279, + "step": 1200 + }, + { + "epoch": 3.071611253196931, + "grad_norm": 0.2513612868250463, + "learning_rate": 6.221822565231066e-05, + "loss": 0.9403, + "step": 1201 + }, + { + "epoch": 3.074168797953964, + "grad_norm": 0.22860913544864897, + "learning_rate": 6.207894745210168e-05, + "loss": 0.9616, + "step": 1202 + }, + { + "epoch": 3.0767263427109977, + "grad_norm": 0.24014291985565175, + "learning_rate": 6.193972641705809e-05, + "loss": 0.9664, + "step": 1203 + }, + { + "epoch": 3.079283887468031, + "grad_norm": 0.22572397342217615, + "learning_rate": 6.180056299127174e-05, + "loss": 0.9663, + "step": 1204 + }, + { + "epoch": 3.081841432225064, + "grad_norm": 0.25121933762619786, + "learning_rate": 6.16614576186507e-05, + "loss": 0.9676, + 
"step": 1205 + }, + { + "epoch": 3.084398976982097, + "grad_norm": 0.21264743561877053, + "learning_rate": 6.152241074291791e-05, + "loss": 0.9385, + "step": 1206 + }, + { + "epoch": 3.0869565217391304, + "grad_norm": 0.2110657205113156, + "learning_rate": 6.13834228076097e-05, + "loss": 0.9593, + "step": 1207 + }, + { + "epoch": 3.0895140664961636, + "grad_norm": 0.23064076505093895, + "learning_rate": 6.12444942560744e-05, + "loss": 0.9859, + "step": 1208 + }, + { + "epoch": 3.0920716112531967, + "grad_norm": 0.2327889001545048, + "learning_rate": 6.110562553147078e-05, + "loss": 0.9343, + "step": 1209 + }, + { + "epoch": 3.0946291560102304, + "grad_norm": 0.22081121627352496, + "learning_rate": 6.0966817076767e-05, + "loss": 0.9572, + "step": 1210 + }, + { + "epoch": 3.0971867007672635, + "grad_norm": 0.21410596357542921, + "learning_rate": 6.08280693347388e-05, + "loss": 0.9577, + "step": 1211 + }, + { + "epoch": 3.0997442455242967, + "grad_norm": 0.22670771449737367, + "learning_rate": 6.068938274796834e-05, + "loss": 0.9253, + "step": 1212 + }, + { + "epoch": 3.10230179028133, + "grad_norm": 0.205343189542066, + "learning_rate": 6.055075775884263e-05, + "loss": 0.9896, + "step": 1213 + }, + { + "epoch": 3.104859335038363, + "grad_norm": 0.22769741326879356, + "learning_rate": 6.0412194809552316e-05, + "loss": 0.9387, + "step": 1214 + }, + { + "epoch": 3.1074168797953963, + "grad_norm": 0.19822402152888394, + "learning_rate": 6.027369434208999e-05, + "loss": 0.9808, + "step": 1215 + }, + { + "epoch": 3.10997442455243, + "grad_norm": 0.23051970557462004, + "learning_rate": 6.0135256798249047e-05, + "loss": 0.933, + "step": 1216 + }, + { + "epoch": 3.112531969309463, + "grad_norm": 0.20329115598362008, + "learning_rate": 5.999688261962216e-05, + "loss": 0.9684, + "step": 1217 + }, + { + "epoch": 3.1150895140664963, + "grad_norm": 0.21036340816499827, + "learning_rate": 5.985857224759981e-05, + "loss": 0.944, + "step": 1218 + }, + { + "epoch": 3.1176470588235294, 
+ "grad_norm": 0.20307590074585102, + "learning_rate": 5.972032612336906e-05, + "loss": 0.9598, + "step": 1219 + }, + { + "epoch": 3.1202046035805626, + "grad_norm": 0.2259792004822342, + "learning_rate": 5.958214468791189e-05, + "loss": 0.9483, + "step": 1220 + }, + { + "epoch": 3.122762148337596, + "grad_norm": 0.21243681629633632, + "learning_rate": 5.944402838200404e-05, + "loss": 0.9455, + "step": 1221 + }, + { + "epoch": 3.125319693094629, + "grad_norm": 0.21205256563770825, + "learning_rate": 5.930597764621347e-05, + "loss": 0.8963, + "step": 1222 + }, + { + "epoch": 3.1278772378516626, + "grad_norm": 0.19717448713959743, + "learning_rate": 5.916799292089895e-05, + "loss": 0.9564, + "step": 1223 + }, + { + "epoch": 3.130434782608696, + "grad_norm": 0.2244196417767959, + "learning_rate": 5.9030074646208745e-05, + "loss": 0.9272, + "step": 1224 + }, + { + "epoch": 3.132992327365729, + "grad_norm": 0.21563385011040548, + "learning_rate": 5.8892223262079144e-05, + "loss": 0.9316, + "step": 1225 + }, + { + "epoch": 3.135549872122762, + "grad_norm": 0.2350946628160643, + "learning_rate": 5.875443920823297e-05, + "loss": 0.9487, + "step": 1226 + }, + { + "epoch": 3.1381074168797953, + "grad_norm": 0.2865769039296874, + "learning_rate": 5.861672292417842e-05, + "loss": 0.9492, + "step": 1227 + }, + { + "epoch": 3.1406649616368285, + "grad_norm": 0.23430970345425967, + "learning_rate": 5.84790748492074e-05, + "loss": 0.966, + "step": 1228 + }, + { + "epoch": 3.1432225063938617, + "grad_norm": 0.2467472265535791, + "learning_rate": 5.834149542239431e-05, + "loss": 0.9708, + "step": 1229 + }, + { + "epoch": 3.1457800511508953, + "grad_norm": 0.26772393728125105, + "learning_rate": 5.8203985082594575e-05, + "loss": 0.9557, + "step": 1230 + }, + { + "epoch": 3.1483375959079285, + "grad_norm": 0.2338023529317996, + "learning_rate": 5.806654426844315e-05, + "loss": 0.9638, + "step": 1231 + }, + { + "epoch": 3.1508951406649617, + "grad_norm": 0.2523069016121197, + 
"learning_rate": 5.792917341835335e-05, + "loss": 0.9434, + "step": 1232 + }, + { + "epoch": 3.153452685421995, + "grad_norm": 0.2766552697496739, + "learning_rate": 5.77918729705152e-05, + "loss": 0.9809, + "step": 1233 + }, + { + "epoch": 3.156010230179028, + "grad_norm": 0.22646812781120942, + "learning_rate": 5.765464336289424e-05, + "loss": 0.9639, + "step": 1234 + }, + { + "epoch": 3.1585677749360612, + "grad_norm": 0.2205961359884855, + "learning_rate": 5.751748503322999e-05, + "loss": 0.954, + "step": 1235 + }, + { + "epoch": 3.1611253196930944, + "grad_norm": 0.2701811323136191, + "learning_rate": 5.7380398419034644e-05, + "loss": 0.9589, + "step": 1236 + }, + { + "epoch": 3.163682864450128, + "grad_norm": 0.2081039558632908, + "learning_rate": 5.7243383957591586e-05, + "loss": 0.9471, + "step": 1237 + }, + { + "epoch": 3.166240409207161, + "grad_norm": 0.19643865068397245, + "learning_rate": 5.7106442085954045e-05, + "loss": 0.9518, + "step": 1238 + }, + { + "epoch": 3.1687979539641944, + "grad_norm": 0.30921257471256036, + "learning_rate": 5.69695732409438e-05, + "loss": 0.9242, + "step": 1239 + }, + { + "epoch": 3.1713554987212276, + "grad_norm": 0.24583021366711547, + "learning_rate": 5.6832777859149536e-05, + "loss": 0.9423, + "step": 1240 + }, + { + "epoch": 3.1739130434782608, + "grad_norm": 0.18950822302407402, + "learning_rate": 5.669605637692575e-05, + "loss": 0.932, + "step": 1241 + }, + { + "epoch": 3.176470588235294, + "grad_norm": 0.25157456578331905, + "learning_rate": 5.655940923039111e-05, + "loss": 0.9379, + "step": 1242 + }, + { + "epoch": 3.1790281329923276, + "grad_norm": 0.18343916898513093, + "learning_rate": 5.642283685542717e-05, + "loss": 0.9456, + "step": 1243 + }, + { + "epoch": 3.1815856777493607, + "grad_norm": 0.19560349844702873, + "learning_rate": 5.6286339687677044e-05, + "loss": 0.9328, + "step": 1244 + }, + { + "epoch": 3.184143222506394, + "grad_norm": 0.189610936953741, + "learning_rate": 5.614991816254388e-05, + 
"loss": 0.9109, + "step": 1245 + }, + { + "epoch": 3.186700767263427, + "grad_norm": 0.18320058939508785, + "learning_rate": 5.601357271518959e-05, + "loss": 0.9584, + "step": 1246 + }, + { + "epoch": 3.1892583120204603, + "grad_norm": 0.17494234166851327, + "learning_rate": 5.587730378053339e-05, + "loss": 0.9656, + "step": 1247 + }, + { + "epoch": 3.1918158567774935, + "grad_norm": 0.19092078945148688, + "learning_rate": 5.574111179325039e-05, + "loss": 0.9487, + "step": 1248 + }, + { + "epoch": 3.1943734015345266, + "grad_norm": 0.1860857981568226, + "learning_rate": 5.560499718777031e-05, + "loss": 0.9372, + "step": 1249 + }, + { + "epoch": 3.1969309462915603, + "grad_norm": 0.18572653447801232, + "learning_rate": 5.5468960398276014e-05, + "loss": 0.9459, + "step": 1250 + }, + { + "epoch": 3.1994884910485935, + "grad_norm": 0.19107345846336404, + "learning_rate": 5.5333001858702164e-05, + "loss": 0.9255, + "step": 1251 + }, + { + "epoch": 3.2020460358056266, + "grad_norm": 0.20057541760798753, + "learning_rate": 5.519712200273381e-05, + "loss": 0.9615, + "step": 1252 + }, + { + "epoch": 3.20460358056266, + "grad_norm": 0.20198119736904155, + "learning_rate": 5.5061321263804933e-05, + "loss": 0.9204, + "step": 1253 + }, + { + "epoch": 3.207161125319693, + "grad_norm": 0.21942879387381486, + "learning_rate": 5.4925600075097285e-05, + "loss": 0.945, + "step": 1254 + }, + { + "epoch": 3.209718670076726, + "grad_norm": 0.19469068958831684, + "learning_rate": 5.4789958869538756e-05, + "loss": 0.9435, + "step": 1255 + }, + { + "epoch": 3.21227621483376, + "grad_norm": 0.20250937006123632, + "learning_rate": 5.4654398079802183e-05, + "loss": 0.9364, + "step": 1256 + }, + { + "epoch": 3.214833759590793, + "grad_norm": 0.19846072138477766, + "learning_rate": 5.451891813830382e-05, + "loss": 0.94, + "step": 1257 + }, + { + "epoch": 3.217391304347826, + "grad_norm": 0.20425114535656635, + "learning_rate": 5.4383519477202103e-05, + "loss": 0.9363, + "step": 1258 + }, + { + 
"epoch": 3.2199488491048593, + "grad_norm": 0.185008322081447, + "learning_rate": 5.42482025283961e-05, + "loss": 0.9815, + "step": 1259 + }, + { + "epoch": 3.2225063938618925, + "grad_norm": 0.2151529732841821, + "learning_rate": 5.41129677235243e-05, + "loss": 0.9498, + "step": 1260 + }, + { + "epoch": 3.2250639386189257, + "grad_norm": 0.1885448397273564, + "learning_rate": 5.397781549396316e-05, + "loss": 0.9337, + "step": 1261 + }, + { + "epoch": 3.227621483375959, + "grad_norm": 0.21418784649002942, + "learning_rate": 5.3842746270825705e-05, + "loss": 0.9171, + "step": 1262 + }, + { + "epoch": 3.2301790281329925, + "grad_norm": 0.20068889946827412, + "learning_rate": 5.370776048496026e-05, + "loss": 0.9376, + "step": 1263 + }, + { + "epoch": 3.2327365728900257, + "grad_norm": 0.24899426008654885, + "learning_rate": 5.357285856694891e-05, + "loss": 0.9429, + "step": 1264 + }, + { + "epoch": 3.235294117647059, + "grad_norm": 0.19686757692012147, + "learning_rate": 5.34380409471062e-05, + "loss": 0.9377, + "step": 1265 + }, + { + "epoch": 3.237851662404092, + "grad_norm": 0.24870949090788627, + "learning_rate": 5.33033080554779e-05, + "loss": 0.945, + "step": 1266 + }, + { + "epoch": 3.2404092071611252, + "grad_norm": 0.20621519140618658, + "learning_rate": 5.3168660321839386e-05, + "loss": 0.9379, + "step": 1267 + }, + { + "epoch": 3.2429667519181584, + "grad_norm": 0.21652792479122668, + "learning_rate": 5.303409817569449e-05, + "loss": 0.9021, + "step": 1268 + }, + { + "epoch": 3.2455242966751916, + "grad_norm": 0.19103019263904417, + "learning_rate": 5.2899622046274e-05, + "loss": 0.9613, + "step": 1269 + }, + { + "epoch": 3.2480818414322252, + "grad_norm": 0.21245341007957305, + "learning_rate": 5.276523236253425e-05, + "loss": 0.9387, + "step": 1270 + }, + { + "epoch": 3.2506393861892584, + "grad_norm": 0.2106216561170891, + "learning_rate": 5.263092955315595e-05, + "loss": 0.9546, + "step": 1271 + }, + { + "epoch": 3.2531969309462916, + "grad_norm": 
0.197972453520414, + "learning_rate": 5.2496714046542583e-05, + "loss": 0.9391, + "step": 1272 + }, + { + "epoch": 3.2557544757033248, + "grad_norm": 0.199650022114146, + "learning_rate": 5.2362586270819256e-05, + "loss": 0.9386, + "step": 1273 + }, + { + "epoch": 3.258312020460358, + "grad_norm": 0.18979777369555925, + "learning_rate": 5.222854665383116e-05, + "loss": 0.9495, + "step": 1274 + }, + { + "epoch": 3.260869565217391, + "grad_norm": 0.2173804109344821, + "learning_rate": 5.2094595623142326e-05, + "loss": 0.9588, + "step": 1275 + }, + { + "epoch": 3.2634271099744243, + "grad_norm": 0.2016383197459456, + "learning_rate": 5.1960733606034126e-05, + "loss": 0.9151, + "step": 1276 + }, + { + "epoch": 3.265984654731458, + "grad_norm": 0.2047292724222713, + "learning_rate": 5.182696102950404e-05, + "loss": 0.9686, + "step": 1277 + }, + { + "epoch": 3.268542199488491, + "grad_norm": 0.2065833579125683, + "learning_rate": 5.1693278320264304e-05, + "loss": 0.9384, + "step": 1278 + }, + { + "epoch": 3.2710997442455243, + "grad_norm": 0.20569255957459082, + "learning_rate": 5.1559685904740386e-05, + "loss": 0.9869, + "step": 1279 + }, + { + "epoch": 3.2736572890025575, + "grad_norm": 0.19840584494069785, + "learning_rate": 5.142618420906985e-05, + "loss": 0.9557, + "step": 1280 + }, + { + "epoch": 3.2762148337595907, + "grad_norm": 0.20387885459079644, + "learning_rate": 5.1292773659100755e-05, + "loss": 0.9642, + "step": 1281 + }, + { + "epoch": 3.2787723785166243, + "grad_norm": 0.2101778694530114, + "learning_rate": 5.115945468039048e-05, + "loss": 0.9509, + "step": 1282 + }, + { + "epoch": 3.2813299232736575, + "grad_norm": 0.2155780933816927, + "learning_rate": 5.1026227698204335e-05, + "loss": 0.9499, + "step": 1283 + }, + { + "epoch": 3.2838874680306906, + "grad_norm": 0.24104255752130535, + "learning_rate": 5.089309313751415e-05, + "loss": 0.9458, + "step": 1284 + }, + { + "epoch": 3.286445012787724, + "grad_norm": 0.2121724580915078, + "learning_rate": 
5.0760051422996925e-05, + "loss": 0.9499, + "step": 1285 + }, + { + "epoch": 3.289002557544757, + "grad_norm": 0.20440164305922942, + "learning_rate": 5.0627102979033546e-05, + "loss": 0.9458, + "step": 1286 + }, + { + "epoch": 3.29156010230179, + "grad_norm": 0.21910653895674295, + "learning_rate": 5.049424822970731e-05, + "loss": 0.9379, + "step": 1287 + }, + { + "epoch": 3.2941176470588234, + "grad_norm": 0.17657372919405595, + "learning_rate": 5.036148759880272e-05, + "loss": 0.9249, + "step": 1288 + }, + { + "epoch": 3.296675191815857, + "grad_norm": 0.22994935624931387, + "learning_rate": 5.0228821509803984e-05, + "loss": 0.9247, + "step": 1289 + }, + { + "epoch": 3.29923273657289, + "grad_norm": 0.18809716520389427, + "learning_rate": 5.0096250385893825e-05, + "loss": 0.9236, + "step": 1290 + }, + { + "epoch": 3.3017902813299234, + "grad_norm": 0.20395108123985592, + "learning_rate": 4.9963774649951975e-05, + "loss": 0.9351, + "step": 1291 + }, + { + "epoch": 3.3043478260869565, + "grad_norm": 0.21017478598124728, + "learning_rate": 4.983139472455387e-05, + "loss": 0.9603, + "step": 1292 + }, + { + "epoch": 3.3069053708439897, + "grad_norm": 0.21877137266724161, + "learning_rate": 4.969911103196942e-05, + "loss": 0.9067, + "step": 1293 + }, + { + "epoch": 3.309462915601023, + "grad_norm": 0.18726348177523444, + "learning_rate": 4.956692399416149e-05, + "loss": 0.9368, + "step": 1294 + }, + { + "epoch": 3.312020460358056, + "grad_norm": 0.2241750270363803, + "learning_rate": 4.943483403278468e-05, + "loss": 0.947, + "step": 1295 + }, + { + "epoch": 3.3145780051150897, + "grad_norm": 0.20581443285806397, + "learning_rate": 4.9302841569183884e-05, + "loss": 0.9575, + "step": 1296 + }, + { + "epoch": 3.317135549872123, + "grad_norm": 0.17452182993008977, + "learning_rate": 4.9170947024393074e-05, + "loss": 0.9156, + "step": 1297 + }, + { + "epoch": 3.319693094629156, + "grad_norm": 0.198949333785195, + "learning_rate": 4.9039150819133775e-05, + "loss": 0.9348, + 
"step": 1298 + }, + { + "epoch": 3.3222506393861893, + "grad_norm": 0.16601657169918604, + "learning_rate": 4.890745337381388e-05, + "loss": 0.9587, + "step": 1299 + }, + { + "epoch": 3.3248081841432224, + "grad_norm": 0.23036877304791145, + "learning_rate": 4.877585510852627e-05, + "loss": 0.9792, + "step": 1300 + }, + { + "epoch": 3.3273657289002556, + "grad_norm": 0.18765197640496664, + "learning_rate": 4.864435644304742e-05, + "loss": 0.9253, + "step": 1301 + }, + { + "epoch": 3.329923273657289, + "grad_norm": 0.19041731553942576, + "learning_rate": 4.851295779683616e-05, + "loss": 0.9535, + "step": 1302 + }, + { + "epoch": 3.3324808184143224, + "grad_norm": 0.2087435808060436, + "learning_rate": 4.8381659589032186e-05, + "loss": 0.9338, + "step": 1303 + }, + { + "epoch": 3.3350383631713556, + "grad_norm": 0.1903448069067344, + "learning_rate": 4.825046223845486e-05, + "loss": 0.9499, + "step": 1304 + }, + { + "epoch": 3.337595907928389, + "grad_norm": 0.21308090181205586, + "learning_rate": 4.811936616360186e-05, + "loss": 0.9256, + "step": 1305 + }, + { + "epoch": 3.340153452685422, + "grad_norm": 0.2023342708755437, + "learning_rate": 4.798837178264772e-05, + "loss": 0.9582, + "step": 1306 + }, + { + "epoch": 3.342710997442455, + "grad_norm": 0.21619791962247753, + "learning_rate": 4.78574795134427e-05, + "loss": 0.9125, + "step": 1307 + }, + { + "epoch": 3.3452685421994883, + "grad_norm": 0.2487539660815107, + "learning_rate": 4.772668977351128e-05, + "loss": 0.9537, + "step": 1308 + }, + { + "epoch": 3.3478260869565215, + "grad_norm": 0.2240156883350933, + "learning_rate": 4.7596002980050834e-05, + "loss": 0.9401, + "step": 1309 + }, + { + "epoch": 3.350383631713555, + "grad_norm": 0.2251746608186689, + "learning_rate": 4.7465419549930476e-05, + "loss": 0.9782, + "step": 1310 + }, + { + "epoch": 3.3529411764705883, + "grad_norm": 0.22881310384597994, + "learning_rate": 4.733493989968949e-05, + "loss": 0.9458, + "step": 1311 + }, + { + "epoch": 
3.3554987212276215, + "grad_norm": 0.2141099007638843, + "learning_rate": 4.7204564445536234e-05, + "loss": 0.9396, + "step": 1312 + }, + { + "epoch": 3.3580562659846547, + "grad_norm": 0.1882802550926345, + "learning_rate": 4.707429360334662e-05, + "loss": 0.942, + "step": 1313 + }, + { + "epoch": 3.360613810741688, + "grad_norm": 0.2179119833942681, + "learning_rate": 4.694412778866285e-05, + "loss": 0.9504, + "step": 1314 + }, + { + "epoch": 3.363171355498721, + "grad_norm": 0.16843886415285414, + "learning_rate": 4.681406741669216e-05, + "loss": 0.9221, + "step": 1315 + }, + { + "epoch": 3.3657289002557547, + "grad_norm": 0.21980007814521796, + "learning_rate": 4.668411290230543e-05, + "loss": 0.944, + "step": 1316 + }, + { + "epoch": 3.368286445012788, + "grad_norm": 0.1510130725197139, + "learning_rate": 4.655426466003586e-05, + "loss": 0.9563, + "step": 1317 + }, + { + "epoch": 3.370843989769821, + "grad_norm": 0.19586517189701522, + "learning_rate": 4.6424523104077654e-05, + "loss": 0.9508, + "step": 1318 + }, + { + "epoch": 3.373401534526854, + "grad_norm": 0.1995467600478656, + "learning_rate": 4.629488864828472e-05, + "loss": 0.9502, + "step": 1319 + }, + { + "epoch": 3.3759590792838874, + "grad_norm": 0.1742993616386661, + "learning_rate": 4.6165361706169325e-05, + "loss": 0.9268, + "step": 1320 + }, + { + "epoch": 3.3785166240409206, + "grad_norm": 0.2067544794585532, + "learning_rate": 4.603594269090078e-05, + "loss": 0.9268, + "step": 1321 + }, + { + "epoch": 3.381074168797954, + "grad_norm": 0.2227068577818483, + "learning_rate": 4.5906632015304116e-05, + "loss": 0.9358, + "step": 1322 + }, + { + "epoch": 3.3836317135549874, + "grad_norm": 0.2034466989052333, + "learning_rate": 4.5777430091858855e-05, + "loss": 0.9302, + "step": 1323 + }, + { + "epoch": 3.3861892583120206, + "grad_norm": 0.20709571806774676, + "learning_rate": 4.564833733269755e-05, + "loss": 0.9427, + "step": 1324 + }, + { + "epoch": 3.3887468030690537, + "grad_norm": 
0.22013092566675613, + "learning_rate": 4.5519354149604474e-05, + "loss": 0.9437, + "step": 1325 + }, + { + "epoch": 3.391304347826087, + "grad_norm": 0.18450541197105383, + "learning_rate": 4.539048095401452e-05, + "loss": 0.9466, + "step": 1326 + }, + { + "epoch": 3.39386189258312, + "grad_norm": 0.22548387813850762, + "learning_rate": 4.526171815701165e-05, + "loss": 0.9336, + "step": 1327 + }, + { + "epoch": 3.3964194373401533, + "grad_norm": 0.1820733823905873, + "learning_rate": 4.513306616932764e-05, + "loss": 0.9215, + "step": 1328 + }, + { + "epoch": 3.398976982097187, + "grad_norm": 0.21404349632115405, + "learning_rate": 4.5004525401340915e-05, + "loss": 0.9801, + "step": 1329 + }, + { + "epoch": 3.40153452685422, + "grad_norm": 0.18377817821243256, + "learning_rate": 4.487609626307508e-05, + "loss": 0.9655, + "step": 1330 + }, + { + "epoch": 3.4040920716112533, + "grad_norm": 0.1923893878636668, + "learning_rate": 4.4747779164197535e-05, + "loss": 0.9382, + "step": 1331 + }, + { + "epoch": 3.4066496163682864, + "grad_norm": 0.19516009680845245, + "learning_rate": 4.4619574514018486e-05, + "loss": 0.9557, + "step": 1332 + }, + { + "epoch": 3.4092071611253196, + "grad_norm": 0.19144644869283248, + "learning_rate": 4.449148272148934e-05, + "loss": 0.9345, + "step": 1333 + }, + { + "epoch": 3.411764705882353, + "grad_norm": 0.1817955488888704, + "learning_rate": 4.436350419520154e-05, + "loss": 0.9608, + "step": 1334 + }, + { + "epoch": 3.414322250639386, + "grad_norm": 0.2056911128568184, + "learning_rate": 4.423563934338519e-05, + "loss": 0.9458, + "step": 1335 + }, + { + "epoch": 3.4168797953964196, + "grad_norm": 0.1693771378014072, + "learning_rate": 4.410788857390785e-05, + "loss": 0.9466, + "step": 1336 + }, + { + "epoch": 3.419437340153453, + "grad_norm": 0.20830311663566495, + "learning_rate": 4.39802522942731e-05, + "loss": 0.9408, + "step": 1337 + }, + { + "epoch": 3.421994884910486, + "grad_norm": 0.1698790309922409, + "learning_rate": 
4.385273091161937e-05, + "loss": 0.9305, + "step": 1338 + }, + { + "epoch": 3.424552429667519, + "grad_norm": 0.19474240897387077, + "learning_rate": 4.372532483271863e-05, + "loss": 0.9375, + "step": 1339 + }, + { + "epoch": 3.4271099744245523, + "grad_norm": 0.2059429092680418, + "learning_rate": 4.3598034463974966e-05, + "loss": 0.9869, + "step": 1340 + }, + { + "epoch": 3.4296675191815855, + "grad_norm": 0.19031026060303782, + "learning_rate": 4.347086021142339e-05, + "loss": 0.9765, + "step": 1341 + }, + { + "epoch": 3.4322250639386187, + "grad_norm": 0.19960933133782244, + "learning_rate": 4.3343802480728544e-05, + "loss": 0.9431, + "step": 1342 + }, + { + "epoch": 3.4347826086956523, + "grad_norm": 0.1924073308227482, + "learning_rate": 4.321686167718337e-05, + "loss": 0.9545, + "step": 1343 + }, + { + "epoch": 3.4373401534526855, + "grad_norm": 0.2028658725938022, + "learning_rate": 4.309003820570785e-05, + "loss": 0.9377, + "step": 1344 + }, + { + "epoch": 3.4398976982097187, + "grad_norm": 0.2106823975486889, + "learning_rate": 4.296333247084764e-05, + "loss": 0.9283, + "step": 1345 + }, + { + "epoch": 3.442455242966752, + "grad_norm": 0.21370019365379003, + "learning_rate": 4.283674487677297e-05, + "loss": 0.9663, + "step": 1346 + }, + { + "epoch": 3.445012787723785, + "grad_norm": 0.20381679039668288, + "learning_rate": 4.271027582727703e-05, + "loss": 0.9425, + "step": 1347 + }, + { + "epoch": 3.4475703324808182, + "grad_norm": 0.2465303759456818, + "learning_rate": 4.2583925725774996e-05, + "loss": 0.963, + "step": 1348 + }, + { + "epoch": 3.4501278772378514, + "grad_norm": 0.2017710128697274, + "learning_rate": 4.2457694975302625e-05, + "loss": 0.969, + "step": 1349 + }, + { + "epoch": 3.452685421994885, + "grad_norm": 0.2599485575517086, + "learning_rate": 4.233158397851494e-05, + "loss": 0.9578, + "step": 1350 + }, + { + "epoch": 3.455242966751918, + "grad_norm": 0.20994916380961168, + "learning_rate": 4.220559313768492e-05, + "loss": 0.9517, + 
"step": 1351 + }, + { + "epoch": 3.4578005115089514, + "grad_norm": 0.25562334357376887, + "learning_rate": 4.207972285470236e-05, + "loss": 0.9593, + "step": 1352 + }, + { + "epoch": 3.4603580562659846, + "grad_norm": 0.2018942765243476, + "learning_rate": 4.1953973531072403e-05, + "loss": 0.9238, + "step": 1353 + }, + { + "epoch": 3.4629156010230178, + "grad_norm": 0.23893893502461097, + "learning_rate": 4.1828345567914426e-05, + "loss": 0.9463, + "step": 1354 + }, + { + "epoch": 3.4654731457800514, + "grad_norm": 0.2377570507765394, + "learning_rate": 4.17028393659606e-05, + "loss": 0.9379, + "step": 1355 + }, + { + "epoch": 3.4680306905370846, + "grad_norm": 0.21617110584103066, + "learning_rate": 4.157745532555484e-05, + "loss": 0.9445, + "step": 1356 + }, + { + "epoch": 3.4705882352941178, + "grad_norm": 0.20973373939841763, + "learning_rate": 4.145219384665128e-05, + "loss": 0.9471, + "step": 1357 + }, + { + "epoch": 3.473145780051151, + "grad_norm": 0.19248666440528944, + "learning_rate": 4.1327055328813036e-05, + "loss": 0.9492, + "step": 1358 + }, + { + "epoch": 3.475703324808184, + "grad_norm": 0.19782620860430303, + "learning_rate": 4.1202040171211195e-05, + "loss": 0.9677, + "step": 1359 + }, + { + "epoch": 3.4782608695652173, + "grad_norm": 0.18288110899297144, + "learning_rate": 4.107714877262318e-05, + "loss": 0.9574, + "step": 1360 + }, + { + "epoch": 3.4808184143222505, + "grad_norm": 0.18982354052970898, + "learning_rate": 4.0952381531431716e-05, + "loss": 0.9411, + "step": 1361 + }, + { + "epoch": 3.483375959079284, + "grad_norm": 0.19047078322563796, + "learning_rate": 4.082773884562342e-05, + "loss": 0.9465, + "step": 1362 + }, + { + "epoch": 3.4859335038363173, + "grad_norm": 0.20024490556690386, + "learning_rate": 4.0703221112787774e-05, + "loss": 0.9631, + "step": 1363 + }, + { + "epoch": 3.4884910485933505, + "grad_norm": 0.18855297057246742, + "learning_rate": 4.057882873011543e-05, + "loss": 0.9333, + "step": 1364 + }, + { + "epoch": 
3.4910485933503836, + "grad_norm": 0.18121257314529818, + "learning_rate": 4.045456209439734e-05, + "loss": 0.9683, + "step": 1365 + }, + { + "epoch": 3.493606138107417, + "grad_norm": 0.19866185503250056, + "learning_rate": 4.033042160202337e-05, + "loss": 0.9872, + "step": 1366 + }, + { + "epoch": 3.49616368286445, + "grad_norm": 0.17010036933663283, + "learning_rate": 4.020640764898096e-05, + "loss": 0.9685, + "step": 1367 + }, + { + "epoch": 3.498721227621483, + "grad_norm": 0.18176622769606524, + "learning_rate": 4.0082520630853865e-05, + "loss": 0.9112, + "step": 1368 + }, + { + "epoch": 3.501278772378517, + "grad_norm": 0.1861883153790341, + "learning_rate": 3.995876094282104e-05, + "loss": 0.9585, + "step": 1369 + }, + { + "epoch": 3.50383631713555, + "grad_norm": 0.19579755858911602, + "learning_rate": 3.983512897965519e-05, + "loss": 0.959, + "step": 1370 + }, + { + "epoch": 3.506393861892583, + "grad_norm": 0.18488711544490097, + "learning_rate": 3.9711625135721664e-05, + "loss": 0.9555, + "step": 1371 + }, + { + "epoch": 3.5089514066496164, + "grad_norm": 0.2073614939639127, + "learning_rate": 3.958824980497704e-05, + "loss": 0.9744, + "step": 1372 + }, + { + "epoch": 3.5115089514066495, + "grad_norm": 0.17154095562950622, + "learning_rate": 3.946500338096811e-05, + "loss": 0.9353, + "step": 1373 + }, + { + "epoch": 3.5140664961636827, + "grad_norm": 0.20478213377969626, + "learning_rate": 3.934188625683037e-05, + "loss": 0.9568, + "step": 1374 + }, + { + "epoch": 3.516624040920716, + "grad_norm": 0.18373687324276738, + "learning_rate": 3.9218898825286806e-05, + "loss": 0.9279, + "step": 1375 + }, + { + "epoch": 3.5191815856777495, + "grad_norm": 0.1716453870437831, + "learning_rate": 3.9096041478646885e-05, + "loss": 0.9342, + "step": 1376 + }, + { + "epoch": 3.5217391304347827, + "grad_norm": 0.18268819201544698, + "learning_rate": 3.8973314608805e-05, + "loss": 0.962, + "step": 1377 + }, + { + "epoch": 3.524296675191816, + "grad_norm": 
0.16258821810908097, + "learning_rate": 3.885071860723937e-05, + "loss": 0.9293, + "step": 1378 + }, + { + "epoch": 3.526854219948849, + "grad_norm": 0.165376063640211, + "learning_rate": 3.8728253865010765e-05, + "loss": 0.9895, + "step": 1379 + }, + { + "epoch": 3.5294117647058822, + "grad_norm": 0.16721193942916188, + "learning_rate": 3.8605920772761274e-05, + "loss": 0.9328, + "step": 1380 + }, + { + "epoch": 3.531969309462916, + "grad_norm": 0.16130857457103082, + "learning_rate": 3.848371972071304e-05, + "loss": 0.9859, + "step": 1381 + }, + { + "epoch": 3.5345268542199486, + "grad_norm": 0.16278759213568428, + "learning_rate": 3.8361651098666967e-05, + "loss": 0.9569, + "step": 1382 + }, + { + "epoch": 3.5370843989769822, + "grad_norm": 0.17183294163130294, + "learning_rate": 3.8239715296001654e-05, + "loss": 0.9418, + "step": 1383 + }, + { + "epoch": 3.5396419437340154, + "grad_norm": 0.155240959003008, + "learning_rate": 3.8117912701671905e-05, + "loss": 0.9696, + "step": 1384 + }, + { + "epoch": 3.5421994884910486, + "grad_norm": 0.17273359598041008, + "learning_rate": 3.7996243704207686e-05, + "loss": 0.9502, + "step": 1385 + }, + { + "epoch": 3.544757033248082, + "grad_norm": 0.1703572907276737, + "learning_rate": 3.787470869171277e-05, + "loss": 0.9673, + "step": 1386 + }, + { + "epoch": 3.547314578005115, + "grad_norm": 0.163047329660931, + "learning_rate": 3.7753308051863534e-05, + "loss": 0.9244, + "step": 1387 + }, + { + "epoch": 3.5498721227621486, + "grad_norm": 0.16125670043718637, + "learning_rate": 3.763204217190778e-05, + "loss": 0.9414, + "step": 1388 + }, + { + "epoch": 3.5524296675191813, + "grad_norm": 0.17450887360011574, + "learning_rate": 3.751091143866338e-05, + "loss": 0.9677, + "step": 1389 + }, + { + "epoch": 3.554987212276215, + "grad_norm": 0.15580595508138104, + "learning_rate": 3.7389916238517224e-05, + "loss": 0.9758, + "step": 1390 + }, + { + "epoch": 3.557544757033248, + "grad_norm": 0.17069367779408143, + "learning_rate": 
3.726905695742372e-05, + "loss": 0.9142, + "step": 1391 + }, + { + "epoch": 3.5601023017902813, + "grad_norm": 0.16910211167776398, + "learning_rate": 3.7148333980903796e-05, + "loss": 0.9389, + "step": 1392 + }, + { + "epoch": 3.5626598465473145, + "grad_norm": 0.1663225487056752, + "learning_rate": 3.7027747694043645e-05, + "loss": 0.9557, + "step": 1393 + }, + { + "epoch": 3.5652173913043477, + "grad_norm": 0.16804185773204355, + "learning_rate": 3.690729848149335e-05, + "loss": 0.9588, + "step": 1394 + }, + { + "epoch": 3.5677749360613813, + "grad_norm": 0.16402784688128466, + "learning_rate": 3.678698672746581e-05, + "loss": 0.964, + "step": 1395 + }, + { + "epoch": 3.5703324808184145, + "grad_norm": 0.18174268933477528, + "learning_rate": 3.6666812815735424e-05, + "loss": 0.9433, + "step": 1396 + }, + { + "epoch": 3.5728900255754477, + "grad_norm": 0.15614453400715234, + "learning_rate": 3.6546777129636886e-05, + "loss": 0.9252, + "step": 1397 + }, + { + "epoch": 3.575447570332481, + "grad_norm": 0.16700607138470522, + "learning_rate": 3.6426880052064026e-05, + "loss": 0.9636, + "step": 1398 + }, + { + "epoch": 3.578005115089514, + "grad_norm": 0.20568461367374485, + "learning_rate": 3.630712196546844e-05, + "loss": 0.9649, + "step": 1399 + }, + { + "epoch": 3.580562659846547, + "grad_norm": 0.14660657078481024, + "learning_rate": 3.6187503251858505e-05, + "loss": 0.9267, + "step": 1400 + }, + { + "epoch": 3.5831202046035804, + "grad_norm": 0.16935747703951526, + "learning_rate": 3.6068024292797945e-05, + "loss": 0.9356, + "step": 1401 + }, + { + "epoch": 3.585677749360614, + "grad_norm": 0.15782075450424704, + "learning_rate": 3.59486854694046e-05, + "loss": 0.9548, + "step": 1402 + }, + { + "epoch": 3.588235294117647, + "grad_norm": 0.17132410907270623, + "learning_rate": 3.582948716234948e-05, + "loss": 0.9493, + "step": 1403 + }, + { + "epoch": 3.5907928388746804, + "grad_norm": 0.16858095077712948, + "learning_rate": 3.571042975185524e-05, + "loss": 
0.9552, + "step": 1404 + }, + { + "epoch": 3.5933503836317136, + "grad_norm": 0.1634251285228488, + "learning_rate": 3.559151361769517e-05, + "loss": 0.9466, + "step": 1405 + }, + { + "epoch": 3.5959079283887467, + "grad_norm": 0.1729430282795056, + "learning_rate": 3.547273913919182e-05, + "loss": 0.95, + "step": 1406 + }, + { + "epoch": 3.59846547314578, + "grad_norm": 0.1821907434145911, + "learning_rate": 3.535410669521605e-05, + "loss": 0.9588, + "step": 1407 + }, + { + "epoch": 3.601023017902813, + "grad_norm": 0.15781654283531932, + "learning_rate": 3.5235616664185465e-05, + "loss": 0.9591, + "step": 1408 + }, + { + "epoch": 3.6035805626598467, + "grad_norm": 0.1677674098580371, + "learning_rate": 3.5117269424063466e-05, + "loss": 0.9372, + "step": 1409 + }, + { + "epoch": 3.60613810741688, + "grad_norm": 0.1668467714604029, + "learning_rate": 3.4999065352358055e-05, + "loss": 0.9128, + "step": 1410 + }, + { + "epoch": 3.608695652173913, + "grad_norm": 0.16023804099695482, + "learning_rate": 3.488100482612046e-05, + "loss": 0.9533, + "step": 1411 + }, + { + "epoch": 3.6112531969309463, + "grad_norm": 0.17448057130149636, + "learning_rate": 3.476308822194404e-05, + "loss": 0.9696, + "step": 1412 + }, + { + "epoch": 3.6138107416879794, + "grad_norm": 0.17176757036978785, + "learning_rate": 3.4645315915963085e-05, + "loss": 0.9295, + "step": 1413 + }, + { + "epoch": 3.6163682864450126, + "grad_norm": 0.16582442582314796, + "learning_rate": 3.452768828385156e-05, + "loss": 0.9478, + "step": 1414 + }, + { + "epoch": 3.618925831202046, + "grad_norm": 0.16508960150611576, + "learning_rate": 3.4410205700822e-05, + "loss": 0.9267, + "step": 1415 + }, + { + "epoch": 3.6214833759590794, + "grad_norm": 0.15842544276922507, + "learning_rate": 3.42928685416242e-05, + "loss": 0.9487, + "step": 1416 + }, + { + "epoch": 3.6240409207161126, + "grad_norm": 0.16737847990453103, + "learning_rate": 3.417567718054413e-05, + "loss": 0.9257, + "step": 1417 + }, + { + "epoch": 
3.626598465473146, + "grad_norm": 0.16179442819088455, + "learning_rate": 3.405863199140271e-05, + "loss": 0.9594, + "step": 1418 + }, + { + "epoch": 3.629156010230179, + "grad_norm": 0.17740705653386357, + "learning_rate": 3.3941733347554434e-05, + "loss": 0.954, + "step": 1419 + }, + { + "epoch": 3.631713554987212, + "grad_norm": 0.1745105989485467, + "learning_rate": 3.3824981621886545e-05, + "loss": 0.9536, + "step": 1420 + }, + { + "epoch": 3.634271099744246, + "grad_norm": 0.1927262004385616, + "learning_rate": 3.370837718681754e-05, + "loss": 0.9685, + "step": 1421 + }, + { + "epoch": 3.6368286445012785, + "grad_norm": 0.15752590578867717, + "learning_rate": 3.3591920414296094e-05, + "loss": 0.9248, + "step": 1422 + }, + { + "epoch": 3.639386189258312, + "grad_norm": 0.21240595387549532, + "learning_rate": 3.347561167579986e-05, + "loss": 0.9521, + "step": 1423 + }, + { + "epoch": 3.6419437340153453, + "grad_norm": 0.17508530317965004, + "learning_rate": 3.3359451342334306e-05, + "loss": 0.9431, + "step": 1424 + }, + { + "epoch": 3.6445012787723785, + "grad_norm": 0.21738581132916354, + "learning_rate": 3.324343978443148e-05, + "loss": 0.9716, + "step": 1425 + }, + { + "epoch": 3.6470588235294117, + "grad_norm": 0.16746773638107448, + "learning_rate": 3.3127577372148874e-05, + "loss": 0.9322, + "step": 1426 + }, + { + "epoch": 3.649616368286445, + "grad_norm": 0.2122059201301744, + "learning_rate": 3.301186447506827e-05, + "loss": 0.9422, + "step": 1427 + }, + { + "epoch": 3.6521739130434785, + "grad_norm": 0.15741451467355758, + "learning_rate": 3.289630146229449e-05, + "loss": 0.9366, + "step": 1428 + }, + { + "epoch": 3.6547314578005117, + "grad_norm": 0.19813994445803942, + "learning_rate": 3.278088870245423e-05, + "loss": 0.9286, + "step": 1429 + }, + { + "epoch": 3.657289002557545, + "grad_norm": 0.16851843081939155, + "learning_rate": 3.2665626563694937e-05, + "loss": 0.9572, + "step": 1430 + }, + { + "epoch": 3.659846547314578, + "grad_norm": 
0.20717471275600138, + "learning_rate": 3.2550515413683574e-05, + "loss": 0.9512, + "step": 1431 + }, + { + "epoch": 3.662404092071611, + "grad_norm": 0.16245953402744545, + "learning_rate": 3.2435555619605504e-05, + "loss": 0.9542, + "step": 1432 + }, + { + "epoch": 3.6649616368286444, + "grad_norm": 0.19641538640030912, + "learning_rate": 3.232074754816323e-05, + "loss": 0.9306, + "step": 1433 + }, + { + "epoch": 3.6675191815856776, + "grad_norm": 0.1594631052144963, + "learning_rate": 3.220609156557544e-05, + "loss": 0.9363, + "step": 1434 + }, + { + "epoch": 3.670076726342711, + "grad_norm": 0.18455147659478868, + "learning_rate": 3.209158803757546e-05, + "loss": 0.9321, + "step": 1435 + }, + { + "epoch": 3.6726342710997444, + "grad_norm": 0.1790498881096886, + "learning_rate": 3.1977237329410446e-05, + "loss": 0.9608, + "step": 1436 + }, + { + "epoch": 3.6751918158567776, + "grad_norm": 0.1870454897435218, + "learning_rate": 3.186303980584012e-05, + "loss": 0.9389, + "step": 1437 + }, + { + "epoch": 3.6777493606138107, + "grad_norm": 0.20530561810770268, + "learning_rate": 3.174899583113548e-05, + "loss": 0.9945, + "step": 1438 + }, + { + "epoch": 3.680306905370844, + "grad_norm": 0.18019213638281067, + "learning_rate": 3.1635105769077766e-05, + "loss": 0.9307, + "step": 1439 + }, + { + "epoch": 3.682864450127877, + "grad_norm": 0.20610761052130405, + "learning_rate": 3.152136998295727e-05, + "loss": 0.9321, + "step": 1440 + }, + { + "epoch": 3.6854219948849103, + "grad_norm": 0.17985929842660886, + "learning_rate": 3.140778883557213e-05, + "loss": 0.932, + "step": 1441 + }, + { + "epoch": 3.687979539641944, + "grad_norm": 0.20013068677532989, + "learning_rate": 3.129436268922728e-05, + "loss": 0.9324, + "step": 1442 + }, + { + "epoch": 3.690537084398977, + "grad_norm": 0.17562501633026537, + "learning_rate": 3.118109190573313e-05, + "loss": 0.9145, + "step": 1443 + }, + { + "epoch": 3.6930946291560103, + "grad_norm": 0.18827294282018908, + "learning_rate": 
3.106797684640464e-05, + "loss": 0.9402, + "step": 1444 + }, + { + "epoch": 3.6956521739130435, + "grad_norm": 0.20170283801470837, + "learning_rate": 3.0955017872059956e-05, + "loss": 0.9591, + "step": 1445 + }, + { + "epoch": 3.6982097186700766, + "grad_norm": 0.15387225427234089, + "learning_rate": 3.084221534301926e-05, + "loss": 0.9253, + "step": 1446 + }, + { + "epoch": 3.70076726342711, + "grad_norm": 0.24032338349831264, + "learning_rate": 3.0729569619103876e-05, + "loss": 0.9501, + "step": 1447 + }, + { + "epoch": 3.703324808184143, + "grad_norm": 0.1613801252077293, + "learning_rate": 3.061708105963481e-05, + "loss": 0.9706, + "step": 1448 + }, + { + "epoch": 3.7058823529411766, + "grad_norm": 0.18342909310635377, + "learning_rate": 3.0504750023431787e-05, + "loss": 0.9268, + "step": 1449 + }, + { + "epoch": 3.70843989769821, + "grad_norm": 0.1656531219879725, + "learning_rate": 3.039257686881209e-05, + "loss": 0.9385, + "step": 1450 + }, + { + "epoch": 3.710997442455243, + "grad_norm": 0.1781080191407481, + "learning_rate": 3.028056195358936e-05, + "loss": 0.9201, + "step": 1451 + }, + { + "epoch": 3.713554987212276, + "grad_norm": 0.1682926250161123, + "learning_rate": 3.016870563507241e-05, + "loss": 0.9486, + "step": 1452 + }, + { + "epoch": 3.7161125319693094, + "grad_norm": 0.17403568022524737, + "learning_rate": 3.0057008270064226e-05, + "loss": 0.9326, + "step": 1453 + }, + { + "epoch": 3.718670076726343, + "grad_norm": 0.17412534323602966, + "learning_rate": 2.9945470214860815e-05, + "loss": 0.9737, + "step": 1454 + }, + { + "epoch": 3.7212276214833757, + "grad_norm": 0.2012938530305388, + "learning_rate": 2.9834091825249908e-05, + "loss": 0.9319, + "step": 1455 + }, + { + "epoch": 3.7237851662404093, + "grad_norm": 0.15521247782508635, + "learning_rate": 2.9722873456509985e-05, + "loss": 0.9289, + "step": 1456 + }, + { + "epoch": 3.7263427109974425, + "grad_norm": 0.15552821509875525, + "learning_rate": 2.961181546340906e-05, + "loss": 0.9707, + 
"step": 1457 + }, + { + "epoch": 3.7289002557544757, + "grad_norm": 0.19037886779641314, + "learning_rate": 2.95009182002036e-05, + "loss": 0.9313, + "step": 1458 + }, + { + "epoch": 3.731457800511509, + "grad_norm": 0.16615970202045902, + "learning_rate": 2.939018202063732e-05, + "loss": 0.9647, + "step": 1459 + }, + { + "epoch": 3.734015345268542, + "grad_norm": 0.17646317393385902, + "learning_rate": 2.9279607277940196e-05, + "loss": 0.9474, + "step": 1460 + }, + { + "epoch": 3.7365728900255757, + "grad_norm": 0.16080135640987508, + "learning_rate": 2.9169194324827183e-05, + "loss": 0.926, + "step": 1461 + }, + { + "epoch": 3.7391304347826084, + "grad_norm": 0.17325852442311754, + "learning_rate": 2.9058943513497158e-05, + "loss": 0.9312, + "step": 1462 + }, + { + "epoch": 3.741687979539642, + "grad_norm": 0.2657172615999172, + "learning_rate": 2.8948855195631797e-05, + "loss": 0.9417, + "step": 1463 + }, + { + "epoch": 3.7442455242966752, + "grad_norm": 0.18232454995244132, + "learning_rate": 2.883892972239445e-05, + "loss": 0.9596, + "step": 1464 + }, + { + "epoch": 3.7468030690537084, + "grad_norm": 0.15153887237658853, + "learning_rate": 2.8729167444429042e-05, + "loss": 0.9476, + "step": 1465 + }, + { + "epoch": 3.7493606138107416, + "grad_norm": 0.17675913819692224, + "learning_rate": 2.8619568711858858e-05, + "loss": 0.945, + "step": 1466 + }, + { + "epoch": 3.7519181585677748, + "grad_norm": 0.16206615280321732, + "learning_rate": 2.8510133874285633e-05, + "loss": 0.9462, + "step": 1467 + }, + { + "epoch": 3.7544757033248084, + "grad_norm": 0.1553778010776279, + "learning_rate": 2.8400863280788207e-05, + "loss": 0.9407, + "step": 1468 + }, + { + "epoch": 3.7570332480818416, + "grad_norm": 0.16829547679009138, + "learning_rate": 2.829175727992147e-05, + "loss": 0.963, + "step": 1469 + }, + { + "epoch": 3.7595907928388748, + "grad_norm": 0.13746655170307476, + "learning_rate": 2.818281621971541e-05, + "loss": 0.9221, + "step": 1470 + }, + { + "epoch": 
3.762148337595908, + "grad_norm": 0.16271667131621254, + "learning_rate": 2.8074040447673794e-05, + "loss": 0.9535, + "step": 1471 + }, + { + "epoch": 3.764705882352941, + "grad_norm": 0.16318435465235073, + "learning_rate": 2.7965430310773184e-05, + "loss": 0.9475, + "step": 1472 + }, + { + "epoch": 3.7672634271099743, + "grad_norm": 0.16520541373584413, + "learning_rate": 2.7856986155461777e-05, + "loss": 0.9315, + "step": 1473 + }, + { + "epoch": 3.7698209718670075, + "grad_norm": 0.32117889861607873, + "learning_rate": 2.7748708327658317e-05, + "loss": 0.9455, + "step": 1474 + }, + { + "epoch": 3.772378516624041, + "grad_norm": 0.17314463246020131, + "learning_rate": 2.7640597172751004e-05, + "loss": 0.9525, + "step": 1475 + }, + { + "epoch": 3.7749360613810743, + "grad_norm": 0.15225032038812816, + "learning_rate": 2.7532653035596336e-05, + "loss": 0.9453, + "step": 1476 + }, + { + "epoch": 3.7774936061381075, + "grad_norm": 0.17247417052786013, + "learning_rate": 2.7424876260518146e-05, + "loss": 0.9152, + "step": 1477 + }, + { + "epoch": 3.7800511508951407, + "grad_norm": 0.15503112719134568, + "learning_rate": 2.7317267191306318e-05, + "loss": 0.9398, + "step": 1478 + }, + { + "epoch": 3.782608695652174, + "grad_norm": 0.1631084235061464, + "learning_rate": 2.7209826171215827e-05, + "loss": 0.9246, + "step": 1479 + }, + { + "epoch": 3.785166240409207, + "grad_norm": 0.15506280568530903, + "learning_rate": 2.7102553542965577e-05, + "loss": 0.936, + "step": 1480 + }, + { + "epoch": 3.78772378516624, + "grad_norm": 0.1404687271754989, + "learning_rate": 2.6995449648737343e-05, + "loss": 0.9359, + "step": 1481 + }, + { + "epoch": 3.790281329923274, + "grad_norm": 0.1557007128341937, + "learning_rate": 2.6888514830174678e-05, + "loss": 0.954, + "step": 1482 + }, + { + "epoch": 3.792838874680307, + "grad_norm": 0.16612555940333462, + "learning_rate": 2.6781749428381752e-05, + "loss": 1.0034, + "step": 1483 + }, + { + "epoch": 3.79539641943734, + "grad_norm": 
0.1733496961568388, + "learning_rate": 2.6675153783922457e-05, + "loss": 0.9518, + "step": 1484 + }, + { + "epoch": 3.7979539641943734, + "grad_norm": 0.15940418283478483, + "learning_rate": 2.6568728236819023e-05, + "loss": 0.9817, + "step": 1485 + }, + { + "epoch": 3.8005115089514065, + "grad_norm": 0.19079011728203774, + "learning_rate": 2.6462473126551187e-05, + "loss": 0.9735, + "step": 1486 + }, + { + "epoch": 3.80306905370844, + "grad_norm": 0.16130729906636684, + "learning_rate": 2.635638879205504e-05, + "loss": 0.9579, + "step": 1487 + }, + { + "epoch": 3.805626598465473, + "grad_norm": 0.1745866503183891, + "learning_rate": 2.625047557172189e-05, + "loss": 0.9402, + "step": 1488 + }, + { + "epoch": 3.8081841432225065, + "grad_norm": 0.18057372768582713, + "learning_rate": 2.6144733803397212e-05, + "loss": 0.9474, + "step": 1489 + }, + { + "epoch": 3.8107416879795397, + "grad_norm": 0.1560777993171654, + "learning_rate": 2.6039163824379588e-05, + "loss": 0.9506, + "step": 1490 + }, + { + "epoch": 3.813299232736573, + "grad_norm": 0.1674616567029557, + "learning_rate": 2.5933765971419647e-05, + "loss": 0.9488, + "step": 1491 + }, + { + "epoch": 3.815856777493606, + "grad_norm": 0.15672982172497663, + "learning_rate": 2.582854058071892e-05, + "loss": 0.9458, + "step": 1492 + }, + { + "epoch": 3.8184143222506393, + "grad_norm": 0.1558200464104945, + "learning_rate": 2.5723487987928817e-05, + "loss": 0.9518, + "step": 1493 + }, + { + "epoch": 3.820971867007673, + "grad_norm": 0.14208299213871128, + "learning_rate": 2.5618608528149614e-05, + "loss": 0.93, + "step": 1494 + }, + { + "epoch": 3.8235294117647056, + "grad_norm": 0.16087610572734629, + "learning_rate": 2.5513902535929288e-05, + "loss": 0.9763, + "step": 1495 + }, + { + "epoch": 3.8260869565217392, + "grad_norm": 0.1493299114392072, + "learning_rate": 2.5409370345262385e-05, + "loss": 0.9471, + "step": 1496 + }, + { + "epoch": 3.8286445012787724, + "grad_norm": 0.15214002644065255, + "learning_rate": 
2.5305012289589223e-05, + "loss": 0.9588, + "step": 1497 + }, + { + "epoch": 3.8312020460358056, + "grad_norm": 0.15727057443971326, + "learning_rate": 2.5200828701794543e-05, + "loss": 0.9294, + "step": 1498 + }, + { + "epoch": 3.833759590792839, + "grad_norm": 0.14966978310373255, + "learning_rate": 2.5096819914206592e-05, + "loss": 0.9372, + "step": 1499 + }, + { + "epoch": 3.836317135549872, + "grad_norm": 0.160200304381001, + "learning_rate": 2.4992986258596023e-05, + "loss": 0.9648, + "step": 1500 + }, + { + "epoch": 3.8388746803069056, + "grad_norm": 0.1364407301299318, + "learning_rate": 2.4889328066174932e-05, + "loss": 0.9458, + "step": 1501 + }, + { + "epoch": 3.8414322250639388, + "grad_norm": 0.15554384512550426, + "learning_rate": 2.4785845667595565e-05, + "loss": 0.9532, + "step": 1502 + }, + { + "epoch": 3.843989769820972, + "grad_norm": 0.14270917443883158, + "learning_rate": 2.4682539392949494e-05, + "loss": 0.9194, + "step": 1503 + }, + { + "epoch": 3.846547314578005, + "grad_norm": 0.15315949958673647, + "learning_rate": 2.4579409571766543e-05, + "loss": 0.9619, + "step": 1504 + }, + { + "epoch": 3.8491048593350383, + "grad_norm": 0.14236120859618645, + "learning_rate": 2.4476456533013597e-05, + "loss": 0.9637, + "step": 1505 + }, + { + "epoch": 3.8516624040920715, + "grad_norm": 0.14065482492078218, + "learning_rate": 2.437368060509365e-05, + "loss": 0.9406, + "step": 1506 + }, + { + "epoch": 3.8542199488491047, + "grad_norm": 0.13361767868605823, + "learning_rate": 2.427108211584476e-05, + "loss": 0.9595, + "step": 1507 + }, + { + "epoch": 3.8567774936061383, + "grad_norm": 0.13594955260031957, + "learning_rate": 2.4168661392538982e-05, + "loss": 0.9421, + "step": 1508 + }, + { + "epoch": 3.8593350383631715, + "grad_norm": 0.13851801316117543, + "learning_rate": 2.4066418761881308e-05, + "loss": 0.9687, + "step": 1509 + }, + { + "epoch": 3.8618925831202047, + "grad_norm": 0.13380711931983305, + "learning_rate": 2.396435455000864e-05, + "loss": 
0.9468, + "step": 1510 + }, + { + "epoch": 3.864450127877238, + "grad_norm": 0.13649849585417867, + "learning_rate": 2.386246908248883e-05, + "loss": 0.9228, + "step": 1511 + }, + { + "epoch": 3.867007672634271, + "grad_norm": 0.13210578639270845, + "learning_rate": 2.3760762684319508e-05, + "loss": 0.9094, + "step": 1512 + }, + { + "epoch": 3.869565217391304, + "grad_norm": 0.14259288669579517, + "learning_rate": 2.3659235679927016e-05, + "loss": 0.9351, + "step": 1513 + }, + { + "epoch": 3.8721227621483374, + "grad_norm": 0.1388101682540646, + "learning_rate": 2.3557888393165627e-05, + "loss": 0.9454, + "step": 1514 + }, + { + "epoch": 3.874680306905371, + "grad_norm": 0.12901592134412895, + "learning_rate": 2.345672114731624e-05, + "loss": 0.9481, + "step": 1515 + }, + { + "epoch": 3.877237851662404, + "grad_norm": 0.13894304934030247, + "learning_rate": 2.335573426508547e-05, + "loss": 0.9583, + "step": 1516 + }, + { + "epoch": 3.8797953964194374, + "grad_norm": 0.1370325882290817, + "learning_rate": 2.325492806860462e-05, + "loss": 0.9799, + "step": 1517 + }, + { + "epoch": 3.8823529411764706, + "grad_norm": 0.13421409804749201, + "learning_rate": 2.315430287942862e-05, + "loss": 0.9533, + "step": 1518 + }, + { + "epoch": 3.8849104859335037, + "grad_norm": 0.13298313283238028, + "learning_rate": 2.3053859018535026e-05, + "loss": 0.9709, + "step": 1519 + }, + { + "epoch": 3.887468030690537, + "grad_norm": 0.1361450777437208, + "learning_rate": 2.295359680632295e-05, + "loss": 0.9615, + "step": 1520 + }, + { + "epoch": 3.89002557544757, + "grad_norm": 0.1486100399377403, + "learning_rate": 2.2853516562612173e-05, + "loss": 0.9376, + "step": 1521 + }, + { + "epoch": 3.8925831202046037, + "grad_norm": 0.13690524401965368, + "learning_rate": 2.2753618606641928e-05, + "loss": 0.9092, + "step": 1522 + }, + { + "epoch": 3.895140664961637, + "grad_norm": 0.15669583951357616, + "learning_rate": 2.2653903257070012e-05, + "loss": 0.9443, + "step": 1523 + }, + { + "epoch": 
3.89769820971867, + "grad_norm": 0.12931778250099024, + "learning_rate": 2.2554370831971743e-05, + "loss": 0.9406, + "step": 1524 + }, + { + "epoch": 3.9002557544757033, + "grad_norm": 0.17258200785982056, + "learning_rate": 2.2455021648838935e-05, + "loss": 0.9614, + "step": 1525 + }, + { + "epoch": 3.9028132992327365, + "grad_norm": 0.1521157336174598, + "learning_rate": 2.235585602457891e-05, + "loss": 0.9487, + "step": 1526 + }, + { + "epoch": 3.90537084398977, + "grad_norm": 0.14390268768179504, + "learning_rate": 2.225687427551341e-05, + "loss": 0.9401, + "step": 1527 + }, + { + "epoch": 3.907928388746803, + "grad_norm": 0.16337966447000044, + "learning_rate": 2.2158076717377765e-05, + "loss": 0.9536, + "step": 1528 + }, + { + "epoch": 3.9104859335038364, + "grad_norm": 0.15324748802477992, + "learning_rate": 2.2059463665319623e-05, + "loss": 0.9198, + "step": 1529 + }, + { + "epoch": 3.9130434782608696, + "grad_norm": 0.14907378875032545, + "learning_rate": 2.196103543389815e-05, + "loss": 0.9481, + "step": 1530 + }, + { + "epoch": 3.915601023017903, + "grad_norm": 0.14207939797213323, + "learning_rate": 2.1862792337083017e-05, + "loss": 0.9387, + "step": 1531 + }, + { + "epoch": 3.918158567774936, + "grad_norm": 0.13959510597089575, + "learning_rate": 2.176473468825328e-05, + "loss": 0.9536, + "step": 1532 + }, + { + "epoch": 3.920716112531969, + "grad_norm": 0.14016454333503284, + "learning_rate": 2.1666862800196454e-05, + "loss": 0.9491, + "step": 1533 + }, + { + "epoch": 3.923273657289003, + "grad_norm": 0.14885818803453518, + "learning_rate": 2.1569176985107535e-05, + "loss": 0.9612, + "step": 1534 + }, + { + "epoch": 3.9258312020460355, + "grad_norm": 0.14403866973582788, + "learning_rate": 2.1471677554587958e-05, + "loss": 0.9511, + "step": 1535 + }, + { + "epoch": 3.928388746803069, + "grad_norm": 0.13223516573639468, + "learning_rate": 2.1374364819644623e-05, + "loss": 0.9373, + "step": 1536 + }, + { + "epoch": 3.9309462915601023, + "grad_norm": 
0.14036184466315108, + "learning_rate": 2.1277239090688894e-05, + "loss": 0.9353, + "step": 1537 + }, + { + "epoch": 3.9335038363171355, + "grad_norm": 0.1396968491520172, + "learning_rate": 2.1180300677535655e-05, + "loss": 0.9531, + "step": 1538 + }, + { + "epoch": 3.9360613810741687, + "grad_norm": 0.13659743962984422, + "learning_rate": 2.108354988940228e-05, + "loss": 0.936, + "step": 1539 + }, + { + "epoch": 3.938618925831202, + "grad_norm": 0.1508626854215839, + "learning_rate": 2.0986987034907554e-05, + "loss": 0.9452, + "step": 1540 + }, + { + "epoch": 3.9411764705882355, + "grad_norm": 0.14129695624224084, + "learning_rate": 2.089061242207092e-05, + "loss": 0.9369, + "step": 1541 + }, + { + "epoch": 3.9437340153452687, + "grad_norm": 0.1428765331179949, + "learning_rate": 2.0794426358311294e-05, + "loss": 0.9142, + "step": 1542 + }, + { + "epoch": 3.946291560102302, + "grad_norm": 0.1330347524331098, + "learning_rate": 2.069842915044614e-05, + "loss": 0.9381, + "step": 1543 + }, + { + "epoch": 3.948849104859335, + "grad_norm": 0.14069953111767788, + "learning_rate": 2.0602621104690517e-05, + "loss": 0.921, + "step": 1544 + }, + { + "epoch": 3.9514066496163682, + "grad_norm": 0.1456949051715094, + "learning_rate": 2.050700252665615e-05, + "loss": 0.9549, + "step": 1545 + }, + { + "epoch": 3.9539641943734014, + "grad_norm": 0.13746866783044756, + "learning_rate": 2.041157372135028e-05, + "loss": 0.9287, + "step": 1546 + }, + { + "epoch": 3.9565217391304346, + "grad_norm": 0.15606889468360874, + "learning_rate": 2.0316334993174856e-05, + "loss": 0.9555, + "step": 1547 + }, + { + "epoch": 3.959079283887468, + "grad_norm": 0.14118323164397703, + "learning_rate": 2.0221286645925558e-05, + "loss": 0.9343, + "step": 1548 + }, + { + "epoch": 3.9616368286445014, + "grad_norm": 0.1363380304979579, + "learning_rate": 2.012642898279074e-05, + "loss": 0.9961, + "step": 1549 + }, + { + "epoch": 3.9641943734015346, + "grad_norm": 0.14317404024733354, + "learning_rate": 
2.003176230635049e-05, + "loss": 0.9647, + "step": 1550 + }, + { + "epoch": 3.9667519181585678, + "grad_norm": 0.14674699824614082, + "learning_rate": 1.9937286918575713e-05, + "loss": 0.9541, + "step": 1551 + }, + { + "epoch": 3.969309462915601, + "grad_norm": 0.1392728526341487, + "learning_rate": 1.984300312082711e-05, + "loss": 0.9549, + "step": 1552 + }, + { + "epoch": 3.971867007672634, + "grad_norm": 0.1388687318173855, + "learning_rate": 1.9748911213854267e-05, + "loss": 0.9538, + "step": 1553 + }, + { + "epoch": 3.9744245524296673, + "grad_norm": 0.13901730161036177, + "learning_rate": 1.9655011497794616e-05, + "loss": 0.9426, + "step": 1554 + }, + { + "epoch": 3.976982097186701, + "grad_norm": 0.13747089636524243, + "learning_rate": 1.9561304272172644e-05, + "loss": 0.9639, + "step": 1555 + }, + { + "epoch": 3.979539641943734, + "grad_norm": 0.1395863657318075, + "learning_rate": 1.946778983589873e-05, + "loss": 0.9733, + "step": 1556 + }, + { + "epoch": 3.9820971867007673, + "grad_norm": 0.1388892460599247, + "learning_rate": 1.9374468487268254e-05, + "loss": 0.944, + "step": 1557 + }, + { + "epoch": 3.9846547314578005, + "grad_norm": 0.1542426182338673, + "learning_rate": 1.9281340523960806e-05, + "loss": 0.9575, + "step": 1558 + }, + { + "epoch": 3.9872122762148337, + "grad_norm": 0.14702194394411322, + "learning_rate": 1.9188406243039015e-05, + "loss": 0.939, + "step": 1559 + }, + { + "epoch": 3.9897698209718673, + "grad_norm": 0.15088719580788107, + "learning_rate": 1.9095665940947717e-05, + "loss": 0.9523, + "step": 1560 + }, + { + "epoch": 3.9923273657289, + "grad_norm": 0.13979637370531914, + "learning_rate": 1.9003119913512992e-05, + "loss": 0.9518, + "step": 1561 + }, + { + "epoch": 3.9948849104859336, + "grad_norm": 0.13293457854923818, + "learning_rate": 1.891076845594122e-05, + "loss": 0.966, + "step": 1562 + }, + { + "epoch": 3.997442455242967, + "grad_norm": 0.1330659091048459, + "learning_rate": 1.881861186281813e-05, + "loss": 0.9425, + 
"step": 1563 + }, + { + "epoch": 4.0, + "grad_norm": 0.15532958865697588, + "learning_rate": 1.872665042810784e-05, + "loss": 0.9491, + "step": 1564 + }, + { + "epoch": 4.002557544757034, + "grad_norm": 0.172134213325208, + "learning_rate": 1.863488444515203e-05, + "loss": 0.9131, + "step": 1565 + }, + { + "epoch": 4.005115089514066, + "grad_norm": 0.15705142364202992, + "learning_rate": 1.854331420666882e-05, + "loss": 0.9254, + "step": 1566 + }, + { + "epoch": 4.0076726342711, + "grad_norm": 0.16319791463669756, + "learning_rate": 1.845194000475199e-05, + "loss": 0.9005, + "step": 1567 + }, + { + "epoch": 4.010230179028133, + "grad_norm": 0.16550445546270565, + "learning_rate": 1.836076213087e-05, + "loss": 0.9177, + "step": 1568 + }, + { + "epoch": 4.012787723785166, + "grad_norm": 0.17000604940332, + "learning_rate": 1.826978087586502e-05, + "loss": 0.9288, + "step": 1569 + }, + { + "epoch": 4.015345268542199, + "grad_norm": 0.17439370178321326, + "learning_rate": 1.8178996529952088e-05, + "loss": 0.9302, + "step": 1570 + }, + { + "epoch": 4.017902813299233, + "grad_norm": 0.16621808084873166, + "learning_rate": 1.808840938271807e-05, + "loss": 0.9277, + "step": 1571 + }, + { + "epoch": 4.020460358056266, + "grad_norm": 0.1502855048809297, + "learning_rate": 1.799801972312092e-05, + "loss": 0.9146, + "step": 1572 + }, + { + "epoch": 4.023017902813299, + "grad_norm": 0.15792591947199125, + "learning_rate": 1.7907827839488474e-05, + "loss": 0.9175, + "step": 1573 + }, + { + "epoch": 4.025575447570333, + "grad_norm": 0.1563775392864349, + "learning_rate": 1.7817834019517805e-05, + "loss": 0.9128, + "step": 1574 + }, + { + "epoch": 4.028132992327365, + "grad_norm": 0.14597718440990778, + "learning_rate": 1.7728038550274193e-05, + "loss": 0.9185, + "step": 1575 + }, + { + "epoch": 4.030690537084399, + "grad_norm": 0.1569564550463153, + "learning_rate": 1.7638441718190192e-05, + "loss": 0.9296, + "step": 1576 + }, + { + "epoch": 4.033248081841432, + "grad_norm": 
0.15089755959303894, + "learning_rate": 1.7549043809064697e-05, + "loss": 0.9011, + "step": 1577 + }, + { + "epoch": 4.035805626598465, + "grad_norm": 0.14320940233490406, + "learning_rate": 1.74598451080622e-05, + "loss": 0.9301, + "step": 1578 + }, + { + "epoch": 4.038363171355499, + "grad_norm": 0.1640364740345872, + "learning_rate": 1.737084589971157e-05, + "loss": 0.9294, + "step": 1579 + }, + { + "epoch": 4.040920716112532, + "grad_norm": 0.15372462860199906, + "learning_rate": 1.728204646790544e-05, + "loss": 0.9464, + "step": 1580 + }, + { + "epoch": 4.043478260869565, + "grad_norm": 0.14792763942080298, + "learning_rate": 1.7193447095899206e-05, + "loss": 0.9224, + "step": 1581 + }, + { + "epoch": 4.046035805626598, + "grad_norm": 0.13951058738523123, + "learning_rate": 1.710504806631005e-05, + "loss": 0.9087, + "step": 1582 + }, + { + "epoch": 4.048593350383632, + "grad_norm": 0.13260882878617228, + "learning_rate": 1.701684966111615e-05, + "loss": 0.9036, + "step": 1583 + }, + { + "epoch": 4.051150895140665, + "grad_norm": 0.14125256658288957, + "learning_rate": 1.6928852161655616e-05, + "loss": 0.92, + "step": 1584 + }, + { + "epoch": 4.053708439897698, + "grad_norm": 0.13237438231494236, + "learning_rate": 1.684105584862584e-05, + "loss": 0.9156, + "step": 1585 + }, + { + "epoch": 4.056265984654732, + "grad_norm": 0.1359119819403516, + "learning_rate": 1.6753461002082395e-05, + "loss": 0.9554, + "step": 1586 + }, + { + "epoch": 4.0588235294117645, + "grad_norm": 0.136943228077222, + "learning_rate": 1.6666067901438178e-05, + "loss": 0.8844, + "step": 1587 + }, + { + "epoch": 4.061381074168798, + "grad_norm": 0.14746043096646916, + "learning_rate": 1.657887682546264e-05, + "loss": 0.9091, + "step": 1588 + }, + { + "epoch": 4.063938618925831, + "grad_norm": 0.13289891251117492, + "learning_rate": 1.649188805228076e-05, + "loss": 0.9462, + "step": 1589 + }, + { + "epoch": 4.0664961636828645, + "grad_norm": 0.14117852752538673, + "learning_rate": 
1.6405101859372123e-05, + "loss": 0.9153, + "step": 1590 + }, + { + "epoch": 4.069053708439898, + "grad_norm": 0.12613455462183037, + "learning_rate": 1.631851852357026e-05, + "loss": 0.9519, + "step": 1591 + }, + { + "epoch": 4.071611253196931, + "grad_norm": 0.1396860703236042, + "learning_rate": 1.6232138321061544e-05, + "loss": 0.9412, + "step": 1592 + }, + { + "epoch": 4.0741687979539645, + "grad_norm": 0.1360638603818121, + "learning_rate": 1.6145961527384395e-05, + "loss": 0.9517, + "step": 1593 + }, + { + "epoch": 4.076726342710997, + "grad_norm": 0.1324923155606263, + "learning_rate": 1.6059988417428396e-05, + "loss": 0.9513, + "step": 1594 + }, + { + "epoch": 4.079283887468031, + "grad_norm": 0.14265745538296148, + "learning_rate": 1.5974219265433406e-05, + "loss": 0.9154, + "step": 1595 + }, + { + "epoch": 4.081841432225064, + "grad_norm": 0.14492559140570338, + "learning_rate": 1.58886543449887e-05, + "loss": 0.9394, + "step": 1596 + }, + { + "epoch": 4.084398976982097, + "grad_norm": 0.12579546842676975, + "learning_rate": 1.5803293929032078e-05, + "loss": 0.9281, + "step": 1597 + }, + { + "epoch": 4.086956521739131, + "grad_norm": 0.14549537683931857, + "learning_rate": 1.5718138289849055e-05, + "loss": 0.8957, + "step": 1598 + }, + { + "epoch": 4.089514066496164, + "grad_norm": 0.14813650458162753, + "learning_rate": 1.563318769907187e-05, + "loss": 0.9004, + "step": 1599 + }, + { + "epoch": 4.092071611253197, + "grad_norm": 0.12523568970989923, + "learning_rate": 1.554844242767872e-05, + "loss": 0.9311, + "step": 1600 + }, + { + "epoch": 4.09462915601023, + "grad_norm": 0.13296174952051867, + "learning_rate": 1.546390274599289e-05, + "loss": 0.9256, + "step": 1601 + }, + { + "epoch": 4.0971867007672635, + "grad_norm": 0.12809367590620266, + "learning_rate": 1.5379568923681833e-05, + "loss": 0.9136, + "step": 1602 + }, + { + "epoch": 4.099744245524296, + "grad_norm": 0.13109260024902633, + "learning_rate": 1.5295441229756364e-05, + "loss": 0.9007, + 
"step": 1603 + }, + { + "epoch": 4.10230179028133, + "grad_norm": 0.12407094954940708, + "learning_rate": 1.521151993256977e-05, + "loss": 0.9406, + "step": 1604 + }, + { + "epoch": 4.1048593350383635, + "grad_norm": 0.1298161922376652, + "learning_rate": 1.5127805299817025e-05, + "loss": 0.9264, + "step": 1605 + }, + { + "epoch": 4.107416879795396, + "grad_norm": 0.1481163518427539, + "learning_rate": 1.5044297598533777e-05, + "loss": 0.9285, + "step": 1606 + }, + { + "epoch": 4.10997442455243, + "grad_norm": 0.12078740228639545, + "learning_rate": 1.496099709509565e-05, + "loss": 0.9078, + "step": 1607 + }, + { + "epoch": 4.112531969309463, + "grad_norm": 0.13027908099413282, + "learning_rate": 1.4877904055217376e-05, + "loss": 0.9149, + "step": 1608 + }, + { + "epoch": 4.115089514066496, + "grad_norm": 0.1468019204651356, + "learning_rate": 1.4795018743951857e-05, + "loss": 0.9304, + "step": 1609 + }, + { + "epoch": 4.117647058823529, + "grad_norm": 0.1349316946630024, + "learning_rate": 1.4712341425689406e-05, + "loss": 0.926, + "step": 1610 + }, + { + "epoch": 4.120204603580563, + "grad_norm": 0.1228754724620514, + "learning_rate": 1.4629872364156854e-05, + "loss": 0.9185, + "step": 1611 + }, + { + "epoch": 4.122762148337596, + "grad_norm": 0.14313419206388078, + "learning_rate": 1.4547611822416748e-05, + "loss": 0.9126, + "step": 1612 + }, + { + "epoch": 4.125319693094629, + "grad_norm": 0.14531581013669995, + "learning_rate": 1.446556006286648e-05, + "loss": 0.9372, + "step": 1613 + }, + { + "epoch": 4.127877237851663, + "grad_norm": 0.12636103579388067, + "learning_rate": 1.4383717347237425e-05, + "loss": 0.9255, + "step": 1614 + }, + { + "epoch": 4.130434782608695, + "grad_norm": 0.13484501378576969, + "learning_rate": 1.4302083936594247e-05, + "loss": 0.9267, + "step": 1615 + }, + { + "epoch": 4.132992327365729, + "grad_norm": 0.1306495047012211, + "learning_rate": 1.4220660091333875e-05, + "loss": 0.9237, + "step": 1616 + }, + { + "epoch": 
4.135549872122763, + "grad_norm": 0.12979097348457122, + "learning_rate": 1.4139446071184737e-05, + "loss": 0.9197, + "step": 1617 + }, + { + "epoch": 4.138107416879795, + "grad_norm": 0.13739201337062779, + "learning_rate": 1.405844213520604e-05, + "loss": 0.9197, + "step": 1618 + }, + { + "epoch": 4.140664961636829, + "grad_norm": 0.1294644982423319, + "learning_rate": 1.3977648541786804e-05, + "loss": 0.896, + "step": 1619 + }, + { + "epoch": 4.143222506393862, + "grad_norm": 0.12588348274914363, + "learning_rate": 1.3897065548645104e-05, + "loss": 0.9453, + "step": 1620 + }, + { + "epoch": 4.145780051150895, + "grad_norm": 0.15398362387202247, + "learning_rate": 1.381669341282721e-05, + "loss": 0.9317, + "step": 1621 + }, + { + "epoch": 4.148337595907928, + "grad_norm": 0.13197721364304257, + "learning_rate": 1.3736532390706878e-05, + "loss": 0.9279, + "step": 1622 + }, + { + "epoch": 4.150895140664962, + "grad_norm": 0.12322044737512756, + "learning_rate": 1.3656582737984318e-05, + "loss": 0.9439, + "step": 1623 + }, + { + "epoch": 4.153452685421995, + "grad_norm": 0.12440470950789576, + "learning_rate": 1.3576844709685583e-05, + "loss": 0.9088, + "step": 1624 + }, + { + "epoch": 4.156010230179028, + "grad_norm": 0.12465116010990127, + "learning_rate": 1.3497318560161704e-05, + "loss": 0.9211, + "step": 1625 + }, + { + "epoch": 4.158567774936062, + "grad_norm": 0.13358086347052778, + "learning_rate": 1.3418004543087792e-05, + "loss": 0.9312, + "step": 1626 + }, + { + "epoch": 4.161125319693094, + "grad_norm": 0.1224560124714394, + "learning_rate": 1.3338902911462336e-05, + "loss": 0.9253, + "step": 1627 + }, + { + "epoch": 4.163682864450128, + "grad_norm": 0.12240140914681184, + "learning_rate": 1.3260013917606319e-05, + "loss": 0.9383, + "step": 1628 + }, + { + "epoch": 4.166240409207161, + "grad_norm": 0.12945740752464988, + "learning_rate": 1.318133781316247e-05, + "loss": 0.9416, + "step": 1629 + }, + { + "epoch": 4.168797953964194, + "grad_norm": 
0.13087100044291045, + "learning_rate": 1.3102874849094414e-05, + "loss": 0.9316, + "step": 1630 + }, + { + "epoch": 4.171355498721228, + "grad_norm": 0.14189296661844325, + "learning_rate": 1.3024625275685891e-05, + "loss": 0.9465, + "step": 1631 + }, + { + "epoch": 4.173913043478261, + "grad_norm": 0.1297951759919457, + "learning_rate": 1.2946589342540023e-05, + "loss": 0.9275, + "step": 1632 + }, + { + "epoch": 4.176470588235294, + "grad_norm": 0.11911786087772278, + "learning_rate": 1.2868767298578395e-05, + "loss": 0.9225, + "step": 1633 + }, + { + "epoch": 4.179028132992327, + "grad_norm": 0.12225398214034955, + "learning_rate": 1.2791159392040275e-05, + "loss": 0.9196, + "step": 1634 + }, + { + "epoch": 4.181585677749361, + "grad_norm": 0.1310216078232746, + "learning_rate": 1.2713765870481995e-05, + "loss": 0.9353, + "step": 1635 + }, + { + "epoch": 4.1841432225063935, + "grad_norm": 0.12742055135018454, + "learning_rate": 1.2636586980775945e-05, + "loss": 0.9666, + "step": 1636 + }, + { + "epoch": 4.186700767263427, + "grad_norm": 0.12384487664186089, + "learning_rate": 1.2559622969109886e-05, + "loss": 0.9209, + "step": 1637 + }, + { + "epoch": 4.189258312020461, + "grad_norm": 0.1340544434519516, + "learning_rate": 1.2482874080986176e-05, + "loss": 0.9377, + "step": 1638 + }, + { + "epoch": 4.1918158567774935, + "grad_norm": 0.13746772119236356, + "learning_rate": 1.2406340561220947e-05, + "loss": 0.9207, + "step": 1639 + }, + { + "epoch": 4.194373401534527, + "grad_norm": 0.1280603990954687, + "learning_rate": 1.2330022653943358e-05, + "loss": 0.914, + "step": 1640 + }, + { + "epoch": 4.19693094629156, + "grad_norm": 0.12374468420399631, + "learning_rate": 1.2253920602594759e-05, + "loss": 0.8923, + "step": 1641 + }, + { + "epoch": 4.1994884910485935, + "grad_norm": 0.12384342114389504, + "learning_rate": 1.2178034649928034e-05, + "loss": 0.9396, + "step": 1642 + }, + { + "epoch": 4.202046035805626, + "grad_norm": 0.1230247461338335, + "learning_rate": 
1.2102365038006672e-05, + "loss": 0.8981, + "step": 1643 + }, + { + "epoch": 4.20460358056266, + "grad_norm": 0.12441020446608941, + "learning_rate": 1.2026912008204117e-05, + "loss": 0.9395, + "step": 1644 + }, + { + "epoch": 4.207161125319693, + "grad_norm": 0.1207928603043833, + "learning_rate": 1.195167580120292e-05, + "loss": 0.9257, + "step": 1645 + }, + { + "epoch": 4.209718670076726, + "grad_norm": 0.12168214916803673, + "learning_rate": 1.1876656656994032e-05, + "loss": 0.907, + "step": 1646 + }, + { + "epoch": 4.21227621483376, + "grad_norm": 0.12409121363381591, + "learning_rate": 1.180185481487599e-05, + "loss": 0.9082, + "step": 1647 + }, + { + "epoch": 4.2148337595907925, + "grad_norm": 0.12218546237016087, + "learning_rate": 1.1727270513454161e-05, + "loss": 0.9207, + "step": 1648 + }, + { + "epoch": 4.217391304347826, + "grad_norm": 0.1373741099688316, + "learning_rate": 1.1652903990640075e-05, + "loss": 0.9041, + "step": 1649 + }, + { + "epoch": 4.21994884910486, + "grad_norm": 0.126043833861761, + "learning_rate": 1.1578755483650465e-05, + "loss": 0.9071, + "step": 1650 + }, + { + "epoch": 4.2225063938618925, + "grad_norm": 0.12907468546494064, + "learning_rate": 1.150482522900668e-05, + "loss": 0.9267, + "step": 1651 + }, + { + "epoch": 4.225063938618926, + "grad_norm": 0.11696490881508001, + "learning_rate": 1.1431113462533942e-05, + "loss": 0.9188, + "step": 1652 + }, + { + "epoch": 4.227621483375959, + "grad_norm": 0.1219772936698238, + "learning_rate": 1.1357620419360438e-05, + "loss": 0.93, + "step": 1653 + }, + { + "epoch": 4.2301790281329925, + "grad_norm": 0.12317189729882781, + "learning_rate": 1.128434633391673e-05, + "loss": 0.9248, + "step": 1654 + }, + { + "epoch": 4.232736572890025, + "grad_norm": 0.12135967777000363, + "learning_rate": 1.121129143993489e-05, + "loss": 0.9482, + "step": 1655 + }, + { + "epoch": 4.235294117647059, + "grad_norm": 0.12569146595438008, + "learning_rate": 1.1138455970447857e-05, + "loss": 0.9237, + 
"step": 1656 + }, + { + "epoch": 4.2378516624040925, + "grad_norm": 0.12009749843054457, + "learning_rate": 1.1065840157788599e-05, + "loss": 0.9117, + "step": 1657 + }, + { + "epoch": 4.240409207161125, + "grad_norm": 0.12262206120182582, + "learning_rate": 1.099344423358943e-05, + "loss": 0.944, + "step": 1658 + }, + { + "epoch": 4.242966751918159, + "grad_norm": 0.12739673009436395, + "learning_rate": 1.0921268428781277e-05, + "loss": 0.928, + "step": 1659 + }, + { + "epoch": 4.245524296675192, + "grad_norm": 0.12049563257356445, + "learning_rate": 1.084931297359293e-05, + "loss": 0.9307, + "step": 1660 + }, + { + "epoch": 4.248081841432225, + "grad_norm": 0.1268732696430339, + "learning_rate": 1.0777578097550206e-05, + "loss": 0.938, + "step": 1661 + }, + { + "epoch": 4.250639386189258, + "grad_norm": 0.1302689278877736, + "learning_rate": 1.0706064029475436e-05, + "loss": 0.9339, + "step": 1662 + }, + { + "epoch": 4.253196930946292, + "grad_norm": 0.1207622169109695, + "learning_rate": 1.0634770997486546e-05, + "loss": 0.9153, + "step": 1663 + }, + { + "epoch": 4.255754475703325, + "grad_norm": 0.11706181174774555, + "learning_rate": 1.0563699228996405e-05, + "loss": 0.9129, + "step": 1664 + }, + { + "epoch": 4.258312020460358, + "grad_norm": 0.11849875702011481, + "learning_rate": 1.0492848950712067e-05, + "loss": 0.9183, + "step": 1665 + }, + { + "epoch": 4.260869565217392, + "grad_norm": 0.12286048694545573, + "learning_rate": 1.0422220388634145e-05, + "loss": 0.9194, + "step": 1666 + }, + { + "epoch": 4.263427109974424, + "grad_norm": 0.12106155524848677, + "learning_rate": 1.03518137680559e-05, + "loss": 0.93, + "step": 1667 + }, + { + "epoch": 4.265984654731458, + "grad_norm": 0.11931612070623257, + "learning_rate": 1.0281629313562704e-05, + "loss": 0.8812, + "step": 1668 + }, + { + "epoch": 4.268542199488491, + "grad_norm": 0.12412002218869622, + "learning_rate": 1.0211667249031278e-05, + "loss": 0.9211, + "step": 1669 + }, + { + "epoch": 
4.271099744245524, + "grad_norm": 0.11050129272365039, + "learning_rate": 1.0141927797628913e-05, + "loss": 0.9346, + "step": 1670 + }, + { + "epoch": 4.273657289002558, + "grad_norm": 0.11696142916514798, + "learning_rate": 1.0072411181812805e-05, + "loss": 0.9103, + "step": 1671 + }, + { + "epoch": 4.276214833759591, + "grad_norm": 0.12523114611535077, + "learning_rate": 1.0003117623329373e-05, + "loss": 0.9188, + "step": 1672 + }, + { + "epoch": 4.278772378516624, + "grad_norm": 0.1211246626009557, + "learning_rate": 9.934047343213468e-06, + "loss": 0.8779, + "step": 1673 + }, + { + "epoch": 4.281329923273657, + "grad_norm": 0.11896385138151676, + "learning_rate": 9.865200561787779e-06, + "loss": 0.916, + "step": 1674 + }, + { + "epoch": 4.283887468030691, + "grad_norm": 0.12907351319734606, + "learning_rate": 9.796577498662017e-06, + "loss": 0.9316, + "step": 1675 + }, + { + "epoch": 4.286445012787723, + "grad_norm": 0.1175024733129538, + "learning_rate": 9.728178372732323e-06, + "loss": 0.9175, + "step": 1676 + }, + { + "epoch": 4.289002557544757, + "grad_norm": 0.11765409328640529, + "learning_rate": 9.660003402180495e-06, + "loss": 0.9322, + "step": 1677 + }, + { + "epoch": 4.291560102301791, + "grad_norm": 0.11606048414482627, + "learning_rate": 9.592052804473248e-06, + "loss": 0.9338, + "step": 1678 + }, + { + "epoch": 4.294117647058823, + "grad_norm": 0.12217997194310143, + "learning_rate": 9.524326796361704e-06, + "loss": 0.9198, + "step": 1679 + }, + { + "epoch": 4.296675191815857, + "grad_norm": 0.13681552209998984, + "learning_rate": 9.456825593880502e-06, + "loss": 0.9381, + "step": 1680 + }, + { + "epoch": 4.29923273657289, + "grad_norm": 0.11707040245774833, + "learning_rate": 9.389549412347204e-06, + "loss": 0.9114, + "step": 1681 + }, + { + "epoch": 4.301790281329923, + "grad_norm": 0.11739134713610266, + "learning_rate": 9.322498466361574e-06, + "loss": 0.9564, + "step": 1682 + }, + { + "epoch": 4.304347826086957, + "grad_norm": 
0.11490889884017837, + "learning_rate": 9.25567296980499e-06, + "loss": 0.9372, + "step": 1683 + }, + { + "epoch": 4.30690537084399, + "grad_norm": 0.13548343430667473, + "learning_rate": 9.18907313583958e-06, + "loss": 0.9571, + "step": 1684 + }, + { + "epoch": 4.309462915601023, + "grad_norm": 0.1169879093609689, + "learning_rate": 9.122699176907699e-06, + "loss": 0.91, + "step": 1685 + }, + { + "epoch": 4.312020460358056, + "grad_norm": 0.12181883918771313, + "learning_rate": 9.056551304731216e-06, + "loss": 0.9403, + "step": 1686 + }, + { + "epoch": 4.31457800511509, + "grad_norm": 0.11516301601447926, + "learning_rate": 8.990629730310787e-06, + "loss": 0.9045, + "step": 1687 + }, + { + "epoch": 4.3171355498721224, + "grad_norm": 0.1130886469711019, + "learning_rate": 8.924934663925228e-06, + "loss": 0.9005, + "step": 1688 + }, + { + "epoch": 4.319693094629156, + "grad_norm": 0.12056683149234801, + "learning_rate": 8.859466315130833e-06, + "loss": 0.905, + "step": 1689 + }, + { + "epoch": 4.322250639386189, + "grad_norm": 0.12131053610936289, + "learning_rate": 8.794224892760694e-06, + "loss": 0.964, + "step": 1690 + }, + { + "epoch": 4.324808184143222, + "grad_norm": 0.11072666373506544, + "learning_rate": 8.729210604924075e-06, + "loss": 0.9168, + "step": 1691 + }, + { + "epoch": 4.327365728900256, + "grad_norm": 0.11419375138008123, + "learning_rate": 8.66442365900566e-06, + "loss": 0.9155, + "step": 1692 + }, + { + "epoch": 4.329923273657289, + "grad_norm": 0.11067325544749756, + "learning_rate": 8.599864261665032e-06, + "loss": 0.929, + "step": 1693 + }, + { + "epoch": 4.332480818414322, + "grad_norm": 0.13119769270640452, + "learning_rate": 8.535532618835894e-06, + "loss": 0.9196, + "step": 1694 + }, + { + "epoch": 4.335038363171355, + "grad_norm": 0.12122259309350006, + "learning_rate": 8.471428935725394e-06, + "loss": 0.9097, + "step": 1695 + }, + { + "epoch": 4.337595907928389, + "grad_norm": 0.1186567073290791, + "learning_rate": 
8.407553416813621e-06, + "loss": 0.9486, + "step": 1696 + }, + { + "epoch": 4.340153452685422, + "grad_norm": 0.13863787273855152, + "learning_rate": 8.343906265852806e-06, + "loss": 0.9194, + "step": 1697 + }, + { + "epoch": 4.342710997442455, + "grad_norm": 0.11736813648606277, + "learning_rate": 8.280487685866707e-06, + "loss": 0.8964, + "step": 1698 + }, + { + "epoch": 4.345268542199489, + "grad_norm": 0.11874382513666652, + "learning_rate": 8.217297879150065e-06, + "loss": 0.9305, + "step": 1699 + }, + { + "epoch": 4.3478260869565215, + "grad_norm": 0.12096917615982158, + "learning_rate": 8.154337047267763e-06, + "loss": 0.926, + "step": 1700 + }, + { + "epoch": 4.350383631713555, + "grad_norm": 0.12459874607610563, + "learning_rate": 8.091605391054354e-06, + "loss": 0.8922, + "step": 1701 + }, + { + "epoch": 4.352941176470588, + "grad_norm": 0.12221739613538536, + "learning_rate": 8.02910311061333e-06, + "loss": 0.9401, + "step": 1702 + }, + { + "epoch": 4.3554987212276215, + "grad_norm": 0.12254645629749011, + "learning_rate": 7.966830405316561e-06, + "loss": 0.9547, + "step": 1703 + }, + { + "epoch": 4.358056265984655, + "grad_norm": 0.12001133797508247, + "learning_rate": 7.90478747380357e-06, + "loss": 0.9103, + "step": 1704 + }, + { + "epoch": 4.360613810741688, + "grad_norm": 0.12199519070925526, + "learning_rate": 7.842974513980946e-06, + "loss": 0.9271, + "step": 1705 + }, + { + "epoch": 4.3631713554987215, + "grad_norm": 0.11295241635294967, + "learning_rate": 7.781391723021711e-06, + "loss": 0.9363, + "step": 1706 + }, + { + "epoch": 4.365728900255754, + "grad_norm": 0.12686526411244078, + "learning_rate": 7.720039297364681e-06, + "loss": 0.9274, + "step": 1707 + }, + { + "epoch": 4.368286445012788, + "grad_norm": 0.1333081116381865, + "learning_rate": 7.658917432713839e-06, + "loss": 0.9172, + "step": 1708 + }, + { + "epoch": 4.370843989769821, + "grad_norm": 0.12577470275328256, + "learning_rate": 7.598026324037762e-06, + "loss": 0.939, + "step": 
1709 + }, + { + "epoch": 4.373401534526854, + "grad_norm": 0.12345544691397578, + "learning_rate": 7.537366165568909e-06, + "loss": 0.9288, + "step": 1710 + }, + { + "epoch": 4.375959079283888, + "grad_norm": 0.11948532376497799, + "learning_rate": 7.476937150803025e-06, + "loss": 0.9497, + "step": 1711 + }, + { + "epoch": 4.378516624040921, + "grad_norm": 0.12876903997603817, + "learning_rate": 7.416739472498613e-06, + "loss": 0.9479, + "step": 1712 + }, + { + "epoch": 4.381074168797954, + "grad_norm": 0.11529385831506739, + "learning_rate": 7.356773322676205e-06, + "loss": 0.9158, + "step": 1713 + }, + { + "epoch": 4.383631713554987, + "grad_norm": 0.11078825541988917, + "learning_rate": 7.2970388926178045e-06, + "loss": 0.937, + "step": 1714 + }, + { + "epoch": 4.3861892583120206, + "grad_norm": 0.11173435690628004, + "learning_rate": 7.237536372866247e-06, + "loss": 0.9327, + "step": 1715 + }, + { + "epoch": 4.388746803069053, + "grad_norm": 0.1223612229123131, + "learning_rate": 7.178265953224701e-06, + "loss": 0.9227, + "step": 1716 + }, + { + "epoch": 4.391304347826087, + "grad_norm": 0.12507251852936713, + "learning_rate": 7.119227822755843e-06, + "loss": 0.9571, + "step": 1717 + }, + { + "epoch": 4.3938618925831205, + "grad_norm": 0.11397092222799754, + "learning_rate": 7.060422169781467e-06, + "loss": 0.9041, + "step": 1718 + }, + { + "epoch": 4.396419437340153, + "grad_norm": 0.10753667090584995, + "learning_rate": 7.001849181881808e-06, + "loss": 0.9166, + "step": 1719 + }, + { + "epoch": 4.398976982097187, + "grad_norm": 0.12054572854799732, + "learning_rate": 6.943509045894905e-06, + "loss": 0.9341, + "step": 1720 + }, + { + "epoch": 4.40153452685422, + "grad_norm": 0.11185867845020742, + "learning_rate": 6.885401947916048e-06, + "loss": 0.9514, + "step": 1721 + }, + { + "epoch": 4.404092071611253, + "grad_norm": 0.11085335077105966, + "learning_rate": 6.827528073297185e-06, + "loss": 0.9382, + "step": 1722 + }, + { + "epoch": 4.406649616368286, + 
"grad_norm": 0.11479224410155166, + "learning_rate": 6.769887606646306e-06, + "loss": 0.9414, + "step": 1723 + }, + { + "epoch": 4.40920716112532, + "grad_norm": 0.11417555802279347, + "learning_rate": 6.712480731826878e-06, + "loss": 0.912, + "step": 1724 + }, + { + "epoch": 4.411764705882353, + "grad_norm": 0.11413292812828428, + "learning_rate": 6.6553076319572394e-06, + "loss": 0.9268, + "step": 1725 + }, + { + "epoch": 4.414322250639386, + "grad_norm": 0.10996848327532169, + "learning_rate": 6.59836848941005e-06, + "loss": 0.9253, + "step": 1726 + }, + { + "epoch": 4.41687979539642, + "grad_norm": 0.12150368369219573, + "learning_rate": 6.541663485811667e-06, + "loss": 0.915, + "step": 1727 + }, + { + "epoch": 4.419437340153452, + "grad_norm": 0.11980533715997778, + "learning_rate": 6.485192802041553e-06, + "loss": 0.9156, + "step": 1728 + }, + { + "epoch": 4.421994884910486, + "grad_norm": 0.11392894414591724, + "learning_rate": 6.428956618231788e-06, + "loss": 0.9197, + "step": 1729 + }, + { + "epoch": 4.42455242966752, + "grad_norm": 0.11760332661995491, + "learning_rate": 6.3729551137664055e-06, + "loss": 0.9545, + "step": 1730 + }, + { + "epoch": 4.427109974424552, + "grad_norm": 0.10904085632244291, + "learning_rate": 6.3171884672808524e-06, + "loss": 0.9103, + "step": 1731 + }, + { + "epoch": 4.429667519181586, + "grad_norm": 0.10863502669554059, + "learning_rate": 6.26165685666142e-06, + "loss": 0.9016, + "step": 1732 + }, + { + "epoch": 4.432225063938619, + "grad_norm": 0.11509438949225145, + "learning_rate": 6.206360459044671e-06, + "loss": 0.931, + "step": 1733 + }, + { + "epoch": 4.434782608695652, + "grad_norm": 0.11748690634314717, + "learning_rate": 6.15129945081689e-06, + "loss": 0.9151, + "step": 1734 + }, + { + "epoch": 4.437340153452685, + "grad_norm": 0.11639698873895774, + "learning_rate": 6.096474007613476e-06, + "loss": 0.9365, + "step": 1735 + }, + { + "epoch": 4.439897698209719, + "grad_norm": 0.11159987657775047, + "learning_rate": 
6.0418843043184636e-06, + "loss": 0.9552, + "step": 1736 + }, + { + "epoch": 4.442455242966752, + "grad_norm": 0.10952923402441073, + "learning_rate": 5.987530515063889e-06, + "loss": 0.9194, + "step": 1737 + }, + { + "epoch": 4.445012787723785, + "grad_norm": 0.11072771958857656, + "learning_rate": 5.933412813229256e-06, + "loss": 0.9189, + "step": 1738 + }, + { + "epoch": 4.447570332480819, + "grad_norm": 0.11775592911375234, + "learning_rate": 5.879531371440994e-06, + "loss": 0.9388, + "step": 1739 + }, + { + "epoch": 4.450127877237851, + "grad_norm": 0.11460729784468633, + "learning_rate": 5.825886361571922e-06, + "loss": 0.8945, + "step": 1740 + }, + { + "epoch": 4.452685421994885, + "grad_norm": 0.11581761610879335, + "learning_rate": 5.772477954740652e-06, + "loss": 0.9126, + "step": 1741 + }, + { + "epoch": 4.455242966751918, + "grad_norm": 0.11118413455302595, + "learning_rate": 5.719306321311075e-06, + "loss": 0.9565, + "step": 1742 + }, + { + "epoch": 4.457800511508951, + "grad_norm": 0.10749836975161339, + "learning_rate": 5.666371630891858e-06, + "loss": 0.9127, + "step": 1743 + }, + { + "epoch": 4.460358056265985, + "grad_norm": 0.10944652966346073, + "learning_rate": 5.613674052335798e-06, + "loss": 0.9184, + "step": 1744 + }, + { + "epoch": 4.462915601023018, + "grad_norm": 0.11540805854208941, + "learning_rate": 5.561213753739356e-06, + "loss": 0.9281, + "step": 1745 + }, + { + "epoch": 4.465473145780051, + "grad_norm": 0.11318814770450754, + "learning_rate": 5.5089909024421685e-06, + "loss": 0.9327, + "step": 1746 + }, + { + "epoch": 4.468030690537084, + "grad_norm": 0.11689654113549015, + "learning_rate": 5.4570056650263784e-06, + "loss": 0.9196, + "step": 1747 + }, + { + "epoch": 4.470588235294118, + "grad_norm": 0.11410697533075874, + "learning_rate": 5.405258207316228e-06, + "loss": 0.9248, + "step": 1748 + }, + { + "epoch": 4.4731457800511505, + "grad_norm": 0.11032997359153394, + "learning_rate": 5.3537486943774674e-06, + "loss": 0.9278, + 
"step": 1749 + }, + { + "epoch": 4.475703324808184, + "grad_norm": 0.11362254544830364, + "learning_rate": 5.302477290516832e-06, + "loss": 0.9508, + "step": 1750 + }, + { + "epoch": 4.478260869565218, + "grad_norm": 0.114903272001298, + "learning_rate": 5.251444159281551e-06, + "loss": 0.9177, + "step": 1751 + }, + { + "epoch": 4.4808184143222505, + "grad_norm": 0.11311594662750116, + "learning_rate": 5.200649463458769e-06, + "loss": 0.9315, + "step": 1752 + }, + { + "epoch": 4.483375959079284, + "grad_norm": 0.1080019715192445, + "learning_rate": 5.150093365075117e-06, + "loss": 0.9423, + "step": 1753 + }, + { + "epoch": 4.485933503836317, + "grad_norm": 0.11099521632078349, + "learning_rate": 5.0997760253961036e-06, + "loss": 0.9432, + "step": 1754 + }, + { + "epoch": 4.4884910485933505, + "grad_norm": 0.1115281668793938, + "learning_rate": 5.049697604925605e-06, + "loss": 0.9201, + "step": 1755 + }, + { + "epoch": 4.491048593350383, + "grad_norm": 0.11559474894332394, + "learning_rate": 4.999858263405468e-06, + "loss": 0.9335, + "step": 1756 + }, + { + "epoch": 4.493606138107417, + "grad_norm": 0.10752469888696953, + "learning_rate": 4.9502581598148425e-06, + "loss": 0.9326, + "step": 1757 + }, + { + "epoch": 4.4961636828644505, + "grad_norm": 0.11823364858584975, + "learning_rate": 4.900897452369782e-06, + "loss": 0.9085, + "step": 1758 + }, + { + "epoch": 4.498721227621483, + "grad_norm": 0.12367303838985884, + "learning_rate": 4.851776298522692e-06, + "loss": 0.8962, + "step": 1759 + }, + { + "epoch": 4.501278772378517, + "grad_norm": 0.11649199224229981, + "learning_rate": 4.802894854961882e-06, + "loss": 0.945, + "step": 1760 + }, + { + "epoch": 4.5038363171355495, + "grad_norm": 0.10951836253938066, + "learning_rate": 4.754253277610969e-06, + "loss": 0.9362, + "step": 1761 + }, + { + "epoch": 4.506393861892583, + "grad_norm": 0.11824940633958814, + "learning_rate": 4.705851721628465e-06, + "loss": 0.9489, + "step": 1762 + }, + { + "epoch": 
4.508951406649617, + "grad_norm": 0.11623129349141179, + "learning_rate": 4.6576903414072576e-06, + "loss": 0.9345, + "step": 1763 + }, + { + "epoch": 4.5115089514066495, + "grad_norm": 0.10609179613886349, + "learning_rate": 4.6097692905741194e-06, + "loss": 0.912, + "step": 1764 + }, + { + "epoch": 4.514066496163683, + "grad_norm": 0.1110236313063869, + "learning_rate": 4.562088721989178e-06, + "loss": 0.9263, + "step": 1765 + }, + { + "epoch": 4.516624040920716, + "grad_norm": 0.10545968825146992, + "learning_rate": 4.514648787745506e-06, + "loss": 0.9132, + "step": 1766 + }, + { + "epoch": 4.5191815856777495, + "grad_norm": 0.11497860724139544, + "learning_rate": 4.467449639168564e-06, + "loss": 0.9435, + "step": 1767 + }, + { + "epoch": 4.521739130434782, + "grad_norm": 0.11514110122345275, + "learning_rate": 4.420491426815758e-06, + "loss": 0.9405, + "step": 1768 + }, + { + "epoch": 4.524296675191816, + "grad_norm": 0.1123546579246865, + "learning_rate": 4.373774300475928e-06, + "loss": 0.9013, + "step": 1769 + }, + { + "epoch": 4.526854219948849, + "grad_norm": 0.10434900776877028, + "learning_rate": 4.327298409168928e-06, + "loss": 0.9234, + "step": 1770 + }, + { + "epoch": 4.529411764705882, + "grad_norm": 0.10753377323226707, + "learning_rate": 4.281063901145102e-06, + "loss": 0.9191, + "step": 1771 + }, + { + "epoch": 4.531969309462916, + "grad_norm": 0.10990039699899636, + "learning_rate": 4.235070923884772e-06, + "loss": 0.9218, + "step": 1772 + }, + { + "epoch": 4.534526854219949, + "grad_norm": 0.10914742733757979, + "learning_rate": 4.18931962409789e-06, + "loss": 0.9109, + "step": 1773 + }, + { + "epoch": 4.537084398976982, + "grad_norm": 0.10959258250347798, + "learning_rate": 4.143810147723448e-06, + "loss": 0.9152, + "step": 1774 + }, + { + "epoch": 4.539641943734015, + "grad_norm": 0.11106116826490182, + "learning_rate": 4.098542639929086e-06, + "loss": 0.9046, + "step": 1775 + }, + { + "epoch": 4.542199488491049, + "grad_norm": 
0.10748546841476085, + "learning_rate": 4.0535172451105785e-06, + "loss": 0.9128, + "step": 1776 + }, + { + "epoch": 4.544757033248082, + "grad_norm": 0.11225561412585737, + "learning_rate": 4.008734106891439e-06, + "loss": 0.929, + "step": 1777 + }, + { + "epoch": 4.547314578005115, + "grad_norm": 0.10831404168834766, + "learning_rate": 3.964193368122384e-06, + "loss": 0.9397, + "step": 1778 + }, + { + "epoch": 4.549872122762149, + "grad_norm": 0.11033594472176086, + "learning_rate": 3.919895170880938e-06, + "loss": 0.9252, + "step": 1779 + }, + { + "epoch": 4.552429667519181, + "grad_norm": 0.10441833953450541, + "learning_rate": 3.875839656470959e-06, + "loss": 0.9182, + "step": 1780 + }, + { + "epoch": 4.554987212276215, + "grad_norm": 0.11080119595164395, + "learning_rate": 3.832026965422184e-06, + "loss": 0.949, + "step": 1781 + }, + { + "epoch": 4.557544757033249, + "grad_norm": 0.11022335632664775, + "learning_rate": 3.788457237489773e-06, + "loss": 0.9238, + "step": 1782 + }, + { + "epoch": 4.560102301790281, + "grad_norm": 0.11308201432747443, + "learning_rate": 3.7451306116538867e-06, + "loss": 0.9711, + "step": 1783 + }, + { + "epoch": 4.562659846547315, + "grad_norm": 0.1028220418076954, + "learning_rate": 3.7020472261192253e-06, + "loss": 0.9005, + "step": 1784 + }, + { + "epoch": 4.565217391304348, + "grad_norm": 0.10528950924867539, + "learning_rate": 3.6592072183146043e-06, + "loss": 0.9014, + "step": 1785 + }, + { + "epoch": 4.567774936061381, + "grad_norm": 0.10885389205625104, + "learning_rate": 3.616610724892473e-06, + "loss": 0.9105, + "step": 1786 + }, + { + "epoch": 4.570332480818414, + "grad_norm": 0.10574673017545647, + "learning_rate": 3.5742578817285777e-06, + "loss": 0.9193, + "step": 1787 + }, + { + "epoch": 4.572890025575448, + "grad_norm": 0.1117883112559058, + "learning_rate": 3.532148823921375e-06, + "loss": 0.91, + "step": 1788 + }, + { + "epoch": 4.57544757033248, + "grad_norm": 0.1096961353796292, + "learning_rate": 
3.490283685791722e-06, + "loss": 0.9594, + "step": 1789 + }, + { + "epoch": 4.578005115089514, + "grad_norm": 0.11161221492802147, + "learning_rate": 3.4486626008824575e-06, + "loss": 0.9327, + "step": 1790 + }, + { + "epoch": 4.580562659846548, + "grad_norm": 0.10744759992585007, + "learning_rate": 3.4072857019578787e-06, + "loss": 0.9219, + "step": 1791 + }, + { + "epoch": 4.58312020460358, + "grad_norm": 0.10620450789029019, + "learning_rate": 3.3661531210033684e-06, + "loss": 0.9256, + "step": 1792 + }, + { + "epoch": 4.585677749360614, + "grad_norm": 0.11017512262461532, + "learning_rate": 3.3252649892250123e-06, + "loss": 0.9188, + "step": 1793 + }, + { + "epoch": 4.588235294117647, + "grad_norm": 0.10649203584062787, + "learning_rate": 3.2846214370491114e-06, + "loss": 0.9286, + "step": 1794 + }, + { + "epoch": 4.59079283887468, + "grad_norm": 0.10775649571843056, + "learning_rate": 3.2442225941218175e-06, + "loss": 0.91, + "step": 1795 + }, + { + "epoch": 4.593350383631714, + "grad_norm": 0.10474409566182012, + "learning_rate": 3.20406858930868e-06, + "loss": 0.9187, + "step": 1796 + }, + { + "epoch": 4.595907928388747, + "grad_norm": 0.10901379780591824, + "learning_rate": 3.164159550694299e-06, + "loss": 0.9268, + "step": 1797 + }, + { + "epoch": 4.59846547314578, + "grad_norm": 0.10466246579829651, + "learning_rate": 3.12449560558183e-06, + "loss": 0.9045, + "step": 1798 + }, + { + "epoch": 4.601023017902813, + "grad_norm": 0.10734422633494305, + "learning_rate": 3.085076880492608e-06, + "loss": 0.9131, + "step": 1799 + }, + { + "epoch": 4.603580562659847, + "grad_norm": 0.1102245685075459, + "learning_rate": 3.045903501165821e-06, + "loss": 0.9456, + "step": 1800 + }, + { + "epoch": 4.6061381074168795, + "grad_norm": 0.10268613459994491, + "learning_rate": 3.0069755925579945e-06, + "loss": 0.9068, + "step": 1801 + }, + { + "epoch": 4.608695652173913, + "grad_norm": 0.1041191008417218, + "learning_rate": 2.9682932788426622e-06, + "loss": 0.8961, + 
"step": 1802 + }, + { + "epoch": 4.611253196930946, + "grad_norm": 0.10864214050559602, + "learning_rate": 2.9298566834099307e-06, + "loss": 0.9196, + "step": 1803 + }, + { + "epoch": 4.6138107416879794, + "grad_norm": 0.10289987799334356, + "learning_rate": 2.891665928866152e-06, + "loss": 0.8891, + "step": 1804 + }, + { + "epoch": 4.616368286445013, + "grad_norm": 0.10627932552480018, + "learning_rate": 2.853721137033425e-06, + "loss": 0.9309, + "step": 1805 + }, + { + "epoch": 4.618925831202046, + "grad_norm": 0.10976448315029629, + "learning_rate": 2.816022428949303e-06, + "loss": 0.8956, + "step": 1806 + }, + { + "epoch": 4.621483375959079, + "grad_norm": 0.10383428088111558, + "learning_rate": 2.7785699248663946e-06, + "loss": 0.9245, + "step": 1807 + }, + { + "epoch": 4.624040920716112, + "grad_norm": 0.10746935820829795, + "learning_rate": 2.741363744251917e-06, + "loss": 0.9641, + "step": 1808 + }, + { + "epoch": 4.626598465473146, + "grad_norm": 0.1077084422715649, + "learning_rate": 2.70440400578738e-06, + "loss": 0.936, + "step": 1809 + }, + { + "epoch": 4.629156010230179, + "grad_norm": 0.10619050887196295, + "learning_rate": 2.6676908273681745e-06, + "loss": 0.9236, + "step": 1810 + }, + { + "epoch": 4.631713554987212, + "grad_norm": 0.09868786010783248, + "learning_rate": 2.63122432610321e-06, + "loss": 0.9235, + "step": 1811 + }, + { + "epoch": 4.634271099744246, + "grad_norm": 0.10946907000550939, + "learning_rate": 2.5950046183145315e-06, + "loss": 0.9477, + "step": 1812 + }, + { + "epoch": 4.6368286445012785, + "grad_norm": 0.10911271296863308, + "learning_rate": 2.559031819536966e-06, + "loss": 0.8923, + "step": 1813 + }, + { + "epoch": 4.639386189258312, + "grad_norm": 0.1057852003057491, + "learning_rate": 2.523306044517737e-06, + "loss": 0.9575, + "step": 1814 + }, + { + "epoch": 4.641943734015345, + "grad_norm": 0.10597129201414962, + "learning_rate": 2.4878274072161147e-06, + "loss": 0.9478, + "step": 1815 + }, + { + "epoch": 
4.6445012787723785, + "grad_norm": 0.10530345780753828, + "learning_rate": 2.4525960208029843e-06, + "loss": 0.9468, + "step": 1816 + }, + { + "epoch": 4.647058823529412, + "grad_norm": 0.11128520568838593, + "learning_rate": 2.417611997660636e-06, + "loss": 0.9441, + "step": 1817 + }, + { + "epoch": 4.649616368286445, + "grad_norm": 0.10763480468498407, + "learning_rate": 2.3828754493822315e-06, + "loss": 0.9342, + "step": 1818 + }, + { + "epoch": 4.6521739130434785, + "grad_norm": 0.10157629367738297, + "learning_rate": 2.348386486771572e-06, + "loss": 0.9121, + "step": 1819 + }, + { + "epoch": 4.654731457800511, + "grad_norm": 0.10471609831813257, + "learning_rate": 2.314145219842683e-06, + "loss": 0.8991, + "step": 1820 + }, + { + "epoch": 4.657289002557545, + "grad_norm": 0.10785688490272143, + "learning_rate": 2.2801517578194997e-06, + "loss": 0.9023, + "step": 1821 + }, + { + "epoch": 4.659846547314578, + "grad_norm": 0.10437430915631776, + "learning_rate": 2.246406209135481e-06, + "loss": 0.9526, + "step": 1822 + }, + { + "epoch": 4.662404092071611, + "grad_norm": 0.09976754454013415, + "learning_rate": 2.212908681433286e-06, + "loss": 0.9032, + "step": 1823 + }, + { + "epoch": 4.664961636828645, + "grad_norm": 0.10687421431181417, + "learning_rate": 2.179659281564446e-06, + "loss": 0.9164, + "step": 1824 + }, + { + "epoch": 4.667519181585678, + "grad_norm": 0.10095706529924005, + "learning_rate": 2.146658115589002e-06, + "loss": 0.9191, + "step": 1825 + }, + { + "epoch": 4.670076726342711, + "grad_norm": 0.10132269971777201, + "learning_rate": 2.113905288775149e-06, + "loss": 0.9155, + "step": 1826 + }, + { + "epoch": 4.672634271099744, + "grad_norm": 0.10307251320208077, + "learning_rate": 2.0814009055989403e-06, + "loss": 0.9165, + "step": 1827 + }, + { + "epoch": 4.675191815856778, + "grad_norm": 0.10286096825987698, + "learning_rate": 2.0491450697439362e-06, + "loss": 0.9101, + "step": 1828 + }, + { + "epoch": 4.677749360613811, + "grad_norm": 
0.11262366728295894, + "learning_rate": 2.017137884100855e-06, + "loss": 0.914, + "step": 1829 + }, + { + "epoch": 4.680306905370844, + "grad_norm": 0.11116962011162274, + "learning_rate": 1.9853794507672885e-06, + "loss": 0.9376, + "step": 1830 + }, + { + "epoch": 4.6828644501278776, + "grad_norm": 0.1040833044448223, + "learning_rate": 1.9538698710473404e-06, + "loss": 0.9236, + "step": 1831 + }, + { + "epoch": 4.68542199488491, + "grad_norm": 0.10541970140434043, + "learning_rate": 1.9226092454512945e-06, + "loss": 0.9449, + "step": 1832 + }, + { + "epoch": 4.687979539641944, + "grad_norm": 0.10066677117893352, + "learning_rate": 1.8915976736953157e-06, + "loss": 0.9138, + "step": 1833 + }, + { + "epoch": 4.690537084398977, + "grad_norm": 0.10836258727940289, + "learning_rate": 1.8608352547011722e-06, + "loss": 0.9687, + "step": 1834 + }, + { + "epoch": 4.69309462915601, + "grad_norm": 0.11074221672096896, + "learning_rate": 1.8303220865958194e-06, + "loss": 0.9331, + "step": 1835 + }, + { + "epoch": 4.695652173913043, + "grad_norm": 0.10768331106543749, + "learning_rate": 1.8000582667111777e-06, + "loss": 0.945, + "step": 1836 + }, + { + "epoch": 4.698209718670077, + "grad_norm": 0.11098771435258944, + "learning_rate": 1.7700438915837858e-06, + "loss": 0.9284, + "step": 1837 + }, + { + "epoch": 4.70076726342711, + "grad_norm": 0.10799063090442731, + "learning_rate": 1.7402790569544813e-06, + "loss": 0.9, + "step": 1838 + }, + { + "epoch": 4.703324808184143, + "grad_norm": 0.1063256441527157, + "learning_rate": 1.7107638577681073e-06, + "loss": 0.8962, + "step": 1839 + }, + { + "epoch": 4.705882352941177, + "grad_norm": 0.1040346093959911, + "learning_rate": 1.681498388173246e-06, + "loss": 0.9516, + "step": 1840 + }, + { + "epoch": 4.708439897698209, + "grad_norm": 0.10335093559260676, + "learning_rate": 1.652482741521837e-06, + "loss": 0.9131, + "step": 1841 + }, + { + "epoch": 4.710997442455243, + "grad_norm": 0.10497266871186595, + "learning_rate": 
1.6237170103689547e-06, + "loss": 0.9119, + "step": 1842 + }, + { + "epoch": 4.713554987212277, + "grad_norm": 0.09874397507531227, + "learning_rate": 1.5952012864724898e-06, + "loss": 0.9141, + "step": 1843 + }, + { + "epoch": 4.716112531969309, + "grad_norm": 0.10588059236614217, + "learning_rate": 1.5669356607928188e-06, + "loss": 0.9331, + "step": 1844 + }, + { + "epoch": 4.718670076726343, + "grad_norm": 0.10070088788493103, + "learning_rate": 1.5389202234925837e-06, + "loss": 0.929, + "step": 1845 + }, + { + "epoch": 4.721227621483376, + "grad_norm": 0.10575607673396381, + "learning_rate": 1.5111550639363447e-06, + "loss": 0.9195, + "step": 1846 + }, + { + "epoch": 4.723785166240409, + "grad_norm": 0.1052143724728097, + "learning_rate": 1.483640270690332e-06, + "loss": 0.9236, + "step": 1847 + }, + { + "epoch": 4.726342710997442, + "grad_norm": 0.10525748489261051, + "learning_rate": 1.4563759315221515e-06, + "loss": 0.9515, + "step": 1848 + }, + { + "epoch": 4.728900255754476, + "grad_norm": 0.10259868287875906, + "learning_rate": 1.4293621334004581e-06, + "loss": 0.9522, + "step": 1849 + }, + { + "epoch": 4.731457800511509, + "grad_norm": 0.10136041128342929, + "learning_rate": 1.4025989624947856e-06, + "loss": 0.9207, + "step": 1850 + }, + { + "epoch": 4.734015345268542, + "grad_norm": 0.09781638687367422, + "learning_rate": 1.3760865041751736e-06, + "loss": 0.9226, + "step": 1851 + }, + { + "epoch": 4.736572890025576, + "grad_norm": 0.10175570288516775, + "learning_rate": 1.3498248430119465e-06, + "loss": 0.9141, + "step": 1852 + }, + { + "epoch": 4.739130434782608, + "grad_norm": 0.10920419786681472, + "learning_rate": 1.3238140627754014e-06, + "loss": 0.9544, + "step": 1853 + }, + { + "epoch": 4.741687979539642, + "grad_norm": 0.10426566657693524, + "learning_rate": 1.2980542464355962e-06, + "loss": 0.9492, + "step": 1854 + }, + { + "epoch": 4.744245524296675, + "grad_norm": 0.10161986714655702, + "learning_rate": 1.272545476162037e-06, + "loss": 
0.9253, + "step": 1855 + }, + { + "epoch": 4.746803069053708, + "grad_norm": 0.10568474804520346, + "learning_rate": 1.2472878333234407e-06, + "loss": 0.895, + "step": 1856 + }, + { + "epoch": 4.749360613810742, + "grad_norm": 0.10079844884131213, + "learning_rate": 1.2222813984874749e-06, + "loss": 0.9146, + "step": 1857 + }, + { + "epoch": 4.751918158567775, + "grad_norm": 0.09772653572503225, + "learning_rate": 1.197526251420502e-06, + "loss": 0.9434, + "step": 1858 + }, + { + "epoch": 4.754475703324808, + "grad_norm": 0.10521061309223152, + "learning_rate": 1.1730224710872862e-06, + "loss": 0.917, + "step": 1859 + }, + { + "epoch": 4.757033248081841, + "grad_norm": 0.10102811382690155, + "learning_rate": 1.148770135650814e-06, + "loss": 0.9402, + "step": 1860 + }, + { + "epoch": 4.759590792838875, + "grad_norm": 0.10184925109076563, + "learning_rate": 1.1247693224719768e-06, + "loss": 0.9341, + "step": 1861 + }, + { + "epoch": 4.762148337595908, + "grad_norm": 0.10416605640976224, + "learning_rate": 1.1010201081093653e-06, + "loss": 0.9258, + "step": 1862 + }, + { + "epoch": 4.764705882352941, + "grad_norm": 0.10242702305319981, + "learning_rate": 1.0775225683190027e-06, + "loss": 0.9401, + "step": 1863 + }, + { + "epoch": 4.767263427109975, + "grad_norm": 0.1054355472195325, + "learning_rate": 1.0542767780541242e-06, + "loss": 0.9452, + "step": 1864 + }, + { + "epoch": 4.7698209718670075, + "grad_norm": 0.09850748287302327, + "learning_rate": 1.0312828114649175e-06, + "loss": 0.9147, + "step": 1865 + }, + { + "epoch": 4.772378516624041, + "grad_norm": 0.10426914175715249, + "learning_rate": 1.008540741898285e-06, + "loss": 0.9364, + "step": 1866 + }, + { + "epoch": 4.774936061381074, + "grad_norm": 0.10421190980413071, + "learning_rate": 9.860506418976556e-07, + "loss": 0.9155, + "step": 1867 + }, + { + "epoch": 4.7774936061381075, + "grad_norm": 0.09974968560728949, + "learning_rate": 9.638125832026658e-07, + "loss": 0.9164, + "step": 1868 + }, + { + "epoch": 
4.78005115089514, + "grad_norm": 0.10323506252287525, + "learning_rate": 9.418266367490347e-07, + "loss": 0.9294, + "step": 1869 + }, + { + "epoch": 4.782608695652174, + "grad_norm": 0.10057988567304277, + "learning_rate": 9.200928726682456e-07, + "loss": 0.9198, + "step": 1870 + }, + { + "epoch": 4.7851662404092075, + "grad_norm": 0.10109533674227822, + "learning_rate": 8.986113602873758e-07, + "loss": 0.9696, + "step": 1871 + }, + { + "epoch": 4.78772378516624, + "grad_norm": 0.10248654252247842, + "learning_rate": 8.773821681288752e-07, + "loss": 0.9059, + "step": 1872 + }, + { + "epoch": 4.790281329923274, + "grad_norm": 0.10623698814695832, + "learning_rate": 8.564053639103087e-07, + "loss": 0.9104, + "step": 1873 + }, + { + "epoch": 4.792838874680307, + "grad_norm": 0.10184589368398628, + "learning_rate": 8.356810145441874e-07, + "loss": 0.8999, + "step": 1874 + }, + { + "epoch": 4.79539641943734, + "grad_norm": 0.09973933906653507, + "learning_rate": 8.152091861377198e-07, + "loss": 0.9281, + "step": 1875 + }, + { + "epoch": 4.797953964194374, + "grad_norm": 0.0965602895068992, + "learning_rate": 7.949899439926345e-07, + "loss": 0.8972, + "step": 1876 + }, + { + "epoch": 4.8005115089514065, + "grad_norm": 0.09817984542309073, + "learning_rate": 7.750233526049222e-07, + "loss": 0.9374, + "step": 1877 + }, + { + "epoch": 4.80306905370844, + "grad_norm": 0.10767556941660049, + "learning_rate": 7.553094756646761e-07, + "loss": 0.922, + "step": 1878 + }, + { + "epoch": 4.805626598465473, + "grad_norm": 0.09968854723854502, + "learning_rate": 7.358483760558877e-07, + "loss": 0.9092, + "step": 1879 + }, + { + "epoch": 4.8081841432225065, + "grad_norm": 0.10013368895859236, + "learning_rate": 7.166401158561886e-07, + "loss": 0.9053, + "step": 1880 + }, + { + "epoch": 4.810741687979539, + "grad_norm": 0.10050188953527933, + "learning_rate": 6.976847563367539e-07, + "loss": 0.9342, + "step": 1881 + }, + { + "epoch": 4.813299232736573, + "grad_norm": 
0.10572001540704473, + "learning_rate": 6.789823579619992e-07, + "loss": 0.9055, + "step": 1882 + }, + { + "epoch": 4.8158567774936065, + "grad_norm": 0.0958884248641111, + "learning_rate": 6.605329803894389e-07, + "loss": 0.8971, + "step": 1883 + }, + { + "epoch": 4.818414322250639, + "grad_norm": 0.10042711105691594, + "learning_rate": 6.423366824695265e-07, + "loss": 0.9176, + "step": 1884 + }, + { + "epoch": 4.820971867007673, + "grad_norm": 0.10511225981510647, + "learning_rate": 6.243935222454145e-07, + "loss": 0.9176, + "step": 1885 + }, + { + "epoch": 4.823529411764706, + "grad_norm": 0.09696941259664335, + "learning_rate": 6.067035569527768e-07, + "loss": 0.9336, + "step": 1886 + }, + { + "epoch": 4.826086956521739, + "grad_norm": 0.09743670957958701, + "learning_rate": 5.89266843019658e-07, + "loss": 0.9335, + "step": 1887 + }, + { + "epoch": 4.828644501278772, + "grad_norm": 0.10334868098940422, + "learning_rate": 5.720834360662597e-07, + "loss": 0.9302, + "step": 1888 + }, + { + "epoch": 4.831202046035806, + "grad_norm": 0.10567530011947436, + "learning_rate": 5.551533909047812e-07, + "loss": 0.9173, + "step": 1889 + }, + { + "epoch": 4.833759590792839, + "grad_norm": 0.10109569243664909, + "learning_rate": 5.384767615392328e-07, + "loss": 0.8973, + "step": 1890 + }, + { + "epoch": 4.836317135549872, + "grad_norm": 0.10107099176370515, + "learning_rate": 5.220536011652933e-07, + "loss": 0.9327, + "step": 1891 + }, + { + "epoch": 4.838874680306906, + "grad_norm": 0.09592817542499839, + "learning_rate": 5.058839621700973e-07, + "loss": 0.8986, + "step": 1892 + }, + { + "epoch": 4.841432225063938, + "grad_norm": 0.10402134439975212, + "learning_rate": 4.899678961320842e-07, + "loss": 0.8783, + "step": 1893 + }, + { + "epoch": 4.843989769820972, + "grad_norm": 0.09879349396951775, + "learning_rate": 4.743054538208558e-07, + "loss": 0.9265, + "step": 1894 + }, + { + "epoch": 4.846547314578006, + "grad_norm": 0.10801219003494308, + "learning_rate": 
4.5889668519698117e-07, + "loss": 0.917, + "step": 1895 + }, + { + "epoch": 4.849104859335038, + "grad_norm": 0.10336628048777474, + "learning_rate": 4.437416394118721e-07, + "loss": 0.9475, + "step": 1896 + }, + { + "epoch": 4.851662404092072, + "grad_norm": 0.09915519846574018, + "learning_rate": 4.2884036480757896e-07, + "loss": 0.9136, + "step": 1897 + }, + { + "epoch": 4.854219948849105, + "grad_norm": 0.10488853611936978, + "learning_rate": 4.1419290891669293e-07, + "loss": 0.9276, + "step": 1898 + }, + { + "epoch": 4.856777493606138, + "grad_norm": 0.10257283710076046, + "learning_rate": 3.997993184621418e-07, + "loss": 0.9584, + "step": 1899 + }, + { + "epoch": 4.859335038363171, + "grad_norm": 0.10288770850501508, + "learning_rate": 3.856596393570744e-07, + "loss": 0.9128, + "step": 1900 + }, + { + "epoch": 4.861892583120205, + "grad_norm": 0.09729119851077626, + "learning_rate": 3.717739167047185e-07, + "loss": 0.912, + "step": 1901 + }, + { + "epoch": 4.864450127877237, + "grad_norm": 0.1024901619430387, + "learning_rate": 3.581421947982122e-07, + "loss": 0.9166, + "step": 1902 + }, + { + "epoch": 4.867007672634271, + "grad_norm": 0.10281823220549692, + "learning_rate": 3.447645171204528e-07, + "loss": 0.9308, + "step": 1903 + }, + { + "epoch": 4.869565217391305, + "grad_norm": 0.1014220238267167, + "learning_rate": 3.316409263440168e-07, + "loss": 0.9401, + "step": 1904 + }, + { + "epoch": 4.872122762148337, + "grad_norm": 0.10082233886495114, + "learning_rate": 3.1877146433095584e-07, + "loss": 0.9349, + "step": 1905 + }, + { + "epoch": 4.874680306905371, + "grad_norm": 0.09966232794121334, + "learning_rate": 3.0615617213271664e-07, + "loss": 0.9218, + "step": 1906 + }, + { + "epoch": 4.877237851662404, + "grad_norm": 0.09941244859685047, + "learning_rate": 2.937950899899633e-07, + "loss": 0.9278, + "step": 1907 + }, + { + "epoch": 4.879795396419437, + "grad_norm": 0.09951897237383148, + "learning_rate": 2.816882573324886e-07, + "loss": 0.949, + 
"step": 1908 + }, + { + "epoch": 4.882352941176471, + "grad_norm": 0.10401741016384587, + "learning_rate": 2.6983571277907184e-07, + "loss": 0.9563, + "step": 1909 + }, + { + "epoch": 4.884910485933504, + "grad_norm": 0.09725714975876674, + "learning_rate": 2.582374941373456e-07, + "loss": 0.9211, + "step": 1910 + }, + { + "epoch": 4.887468030690537, + "grad_norm": 0.10133318561817573, + "learning_rate": 2.468936384036891e-07, + "loss": 0.9013, + "step": 1911 + }, + { + "epoch": 4.89002557544757, + "grad_norm": 0.10119524228199774, + "learning_rate": 2.3580418176311293e-07, + "loss": 0.9417, + "step": 1912 + }, + { + "epoch": 4.892583120204604, + "grad_norm": 0.09951712783614965, + "learning_rate": 2.2496915958913458e-07, + "loss": 0.9253, + "step": 1913 + }, + { + "epoch": 4.8951406649616365, + "grad_norm": 0.0988058097334845, + "learning_rate": 2.143886064436629e-07, + "loss": 0.9344, + "step": 1914 + }, + { + "epoch": 4.89769820971867, + "grad_norm": 0.0988533205503812, + "learning_rate": 2.0406255607688274e-07, + "loss": 0.9258, + "step": 1915 + }, + { + "epoch": 4.900255754475703, + "grad_norm": 0.09899535759420186, + "learning_rate": 1.9399104142719283e-07, + "loss": 0.9484, + "step": 1916 + }, + { + "epoch": 4.9028132992327365, + "grad_norm": 0.10153569163687459, + "learning_rate": 1.8417409462102798e-07, + "loss": 0.9073, + "step": 1917 + }, + { + "epoch": 4.90537084398977, + "grad_norm": 0.09957601677253938, + "learning_rate": 1.746117469728148e-07, + "loss": 0.8841, + "step": 1918 + }, + { + "epoch": 4.907928388746803, + "grad_norm": 0.10184723073884586, + "learning_rate": 1.6530402898484733e-07, + "loss": 0.9525, + "step": 1919 + }, + { + "epoch": 4.910485933503836, + "grad_norm": 0.09694091907819868, + "learning_rate": 1.5625097034719815e-07, + "loss": 0.9193, + "step": 1920 + }, + { + "epoch": 4.913043478260869, + "grad_norm": 0.10383046531826044, + "learning_rate": 1.474525999375942e-07, + "loss": 0.9339, + "step": 1921 + }, + { + "epoch": 
4.915601023017903, + "grad_norm": 0.09727962611523398, + "learning_rate": 1.3890894582138103e-07, + "loss": 0.9271, + "step": 1922 + }, + { + "epoch": 4.918158567774936, + "grad_norm": 0.10045856203495888, + "learning_rate": 1.3062003525138089e-07, + "loss": 0.9129, + "step": 1923 + }, + { + "epoch": 4.920716112531969, + "grad_norm": 0.09953247096750498, + "learning_rate": 1.225858946678393e-07, + "loss": 0.9149, + "step": 1924 + }, + { + "epoch": 4.923273657289003, + "grad_norm": 0.10381806462155738, + "learning_rate": 1.1480654969833638e-07, + "loss": 0.9473, + "step": 1925 + }, + { + "epoch": 4.9258312020460355, + "grad_norm": 0.09951540982333777, + "learning_rate": 1.0728202515766228e-07, + "loss": 0.9452, + "step": 1926 + }, + { + "epoch": 4.928388746803069, + "grad_norm": 0.09714908717583805, + "learning_rate": 1.0001234504779966e-07, + "loss": 0.9478, + "step": 1927 + }, + { + "epoch": 4.930946291560103, + "grad_norm": 0.10355673013634514, + "learning_rate": 9.299753255781696e-08, + "loss": 0.9113, + "step": 1928 + }, + { + "epoch": 4.9335038363171355, + "grad_norm": 0.1010600576834511, + "learning_rate": 8.623761006379738e-08, + "loss": 0.9322, + "step": 1929 + }, + { + "epoch": 4.936061381074169, + "grad_norm": 0.09937740112494577, + "learning_rate": 7.973259912875897e-08, + "loss": 0.9529, + "step": 1930 + }, + { + "epoch": 4.938618925831202, + "grad_norm": 0.10172138015837517, + "learning_rate": 7.348252050261018e-08, + "loss": 0.9516, + "step": 1931 + }, + { + "epoch": 4.9411764705882355, + "grad_norm": 0.10153203845561144, + "learning_rate": 6.748739412205218e-08, + "loss": 0.9327, + "step": 1932 + }, + { + "epoch": 4.943734015345268, + "grad_norm": 0.09630467933849142, + "learning_rate": 6.174723911053449e-08, + "loss": 0.9033, + "step": 1933 + }, + { + "epoch": 4.946291560102302, + "grad_norm": 0.09792982830145779, + "learning_rate": 5.6262073778192705e-08, + "loss": 0.9289, + "step": 1934 + }, + { + "epoch": 4.948849104859335, + "grad_norm": 
0.10137971801200332, + "learning_rate": 5.1031915621795325e-08, + "loss": 0.9127, + "step": 1935 + }, + { + "epoch": 4.951406649616368, + "grad_norm": 0.09867014858433792, + "learning_rate": 4.605678132467262e-08, + "loss": 0.9195, + "step": 1936 + }, + { + "epoch": 4.953964194373402, + "grad_norm": 0.09945447399480298, + "learning_rate": 4.133668675666336e-08, + "loss": 0.9235, + "step": 1937 + }, + { + "epoch": 4.956521739130435, + "grad_norm": 0.09740241154451518, + "learning_rate": 3.687164697408818e-08, + "loss": 0.8983, + "step": 1938 + }, + { + "epoch": 4.959079283887468, + "grad_norm": 0.10216904139394242, + "learning_rate": 3.266167621967853e-08, + "loss": 0.9333, + "step": 1939 + }, + { + "epoch": 4.961636828644501, + "grad_norm": 1.7447830173428402, + "learning_rate": 2.8706787922541112e-08, + "loss": 0.9677, + "step": 1940 + }, + { + "epoch": 4.964194373401535, + "grad_norm": 0.10248140850999501, + "learning_rate": 2.5006994698095754e-08, + "loss": 0.9205, + "step": 1941 + }, + { + "epoch": 4.966751918158568, + "grad_norm": 0.10291780599089813, + "learning_rate": 2.156230834808426e-08, + "loss": 0.9314, + "step": 1942 + }, + { + "epoch": 4.969309462915601, + "grad_norm": 0.09792527077121264, + "learning_rate": 1.837273986046384e-08, + "loss": 0.9289, + "step": 1943 + }, + { + "epoch": 4.971867007672635, + "grad_norm": 0.0960164691356107, + "learning_rate": 1.5438299409433755e-08, + "loss": 0.9013, + "step": 1944 + }, + { + "epoch": 4.974424552429667, + "grad_norm": 0.09979959822032446, + "learning_rate": 1.2758996355373144e-08, + "loss": 0.9203, + "step": 1945 + }, + { + "epoch": 4.976982097186701, + "grad_norm": 0.10827315260460384, + "learning_rate": 1.0334839244805495e-08, + "loss": 0.9541, + "step": 1946 + }, + { + "epoch": 4.979539641943734, + "grad_norm": 0.0988359933652592, + "learning_rate": 8.165835810389766e-09, + "loss": 0.9064, + "step": 1947 + }, + { + "epoch": 4.982097186700767, + "grad_norm": 0.09820054319763678, + "learning_rate": 
6.251992970875975e-09, + "loss": 0.9214, + "step": 1948 + }, + { + "epoch": 4.9846547314578, + "grad_norm": 0.10015641951197356, + "learning_rate": 4.5933168311140805e-09, + "loss": 0.9461, + "step": 1949 + }, + { + "epoch": 4.987212276214834, + "grad_norm": 0.10040227257081992, + "learning_rate": 3.1898126820006924e-09, + "loss": 0.9465, + "step": 1950 + }, + { + "epoch": 4.989769820971867, + "grad_norm": 0.09609050872126598, + "learning_rate": 2.041485000479071e-09, + "loss": 0.9108, + "step": 1951 + }, + { + "epoch": 4.9923273657289, + "grad_norm": 0.09913441529294063, + "learning_rate": 1.148337449521364e-09, + "loss": 0.9356, + "step": 1952 + }, + { + "epoch": 4.994884910485934, + "grad_norm": 0.09800757849537761, + "learning_rate": 5.103728781197248e-10, + "loss": 0.9002, + "step": 1953 + }, + { + "epoch": 4.997442455242966, + "grad_norm": 0.09827002033578132, + "learning_rate": 1.275933212774305e-10, + "loss": 0.9081, + "step": 1954 + }, + { + "epoch": 5.0, + "grad_norm": 0.10411131044626397, + "learning_rate": 0.0, + "loss": 0.9254, + "step": 1955 + }, + { + "epoch": 5.0, + "step": 1955, + "total_flos": 7122204608430080.0, + "train_loss": 1.0036099467436066, + "train_runtime": 36219.8634, + "train_samples_per_second": 13.805, + "train_steps_per_second": 0.054 + } + ], + "logging_steps": 1.0, + "max_steps": 1955, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 7122204608430080.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..9bff276 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9910e6226e981cc222cb4c6f4085fabb30e68b5e70fb24fb403cb6b7bfc07cc3 +size 7224 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000..4071e8b Binary files /dev/null and b/training_loss.png differ diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..6c49fc6 --- /dev/null +++ b/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833