commit fab7e2bb35d77e4651f3f8e5e1e44a9e00583697 Author: ModelHub XC Date: Thu Jun 11 18:32:12 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: mlfoundations-dev/openthoughts3_100k_qwen25_1b_bsz256_lr2e5_epochs7 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..4021b5a --- /dev/null +++ b/.gitattributes @@ -0,0 +1,53 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text + + +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +tokenizer.json filter=lfs diff=lfs merge=lfs -text +vocab.json filter=lfs diff=lfs merge=lfs -text +training_args.bin filter=lfs diff=lfs merge=lfs -text +model.safetensors filter=lfs diff=lfs merge=lfs -text +merges.txt filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..ac01fa1 --- /dev/null +++ b/README.md @@ -0,0 +1,61 @@ +--- +library_name: transformers +license: apache-2.0 +base_model: Qwen/Qwen2.5-1.5B-Instruct +tags: +- llama-factory +- full +- generated_from_trainer +model-index: +- name: openthoughts3_100k_qwen25_1b_bsz256_lr2e5_epochs7 + results: [] +--- + + + +# openthoughts3_100k_qwen25_1b_bsz256_lr2e5_epochs7 + +This model is a fine-tuned version of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) on the mlfoundations-dev/openthoughts3_100k dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-05 +- train_batch_size: 4 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 32 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 256 +- total_eval_batch_size: 256 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 7.0 + +### Training results + + + +### Framework versions + +- Transformers 4.46.1 +- Pytorch 2.3.0 +- Datasets 3.1.0 +- Tokenizers 0.20.3 diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..482ced4 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,24 @@ +{ + "": 151658, + "": 151657, + "<|box_end|>": 151649, + "<|box_start|>": 151648, + "<|endoftext|>": 151643, + "<|file_sep|>": 151664, + "<|fim_middle|>": 151660, + "<|fim_pad|>": 151662, + "<|fim_prefix|>": 151659, + "<|fim_suffix|>": 151661, + "<|im_end|>": 151645, + "<|im_start|>": 151644, + "<|image_pad|>": 151655, + "<|object_ref_end|>": 151647, + "<|object_ref_start|>": 151646, + "<|quad_end|>": 151651, + "<|quad_start|>": 151650, + "<|repo_name|>": 151663, + "<|video_pad|>": 151656, + "<|vision_end|>": 151653, + "<|vision_pad|>": 151654, + "<|vision_start|>": 151652 +} diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..367ff0f --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 7.0, + "total_flos": 9969287656374272.0, + "train_loss": 1.063590354692078, + "train_runtime": 97730.0822, + "train_samples_per_second": 7.163, + "train_steps_per_second": 0.028 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..0e002a8 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "_name_or_path": "Qwen/Qwen2.5-1.5B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "max_position_embeddings": 32768, + "max_window_layers": 21, + "model_type": "qwen2", + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/configs.yaml b/configs.yaml new file mode 100644 index 0000000..e960deb --- /dev/null +++ b/configs.yaml @@ -0,0 +1,39 @@ +assistant_tag: gpt +bf16: 'True' +content_tag: value +cutoff_len: '16384' +dataloader_num_workers: '4' +dataloader_persistent_workers: 'True' +dataloader_pin_memory: 'True' +dataset: mlfoundations-dev/openthoughts3_100k +dataset_dir: ONLINE +ddp_timeout: '180000000' +deepspeed: /opt/ml/code/zero3.json +do_train: 'True' +enable_liger_kernel: 'True' +finetuning_type: full +formatting: sharegpt +global_batch_size: '256' +gradient_accumulation_steps: '2' +hub_model_id: mlfoundations-dev/openthoughts3_100k_qwen25_1b_bsz256_lr2e5_epochs7 +learning_rate: 2e-05 +logging_steps: '1' +lr_scheduler_type: cosine +messages: conversations +model_name_or_path: Qwen/Qwen2.5-1.5B-Instruct +num_train_epochs: '7.0' +output_dir: /opt/ml/model +overwrite_cache: 'True' +per_device_train_batch_size: '4' +plot_loss: 'True' +preprocessing_num_workers: '16' +push_to_db: 'True' +push_to_hub: 'True' +report_to: wandb +role_tag: from +run_name: openthoughts3_100k_qwen25_1b_bsz256_lr2e5_epochs7 +save_strategy: epoch +stage: sft +template: qwen25 +user_tag: human +warmup_ratio: '0.1' diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..16e88f7 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,14 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.1, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "4.46.1" +} diff --git a/merges.txt b/merges.txt new file mode 100644 index 0000000..80c1a19 --- /dev/null +++ b/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..ee879d8 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c60564991f06a824231c2ab647ead17138280b146d7f692974fdf04d319bc5a +size 3087467144 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..17305b3 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..51ebb3b --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa +size 11421896 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..b84f53a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,208 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..367ff0f --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 7.0, + "total_flos": 9969287656374272.0, + "train_loss": 1.063590354692078, + "train_runtime": 97730.0822, + "train_samples_per_second": 7.163, + "train_steps_per_second": 0.028 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..3a095f0 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,2738 @@ +{"current_steps": 1, "total_steps": 2737, "loss": 1.5218, "lr": 7.299270072992701e-08, "epoch": 0.0025575447570332483, "percentage": 0.04, "elapsed_time": "0:00:42", "remaining_time": "1 day, 8:22:53"} +{"current_steps": 2, "total_steps": 2737, "loss": 1.4755, "lr": 1.4598540145985402e-07, "epoch": 0.005115089514066497, "percentage": 0.07, "elapsed_time": "0:01:18", "remaining_time": "1 day, 5:45:59"} +{"current_steps": 3, "total_steps": 2737, "loss": 1.4935, "lr": 2.1897810218978106e-07, "epoch": 0.0076726342710997444, "percentage": 0.11, "elapsed_time": "0:01:53", "remaining_time": "1 day, 4:48:00"} +{"current_steps": 4, "total_steps": 2737, "loss": 1.4813, "lr": 2.9197080291970804e-07, "epoch": 0.010230179028132993, "percentage": 0.15, "elapsed_time": "0:02:29", "remaining_time": "1 day, 4:20:39"} +{"current_steps": 5, "total_steps": 2737, "loss": 1.4941, "lr": 3.6496350364963505e-07, "epoch": 0.01278772378516624, "percentage": 0.18, "elapsed_time": "0:03:04", "remaining_time": "1 day, 4:04:09"} +{"current_steps": 6, "total_steps": 2737, "loss": 1.5066, "lr": 4.379562043795621e-07, "epoch": 0.015345268542199489, "percentage": 0.22, "elapsed_time": "0:03:41", "remaining_time": "1 day, 3:59:35"} +{"current_steps": 7, "total_steps": 2737, "loss": 1.478, "lr": 5.109489051094891e-07, "epoch": 0.017902813299232736, "percentage": 0.26, "elapsed_time": "0:04:16", "remaining_time": "1 day, 3:50:27"} +{"current_steps": 8, "total_steps": 2737, "loss": 1.4882, "lr": 5.839416058394161e-07, "epoch": 0.020460358056265986, "percentage": 0.29, "elapsed_time": "0:04:53", "remaining_time": "1 day, 3:49:30"} +{"current_steps": 9, "total_steps": 2737, "loss": 1.5219, "lr": 6.569343065693432e-07, "epoch": 0.023017902813299233, "percentage": 0.33, "elapsed_time": "0:05:29", "remaining_time": "1 day, 3:43:30"} +{"current_steps": 10, "total_steps": 2737, "loss": 1.5149, "lr": 7.299270072992701e-07, "epoch": 0.02557544757033248, "percentage": 0.37, "elapsed_time": "0:06:04", "remaining_time": "1 day, 3:38:24"} +{"current_steps": 11, "total_steps": 2737, "loss": 1.5065, "lr": 8.029197080291971e-07, "epoch": 0.028132992327365727, "percentage": 0.4, "elapsed_time": "0:06:40", "remaining_time": "1 day, 3:34:07"} +{"current_steps": 12, "total_steps": 2737, "loss": 1.47, "lr": 8.759124087591242e-07, "epoch": 0.030690537084398978, "percentage": 0.44, "elapsed_time": "0:07:16", "remaining_time": "1 day, 3:30:40"} +{"current_steps": 13, "total_steps": 2737, "loss": 1.5126, "lr": 9.489051094890511e-07, "epoch": 0.03324808184143223, "percentage": 0.47, "elapsed_time": "0:07:51", "remaining_time": "1 day, 3:27:33"} +{"current_steps": 14, "total_steps": 2737, "loss": 1.4605, "lr": 1.0218978102189781e-06, "epoch": 0.03580562659846547, "percentage": 0.51, "elapsed_time": "0:08:27", "remaining_time": "1 day, 3:24:43"} +{"current_steps": 15, "total_steps": 2737, "loss": 1.4985, "lr": 1.0948905109489052e-06, "epoch": 0.03836317135549872, "percentage": 0.55, "elapsed_time": "0:09:03", "remaining_time": "1 day, 3:22:17"} +{"current_steps": 16, "total_steps": 2737, "loss": 1.4523, "lr": 1.1678832116788322e-06, "epoch": 0.04092071611253197, "percentage": 0.58, "elapsed_time": "0:09:39", "remaining_time": "1 day, 3:23:02"} +{"current_steps": 17, "total_steps": 2737, "loss": 1.4734, "lr": 1.2408759124087592e-06, "epoch": 0.043478260869565216, "percentage": 0.62, "elapsed_time": "0:10:15", "remaining_time": "1 day, 3:20:50"} +{"current_steps": 18, "total_steps": 2737, "loss": 1.479, "lr": 1.3138686131386864e-06, "epoch": 0.04603580562659847, "percentage": 0.66, "elapsed_time": "0:10:50", "remaining_time": "1 day, 3:17:56"} +{"current_steps": 19, "total_steps": 2737, "loss": 1.4702, "lr": 1.3868613138686132e-06, "epoch": 0.04859335038363171, "percentage": 0.69, "elapsed_time": "0:11:26", "remaining_time": "1 day, 3:16:00"} +{"current_steps": 20, "total_steps": 2737, "loss": 1.4617, "lr": 1.4598540145985402e-06, "epoch": 0.05115089514066496, "percentage": 0.73, "elapsed_time": "0:12:03", "remaining_time": "1 day, 3:18:07"} +{"current_steps": 21, "total_steps": 2737, "loss": 1.4463, "lr": 1.5328467153284674e-06, "epoch": 0.05370843989769821, "percentage": 0.77, "elapsed_time": "0:12:39", "remaining_time": "1 day, 3:16:14"} +{"current_steps": 22, "total_steps": 2737, "loss": 1.4599, "lr": 1.6058394160583942e-06, "epoch": 0.056265984654731455, "percentage": 0.8, "elapsed_time": "0:13:14", "remaining_time": "1 day, 3:14:21"} +{"current_steps": 23, "total_steps": 2737, "loss": 1.4157, "lr": 1.6788321167883212e-06, "epoch": 0.058823529411764705, "percentage": 0.84, "elapsed_time": "0:13:50", "remaining_time": "1 day, 3:12:35"} +{"current_steps": 24, "total_steps": 2737, "loss": 1.4439, "lr": 1.7518248175182485e-06, "epoch": 0.061381074168797956, "percentage": 0.88, "elapsed_time": "0:14:25", "remaining_time": "1 day, 3:11:02"} +{"current_steps": 25, "total_steps": 2737, "loss": 1.4218, "lr": 1.8248175182481753e-06, "epoch": 0.0639386189258312, "percentage": 0.91, "elapsed_time": "0:15:01", "remaining_time": "1 day, 3:10:11"} +{"current_steps": 26, "total_steps": 2737, "loss": 1.4269, "lr": 1.8978102189781023e-06, "epoch": 0.06649616368286446, "percentage": 0.95, "elapsed_time": "0:15:37", "remaining_time": "1 day, 3:08:41"} +{"current_steps": 27, "total_steps": 2737, "loss": 1.4158, "lr": 1.9708029197080293e-06, "epoch": 0.06905370843989769, "percentage": 0.99, "elapsed_time": "0:16:12", "remaining_time": "1 day, 3:07:35"} +{"current_steps": 28, "total_steps": 2737, "loss": 1.4405, "lr": 2.0437956204379563e-06, "epoch": 0.07161125319693094, "percentage": 1.02, "elapsed_time": "0:16:48", "remaining_time": "1 day, 3:06:22"} +{"current_steps": 29, "total_steps": 2737, "loss": 1.4151, "lr": 2.1167883211678833e-06, "epoch": 0.0741687979539642, "percentage": 1.06, "elapsed_time": "0:17:24", "remaining_time": "1 day, 3:05:07"} +{"current_steps": 30, "total_steps": 2737, "loss": 1.419, "lr": 2.1897810218978103e-06, "epoch": 0.07672634271099744, "percentage": 1.1, "elapsed_time": "0:17:59", "remaining_time": "1 day, 3:04:00"} +{"current_steps": 31, "total_steps": 2737, "loss": 1.412, "lr": 2.2627737226277373e-06, "epoch": 0.0792838874680307, "percentage": 1.13, "elapsed_time": "0:18:35", "remaining_time": "1 day, 3:02:50"} +{"current_steps": 32, "total_steps": 2737, "loss": 1.3866, "lr": 2.3357664233576643e-06, "epoch": 0.08184143222506395, "percentage": 1.17, "elapsed_time": "0:19:11", "remaining_time": "1 day, 3:01:36"} +{"current_steps": 33, "total_steps": 2737, "loss": 1.4127, "lr": 2.4087591240875918e-06, "epoch": 0.08439897698209718, "percentage": 1.21, "elapsed_time": "0:19:47", "remaining_time": "1 day, 3:01:24"} +{"current_steps": 34, "total_steps": 2737, "loss": 1.4281, "lr": 2.4817518248175183e-06, "epoch": 0.08695652173913043, "percentage": 1.24, "elapsed_time": "0:20:22", "remaining_time": "1 day, 3:00:18"} +{"current_steps": 35, "total_steps": 2737, "loss": 1.3731, "lr": 2.5547445255474458e-06, "epoch": 0.08951406649616368, "percentage": 1.28, "elapsed_time": "0:20:58", "remaining_time": "1 day, 2:59:13"} +{"current_steps": 36, "total_steps": 2737, "loss": 1.3866, "lr": 2.627737226277373e-06, "epoch": 0.09207161125319693, "percentage": 1.32, "elapsed_time": "0:21:33", "remaining_time": "1 day, 2:57:36"} +{"current_steps": 37, "total_steps": 2737, "loss": 1.4026, "lr": 2.7007299270072994e-06, "epoch": 0.09462915601023018, "percentage": 1.35, "elapsed_time": "0:22:09", "remaining_time": "1 day, 2:56:38"} +{"current_steps": 38, "total_steps": 2737, "loss": 1.372, "lr": 2.7737226277372264e-06, "epoch": 0.09718670076726342, "percentage": 1.39, "elapsed_time": "0:22:45", "remaining_time": "1 day, 2:56:43"} +{"current_steps": 39, "total_steps": 2737, "loss": 1.3914, "lr": 2.8467153284671534e-06, "epoch": 0.09974424552429667, "percentage": 1.42, "elapsed_time": "0:23:21", "remaining_time": "1 day, 2:55:45"} +{"current_steps": 40, "total_steps": 2737, "loss": 1.328, "lr": 2.9197080291970804e-06, "epoch": 0.10230179028132992, "percentage": 1.46, "elapsed_time": "0:23:57", "remaining_time": "1 day, 2:54:51"} +{"current_steps": 41, "total_steps": 2737, "loss": 1.3525, "lr": 2.992700729927008e-06, "epoch": 0.10485933503836317, "percentage": 1.5, "elapsed_time": "0:24:32", "remaining_time": "1 day, 2:53:51"} +{"current_steps": 42, "total_steps": 2737, "loss": 1.3478, "lr": 3.065693430656935e-06, "epoch": 0.10741687979539642, "percentage": 1.53, "elapsed_time": "0:25:08", "remaining_time": "1 day, 2:52:55"} +{"current_steps": 43, "total_steps": 2737, "loss": 1.3191, "lr": 3.1386861313868614e-06, "epoch": 0.10997442455242967, "percentage": 1.57, "elapsed_time": "0:25:43", "remaining_time": "1 day, 2:51:26"} +{"current_steps": 44, "total_steps": 2737, "loss": 1.3222, "lr": 3.2116788321167884e-06, "epoch": 0.11253196930946291, "percentage": 1.61, "elapsed_time": "0:26:19", "remaining_time": "1 day, 2:50:50"} +{"current_steps": 45, "total_steps": 2737, "loss": 1.3238, "lr": 3.2846715328467155e-06, "epoch": 0.11508951406649616, "percentage": 1.64, "elapsed_time": "0:26:54", "remaining_time": "1 day, 2:49:55"} +{"current_steps": 46, "total_steps": 2737, "loss": 1.3436, "lr": 3.3576642335766425e-06, "epoch": 0.11764705882352941, "percentage": 1.68, "elapsed_time": "0:27:30", "remaining_time": "1 day, 2:49:05"} +{"current_steps": 47, "total_steps": 2737, "loss": 1.3458, "lr": 3.43065693430657e-06, "epoch": 0.12020460358056266, "percentage": 1.72, "elapsed_time": "0:28:07", "remaining_time": "1 day, 2:49:24"} +{"current_steps": 48, "total_steps": 2737, "loss": 1.3132, "lr": 3.503649635036497e-06, "epoch": 0.12276214833759591, "percentage": 1.75, "elapsed_time": "0:28:42", "remaining_time": "1 day, 2:48:29"} +{"current_steps": 49, "total_steps": 2737, "loss": 1.3268, "lr": 3.576642335766424e-06, "epoch": 0.12531969309462915, "percentage": 1.79, "elapsed_time": "0:29:17", "remaining_time": "1 day, 2:47:10"} +{"current_steps": 50, "total_steps": 2737, "loss": 1.2966, "lr": 3.6496350364963505e-06, "epoch": 0.1278772378516624, "percentage": 1.83, "elapsed_time": "0:29:53", "remaining_time": "1 day, 2:46:20"} +{"current_steps": 51, "total_steps": 2737, "loss": 1.3004, "lr": 3.7226277372262775e-06, "epoch": 0.13043478260869565, "percentage": 1.86, "elapsed_time": "0:30:28", "remaining_time": "1 day, 2:45:26"} +{"current_steps": 52, "total_steps": 2737, "loss": 1.2812, "lr": 3.7956204379562045e-06, "epoch": 0.1329923273657289, "percentage": 1.9, "elapsed_time": "0:31:05", "remaining_time": "1 day, 2:45:39"} +{"current_steps": 53, "total_steps": 2737, "loss": 1.2774, "lr": 3.868613138686132e-06, "epoch": 0.13554987212276215, "percentage": 1.94, "elapsed_time": "0:31:41", "remaining_time": "1 day, 2:44:45"} +{"current_steps": 54, "total_steps": 2737, "loss": 1.3168, "lr": 3.9416058394160585e-06, "epoch": 0.13810741687979539, "percentage": 1.97, "elapsed_time": "0:32:16", "remaining_time": "1 day, 2:43:23"} +{"current_steps": 55, "total_steps": 2737, "loss": 1.3283, "lr": 4.014598540145986e-06, "epoch": 0.14066496163682865, "percentage": 2.01, "elapsed_time": "0:32:51", "remaining_time": "1 day, 2:42:35"} +{"current_steps": 56, "total_steps": 2737, "loss": 1.3135, "lr": 4.0875912408759126e-06, "epoch": 0.1432225063938619, "percentage": 2.05, "elapsed_time": "0:33:28", "remaining_time": "1 day, 2:42:20"} +{"current_steps": 57, "total_steps": 2737, "loss": 1.2989, "lr": 4.16058394160584e-06, "epoch": 0.14578005115089515, "percentage": 2.08, "elapsed_time": "0:34:03", "remaining_time": "1 day, 2:41:36"} +{"current_steps": 58, "total_steps": 2737, "loss": 1.3137, "lr": 4.233576642335767e-06, "epoch": 0.1483375959079284, "percentage": 2.12, "elapsed_time": "0:34:39", "remaining_time": "1 day, 2:40:51"} +{"current_steps": 59, "total_steps": 2737, "loss": 1.2743, "lr": 4.306569343065693e-06, "epoch": 0.15089514066496162, "percentage": 2.16, "elapsed_time": "0:35:15", "remaining_time": "1 day, 2:40:07"} +{"current_steps": 60, "total_steps": 2737, "loss": 1.2987, "lr": 4.379562043795621e-06, "epoch": 0.1534526854219949, "percentage": 2.19, "elapsed_time": "0:35:50", "remaining_time": "1 day, 2:39:19"} +{"current_steps": 61, "total_steps": 2737, "loss": 1.2869, "lr": 4.452554744525548e-06, "epoch": 0.15601023017902813, "percentage": 2.23, "elapsed_time": "0:36:26", "remaining_time": "1 day, 2:38:33"} +{"current_steps": 62, "total_steps": 2737, "loss": 1.3199, "lr": 4.525547445255475e-06, "epoch": 0.1585677749360614, "percentage": 2.27, "elapsed_time": "0:37:01", "remaining_time": "1 day, 2:37:44"} +{"current_steps": 63, "total_steps": 2737, "loss": 1.2972, "lr": 4.598540145985402e-06, "epoch": 0.16112531969309463, "percentage": 2.3, "elapsed_time": "0:37:37", "remaining_time": "1 day, 2:36:58"} +{"current_steps": 64, "total_steps": 2737, "loss": 1.2928, "lr": 4.671532846715329e-06, "epoch": 0.1636828644501279, "percentage": 2.34, "elapsed_time": "0:38:12", "remaining_time": "1 day, 2:35:58"} +{"current_steps": 65, "total_steps": 2737, "loss": 1.2861, "lr": 4.744525547445255e-06, "epoch": 0.16624040920716113, "percentage": 2.37, "elapsed_time": "0:38:49", "remaining_time": "1 day, 2:35:39"} +{"current_steps": 66, "total_steps": 2737, "loss": 1.2461, "lr": 4.8175182481751835e-06, "epoch": 0.16879795396419436, "percentage": 2.41, "elapsed_time": "0:39:25", "remaining_time": "1 day, 2:35:16"} +{"current_steps": 67, "total_steps": 2737, "loss": 1.2661, "lr": 4.89051094890511e-06, "epoch": 0.17135549872122763, "percentage": 2.45, "elapsed_time": "0:40:00", "remaining_time": "1 day, 2:34:31"} +{"current_steps": 68, "total_steps": 2737, "loss": 1.2467, "lr": 4.963503649635037e-06, "epoch": 0.17391304347826086, "percentage": 2.48, "elapsed_time": "0:40:36", "remaining_time": "1 day, 2:34:09"} +{"current_steps": 69, "total_steps": 2737, "loss": 1.2303, "lr": 5.036496350364964e-06, "epoch": 0.17647058823529413, "percentage": 2.52, "elapsed_time": "0:41:12", "remaining_time": "1 day, 2:33:22"} +{"current_steps": 70, "total_steps": 2737, "loss": 1.2399, "lr": 5.1094890510948916e-06, "epoch": 0.17902813299232737, "percentage": 2.56, "elapsed_time": "0:41:48", "remaining_time": "1 day, 2:32:39"} +{"current_steps": 71, "total_steps": 2737, "loss": 1.2498, "lr": 5.182481751824818e-06, "epoch": 0.1815856777493606, "percentage": 2.59, "elapsed_time": "0:42:23", "remaining_time": "1 day, 2:31:54"} +{"current_steps": 72, "total_steps": 2737, "loss": 1.2643, "lr": 5.255474452554746e-06, "epoch": 0.18414322250639387, "percentage": 2.63, "elapsed_time": "0:42:59", "remaining_time": "1 day, 2:31:11"} +{"current_steps": 73, "total_steps": 2737, "loss": 1.2958, "lr": 5.328467153284672e-06, "epoch": 0.1867007672634271, "percentage": 2.67, "elapsed_time": "0:43:34", "remaining_time": "1 day, 2:30:25"} +{"current_steps": 74, "total_steps": 2737, "loss": 1.2422, "lr": 5.401459854014599e-06, "epoch": 0.18925831202046037, "percentage": 2.7, "elapsed_time": "0:44:10", "remaining_time": "1 day, 2:29:40"} +{"current_steps": 75, "total_steps": 2737, "loss": 1.2407, "lr": 5.474452554744526e-06, "epoch": 0.1918158567774936, "percentage": 2.74, "elapsed_time": "0:44:45", "remaining_time": "1 day, 2:28:54"} +{"current_steps": 76, "total_steps": 2737, "loss": 1.2456, "lr": 5.547445255474453e-06, "epoch": 0.19437340153452684, "percentage": 2.78, "elapsed_time": "0:45:21", "remaining_time": "1 day, 2:28:10"} +{"current_steps": 77, "total_steps": 2737, "loss": 1.2514, "lr": 5.62043795620438e-06, "epoch": 0.1969309462915601, "percentage": 2.81, "elapsed_time": "0:45:57", "remaining_time": "1 day, 2:27:27"} +{"current_steps": 78, "total_steps": 2737, "loss": 1.2335, "lr": 5.693430656934307e-06, "epoch": 0.19948849104859334, "percentage": 2.85, "elapsed_time": "0:46:32", "remaining_time": "1 day, 2:26:43"} +{"current_steps": 79, "total_steps": 2737, "loss": 1.2276, "lr": 5.766423357664233e-06, "epoch": 0.2020460358056266, "percentage": 2.89, "elapsed_time": "0:47:08", "remaining_time": "1 day, 2:26:01"} +{"current_steps": 80, "total_steps": 2737, "loss": 1.2435, "lr": 5.839416058394161e-06, "epoch": 0.20460358056265984, "percentage": 2.92, "elapsed_time": "0:47:43", "remaining_time": "1 day, 2:24:59"} +{"current_steps": 81, "total_steps": 2737, "loss": 1.2266, "lr": 5.912408759124088e-06, "epoch": 0.2071611253196931, "percentage": 2.96, "elapsed_time": "0:48:19", "remaining_time": "1 day, 2:24:40"} +{"current_steps": 82, "total_steps": 2737, "loss": 1.2261, "lr": 5.985401459854016e-06, "epoch": 0.20971867007672634, "percentage": 3.0, "elapsed_time": "0:48:55", "remaining_time": "1 day, 2:23:58"} +{"current_steps": 83, "total_steps": 2737, "loss": 1.2384, "lr": 6.058394160583942e-06, "epoch": 0.21227621483375958, "percentage": 3.03, "elapsed_time": "0:49:30", "remaining_time": "1 day, 2:23:15"} +{"current_steps": 84, "total_steps": 2737, "loss": 1.235, "lr": 6.13138686131387e-06, "epoch": 0.21483375959079284, "percentage": 3.07, "elapsed_time": "0:50:06", "remaining_time": "1 day, 2:22:34"} +{"current_steps": 85, "total_steps": 2737, "loss": 1.2838, "lr": 6.204379562043796e-06, "epoch": 0.21739130434782608, "percentage": 3.11, "elapsed_time": "0:50:42", "remaining_time": "1 day, 2:21:51"} +{"current_steps": 86, "total_steps": 2737, "loss": 1.2358, "lr": 6.277372262773723e-06, "epoch": 0.21994884910485935, "percentage": 3.14, "elapsed_time": "0:51:17", "remaining_time": "1 day, 2:21:09"} +{"current_steps": 87, "total_steps": 2737, "loss": 1.2419, "lr": 6.35036496350365e-06, "epoch": 0.22250639386189258, "percentage": 3.18, "elapsed_time": "0:51:53", "remaining_time": "1 day, 2:20:29"} +{"current_steps": 88, "total_steps": 2737, "loss": 1.2641, "lr": 6.423357664233577e-06, "epoch": 0.22506393861892582, "percentage": 3.22, "elapsed_time": "0:52:28", "remaining_time": "1 day, 2:19:46"} +{"current_steps": 89, "total_steps": 2737, "loss": 1.2239, "lr": 6.496350364963504e-06, "epoch": 0.22762148337595908, "percentage": 3.25, "elapsed_time": "0:53:04", "remaining_time": "1 day, 2:19:07"} +{"current_steps": 90, "total_steps": 2737, "loss": 1.2524, "lr": 6.569343065693431e-06, "epoch": 0.23017902813299232, "percentage": 3.29, "elapsed_time": "0:53:40", "remaining_time": "1 day, 2:18:25"} +{"current_steps": 91, "total_steps": 2737, "loss": 1.2494, "lr": 6.6423357664233575e-06, "epoch": 0.23273657289002558, "percentage": 3.32, "elapsed_time": "0:54:15", "remaining_time": "1 day, 2:17:44"} +{"current_steps": 92, "total_steps": 2737, "loss": 1.2537, "lr": 6.715328467153285e-06, "epoch": 0.23529411764705882, "percentage": 3.36, "elapsed_time": "0:54:51", "remaining_time": "1 day, 2:17:14"} +{"current_steps": 93, "total_steps": 2737, "loss": 1.2349, "lr": 6.7883211678832115e-06, "epoch": 0.23785166240409208, "percentage": 3.4, "elapsed_time": "0:55:27", "remaining_time": "1 day, 2:16:33"} +{"current_steps": 94, "total_steps": 2737, "loss": 1.2583, "lr": 6.86131386861314e-06, "epoch": 0.24040920716112532, "percentage": 3.43, "elapsed_time": "0:56:02", "remaining_time": "1 day, 2:15:51"} +{"current_steps": 95, "total_steps": 2737, "loss": 1.2029, "lr": 6.934306569343066e-06, "epoch": 0.24296675191815856, "percentage": 3.47, "elapsed_time": "0:56:38", "remaining_time": "1 day, 2:15:11"} +{"current_steps": 96, "total_steps": 2737, "loss": 1.2423, "lr": 7.007299270072994e-06, "epoch": 0.24552429667519182, "percentage": 3.51, "elapsed_time": "0:57:13", "remaining_time": "1 day, 2:14:22"} +{"current_steps": 97, "total_steps": 2737, "loss": 1.2394, "lr": 7.08029197080292e-06, "epoch": 0.24808184143222506, "percentage": 3.54, "elapsed_time": "0:57:49", "remaining_time": "1 day, 2:13:40"} +{"current_steps": 98, "total_steps": 2737, "loss": 1.2122, "lr": 7.153284671532848e-06, "epoch": 0.2506393861892583, "percentage": 3.58, "elapsed_time": "0:58:24", "remaining_time": "1 day, 2:13:01"} +{"current_steps": 99, "total_steps": 2737, "loss": 1.2117, "lr": 7.2262773722627744e-06, "epoch": 0.2531969309462916, "percentage": 3.62, "elapsed_time": "0:59:00", "remaining_time": "1 day, 2:12:21"} +{"current_steps": 100, "total_steps": 2737, "loss": 1.2041, "lr": 7.299270072992701e-06, "epoch": 0.2557544757033248, "percentage": 3.65, "elapsed_time": "0:59:36", "remaining_time": "1 day, 2:11:41"} +{"current_steps": 101, "total_steps": 2737, "loss": 1.2231, "lr": 7.3722627737226285e-06, "epoch": 0.25831202046035806, "percentage": 3.69, "elapsed_time": "1:00:11", "remaining_time": "1 day, 2:10:54"} +{"current_steps": 102, "total_steps": 2737, "loss": 1.2325, "lr": 7.445255474452555e-06, "epoch": 0.2608695652173913, "percentage": 3.73, "elapsed_time": "1:00:46", "remaining_time": "1 day, 2:10:13"} +{"current_steps": 103, "total_steps": 2737, "loss": 1.2026, "lr": 7.5182481751824825e-06, "epoch": 0.26342710997442453, "percentage": 3.76, "elapsed_time": "1:01:22", "remaining_time": "1 day, 2:09:33"} +{"current_steps": 104, "total_steps": 2737, "loss": 1.193, "lr": 7.591240875912409e-06, "epoch": 0.2659846547314578, "percentage": 3.8, "elapsed_time": "1:01:58", "remaining_time": "1 day, 2:08:54"} +{"current_steps": 105, "total_steps": 2737, "loss": 1.2009, "lr": 7.664233576642336e-06, "epoch": 0.26854219948849106, "percentage": 3.84, "elapsed_time": "1:02:33", "remaining_time": "1 day, 2:08:14"} +{"current_steps": 106, "total_steps": 2737, "loss": 1.1909, "lr": 7.737226277372264e-06, "epoch": 0.2710997442455243, "percentage": 3.87, "elapsed_time": "1:03:09", "remaining_time": "1 day, 2:07:34"} +{"current_steps": 107, "total_steps": 2737, "loss": 1.2194, "lr": 7.810218978102191e-06, "epoch": 0.27365728900255754, "percentage": 3.91, "elapsed_time": "1:03:44", "remaining_time": "1 day, 2:06:53"} +{"current_steps": 108, "total_steps": 2737, "loss": 1.1985, "lr": 7.883211678832117e-06, "epoch": 0.27621483375959077, "percentage": 3.95, "elapsed_time": "1:04:20", "remaining_time": "1 day, 2:06:15"} +{"current_steps": 109, "total_steps": 2737, "loss": 1.2218, "lr": 7.956204379562045e-06, "epoch": 0.27877237851662406, "percentage": 3.98, "elapsed_time": "1:04:56", "remaining_time": "1 day, 2:05:38"} +{"current_steps": 110, "total_steps": 2737, "loss": 1.222, "lr": 8.029197080291972e-06, "epoch": 0.2813299232736573, "percentage": 4.02, "elapsed_time": "1:05:31", "remaining_time": "1 day, 2:04:59"} +{"current_steps": 111, "total_steps": 2737, "loss": 1.2242, "lr": 8.1021897810219e-06, "epoch": 0.28388746803069054, "percentage": 4.06, "elapsed_time": "1:06:07", "remaining_time": "1 day, 2:04:28"} +{"current_steps": 112, "total_steps": 2737, "loss": 1.2118, "lr": 8.175182481751825e-06, "epoch": 0.2864450127877238, "percentage": 4.09, "elapsed_time": "1:06:43", "remaining_time": "1 day, 2:03:41"} +{"current_steps": 113, "total_steps": 2737, "loss": 1.1849, "lr": 8.248175182481753e-06, "epoch": 0.289002557544757, "percentage": 4.13, "elapsed_time": "1:07:19", "remaining_time": "1 day, 2:03:32"} +{"current_steps": 114, "total_steps": 2737, "loss": 1.2103, "lr": 8.32116788321168e-06, "epoch": 0.2915601023017903, "percentage": 4.17, "elapsed_time": "1:07:55", "remaining_time": "1 day, 2:02:53"} +{"current_steps": 115, "total_steps": 2737, "loss": 1.1937, "lr": 8.394160583941606e-06, "epoch": 0.29411764705882354, "percentage": 4.2, "elapsed_time": "1:08:31", "remaining_time": "1 day, 2:02:14"} +{"current_steps": 116, "total_steps": 2737, "loss": 1.2028, "lr": 8.467153284671533e-06, "epoch": 0.2966751918158568, "percentage": 4.24, "elapsed_time": "1:09:06", "remaining_time": "1 day, 2:01:35"} +{"current_steps": 117, "total_steps": 2737, "loss": 1.1976, "lr": 8.54014598540146e-06, "epoch": 0.29923273657289, "percentage": 4.27, "elapsed_time": "1:09:41", "remaining_time": "1 day, 2:00:45"} +{"current_steps": 118, "total_steps": 2737, "loss": 1.2476, "lr": 8.613138686131386e-06, "epoch": 0.30179028132992325, "percentage": 4.31, "elapsed_time": "1:10:17", "remaining_time": "1 day, 2:00:05"} +{"current_steps": 119, "total_steps": 2737, "loss": 1.2087, "lr": 8.686131386861315e-06, "epoch": 0.30434782608695654, "percentage": 4.35, "elapsed_time": "1:10:53", "remaining_time": "1 day, 1:59:28"} +{"current_steps": 120, "total_steps": 2737, "loss": 1.214, "lr": 8.759124087591241e-06, "epoch": 0.3069053708439898, "percentage": 4.38, "elapsed_time": "1:11:28", "remaining_time": "1 day, 1:58:37"} +{"current_steps": 121, "total_steps": 2737, "loss": 1.2276, "lr": 8.832116788321169e-06, "epoch": 0.309462915601023, "percentage": 4.42, "elapsed_time": "1:12:03", "remaining_time": "1 day, 1:57:58"} +{"current_steps": 122, "total_steps": 2737, "loss": 1.1805, "lr": 8.905109489051096e-06, "epoch": 0.31202046035805625, "percentage": 4.46, "elapsed_time": "1:12:39", "remaining_time": "1 day, 1:57:20"} +{"current_steps": 123, "total_steps": 2737, "loss": 1.2007, "lr": 8.978102189781024e-06, "epoch": 0.3145780051150895, "percentage": 4.49, "elapsed_time": "1:13:14", "remaining_time": "1 day, 1:56:41"} +{"current_steps": 124, "total_steps": 2737, "loss": 1.1966, "lr": 9.05109489051095e-06, "epoch": 0.3171355498721228, "percentage": 4.53, "elapsed_time": "1:13:50", "remaining_time": "1 day, 1:56:03"} +{"current_steps": 125, "total_steps": 2737, "loss": 1.1739, "lr": 9.124087591240877e-06, "epoch": 0.319693094629156, "percentage": 4.57, "elapsed_time": "1:14:26", "remaining_time": "1 day, 1:55:25"} +{"current_steps": 126, "total_steps": 2737, "loss": 1.2309, "lr": 9.197080291970804e-06, "epoch": 0.32225063938618925, "percentage": 4.6, "elapsed_time": "1:15:02", "remaining_time": "1 day, 1:55:08"} +{"current_steps": 127, "total_steps": 2737, "loss": 1.1718, "lr": 9.27007299270073e-06, "epoch": 0.3248081841432225, "percentage": 4.64, "elapsed_time": "1:15:38", "remaining_time": "1 day, 1:54:29"} +{"current_steps": 128, "total_steps": 2737, "loss": 1.1981, "lr": 9.343065693430657e-06, "epoch": 0.3273657289002558, "percentage": 4.68, "elapsed_time": "1:16:14", "remaining_time": "1 day, 1:53:52"} +{"current_steps": 129, "total_steps": 2737, "loss": 1.187, "lr": 9.416058394160585e-06, "epoch": 0.329923273657289, "percentage": 4.71, "elapsed_time": "1:16:49", "remaining_time": "1 day, 1:53:14"} +{"current_steps": 130, "total_steps": 2737, "loss": 1.2154, "lr": 9.48905109489051e-06, "epoch": 0.33248081841432225, "percentage": 4.75, "elapsed_time": "1:17:25", "remaining_time": "1 day, 1:52:35"} +{"current_steps": 131, "total_steps": 2737, "loss": 1.1823, "lr": 9.56204379562044e-06, "epoch": 0.3350383631713555, "percentage": 4.79, "elapsed_time": "1:18:00", "remaining_time": "1 day, 1:51:57"} +{"current_steps": 132, "total_steps": 2737, "loss": 1.2399, "lr": 9.635036496350367e-06, "epoch": 0.3375959079283887, "percentage": 4.82, "elapsed_time": "1:18:36", "remaining_time": "1 day, 1:51:19"} +{"current_steps": 133, "total_steps": 2737, "loss": 1.2114, "lr": 9.708029197080293e-06, "epoch": 0.340153452685422, "percentage": 4.86, "elapsed_time": "1:19:13", "remaining_time": "1 day, 1:50:59"} +{"current_steps": 134, "total_steps": 2737, "loss": 1.1832, "lr": 9.78102189781022e-06, "epoch": 0.34271099744245526, "percentage": 4.9, "elapsed_time": "1:19:48", "remaining_time": "1 day, 1:50:21"} +{"current_steps": 135, "total_steps": 2737, "loss": 1.2389, "lr": 9.854014598540148e-06, "epoch": 0.3452685421994885, "percentage": 4.93, "elapsed_time": "1:20:24", "remaining_time": "1 day, 1:49:46"} +{"current_steps": 136, "total_steps": 2737, "loss": 1.1965, "lr": 9.927007299270073e-06, "epoch": 0.34782608695652173, "percentage": 4.97, "elapsed_time": "1:20:59", "remaining_time": "1 day, 1:49:06"} +{"current_steps": 137, "total_steps": 2737, "loss": 1.2094, "lr": 1e-05, "epoch": 0.35038363171355497, "percentage": 5.01, "elapsed_time": "1:21:35", "remaining_time": "1 day, 1:48:27"} +{"current_steps": 138, "total_steps": 2737, "loss": 1.1974, "lr": 1.0072992700729928e-05, "epoch": 0.35294117647058826, "percentage": 5.04, "elapsed_time": "1:22:12", "remaining_time": "1 day, 1:48:09"} +{"current_steps": 139, "total_steps": 2737, "loss": 1.1736, "lr": 1.0145985401459854e-05, "epoch": 0.3554987212276215, "percentage": 5.08, "elapsed_time": "1:22:47", "remaining_time": "1 day, 1:47:31"} +{"current_steps": 140, "total_steps": 2737, "loss": 1.2279, "lr": 1.0218978102189783e-05, "epoch": 0.35805626598465473, "percentage": 5.12, "elapsed_time": "1:23:23", "remaining_time": "1 day, 1:46:51"} +{"current_steps": 141, "total_steps": 2737, "loss": 1.1409, "lr": 1.0291970802919709e-05, "epoch": 0.36061381074168797, "percentage": 5.15, "elapsed_time": "1:23:58", "remaining_time": "1 day, 1:46:12"} +{"current_steps": 142, "total_steps": 2737, "loss": 1.1957, "lr": 1.0364963503649636e-05, "epoch": 0.3631713554987212, "percentage": 5.19, "elapsed_time": "1:24:34", "remaining_time": "1 day, 1:45:36"} +{"current_steps": 143, "total_steps": 2737, "loss": 1.2193, "lr": 1.0437956204379562e-05, "epoch": 0.3657289002557545, "percentage": 5.22, "elapsed_time": "1:25:11", "remaining_time": "1 day, 1:45:15"} +{"current_steps": 144, "total_steps": 2737, "loss": 1.1866, "lr": 1.0510948905109491e-05, "epoch": 0.36828644501278773, "percentage": 5.26, "elapsed_time": "1:25:46", "remaining_time": "1 day, 1:44:36"} +{"current_steps": 145, "total_steps": 2737, "loss": 1.2015, "lr": 1.0583941605839417e-05, "epoch": 0.37084398976982097, "percentage": 5.3, "elapsed_time": "1:26:22", "remaining_time": "1 day, 1:43:57"} +{"current_steps": 146, "total_steps": 2737, "loss": 1.1886, "lr": 1.0656934306569344e-05, "epoch": 0.3734015345268542, "percentage": 5.33, "elapsed_time": "1:26:57", "remaining_time": "1 day, 1:43:19"} +{"current_steps": 147, "total_steps": 2737, "loss": 1.2199, "lr": 1.072992700729927e-05, "epoch": 0.37595907928388744, "percentage": 5.37, "elapsed_time": "1:27:32", "remaining_time": "1 day, 1:42:31"} +{"current_steps": 148, "total_steps": 2737, "loss": 1.1829, "lr": 1.0802919708029198e-05, "epoch": 0.37851662404092073, "percentage": 5.41, "elapsed_time": "1:28:08", "remaining_time": "1 day, 1:41:53"} +{"current_steps": 149, "total_steps": 2737, "loss": 1.1655, "lr": 1.0875912408759123e-05, "epoch": 0.38107416879795397, "percentage": 5.44, "elapsed_time": "1:28:44", "remaining_time": "1 day, 1:41:14"} +{"current_steps": 150, "total_steps": 2737, "loss": 1.1815, "lr": 1.0948905109489052e-05, "epoch": 0.3836317135549872, "percentage": 5.48, "elapsed_time": "1:29:18", "remaining_time": "1 day, 1:40:22"} +{"current_steps": 151, "total_steps": 2737, "loss": 1.1848, "lr": 1.102189781021898e-05, "epoch": 0.38618925831202044, "percentage": 5.52, "elapsed_time": "1:29:55", "remaining_time": "1 day, 1:39:54"} +{"current_steps": 152, "total_steps": 2737, "loss": 1.188, "lr": 1.1094890510948906e-05, "epoch": 0.3887468030690537, "percentage": 5.55, "elapsed_time": "1:30:30", "remaining_time": "1 day, 1:39:15"} +{"current_steps": 153, "total_steps": 2737, "loss": 1.1772, "lr": 1.1167883211678833e-05, "epoch": 0.391304347826087, "percentage": 5.59, "elapsed_time": "1:31:06", "remaining_time": "1 day, 1:38:37"} +{"current_steps": 154, "total_steps": 2737, "loss": 1.1807, "lr": 1.124087591240876e-05, "epoch": 0.3938618925831202, "percentage": 5.63, "elapsed_time": "1:31:41", "remaining_time": "1 day, 1:37:56"} +{"current_steps": 155, "total_steps": 2737, "loss": 1.1949, "lr": 1.1313868613138688e-05, "epoch": 0.39641943734015345, "percentage": 5.66, "elapsed_time": "1:32:17", "remaining_time": "1 day, 1:37:18"} +{"current_steps": 156, "total_steps": 2737, "loss": 1.1996, "lr": 1.1386861313868614e-05, "epoch": 0.3989769820971867, "percentage": 5.7, "elapsed_time": "1:32:52", "remaining_time": "1 day, 1:36:41"} +{"current_steps": 157, "total_steps": 2737, "loss": 1.2097, "lr": 1.1459854014598541e-05, "epoch": 0.40153452685422, "percentage": 5.74, "elapsed_time": "1:33:28", "remaining_time": "1 day, 1:36:11"} +{"current_steps": 158, "total_steps": 2737, "loss": 1.2082, "lr": 1.1532846715328467e-05, "epoch": 0.4040920716112532, "percentage": 5.77, "elapsed_time": "1:34:04", "remaining_time": "1 day, 1:35:30"} +{"current_steps": 159, "total_steps": 2737, "loss": 1.1586, "lr": 1.1605839416058396e-05, "epoch": 0.40664961636828645, "percentage": 5.81, "elapsed_time": "1:34:39", "remaining_time": "1 day, 1:34:52"} +{"current_steps": 160, "total_steps": 2737, "loss": 1.1765, "lr": 1.1678832116788322e-05, "epoch": 0.4092071611253197, "percentage": 5.85, "elapsed_time": "1:35:15", "remaining_time": "1 day, 1:34:14"} +{"current_steps": 161, "total_steps": 2737, "loss": 1.1701, "lr": 1.1751824817518249e-05, "epoch": 0.4117647058823529, "percentage": 5.88, "elapsed_time": "1:35:51", "remaining_time": "1 day, 1:33:36"} +{"current_steps": 162, "total_steps": 2737, "loss": 1.1686, "lr": 1.1824817518248176e-05, "epoch": 0.4143222506393862, "percentage": 5.92, "elapsed_time": "1:36:26", "remaining_time": "1 day, 1:33:02"} +{"current_steps": 163, "total_steps": 2737, "loss": 1.169, "lr": 1.1897810218978102e-05, "epoch": 0.41687979539641945, "percentage": 5.96, "elapsed_time": "1:37:02", "remaining_time": "1 day, 1:32:26"} +{"current_steps": 164, "total_steps": 2737, "loss": 1.1821, "lr": 1.1970802919708031e-05, "epoch": 0.4194373401534527, "percentage": 5.99, "elapsed_time": "1:37:38", "remaining_time": "1 day, 1:31:48"} +{"current_steps": 165, "total_steps": 2737, "loss": 1.1538, "lr": 1.2043795620437957e-05, "epoch": 0.4219948849104859, "percentage": 6.03, "elapsed_time": "1:38:13", "remaining_time": "1 day, 1:31:09"} +{"current_steps": 166, "total_steps": 2737, "loss": 1.1787, "lr": 1.2116788321167885e-05, "epoch": 0.42455242966751916, "percentage": 6.07, "elapsed_time": "1:38:49", "remaining_time": "1 day, 1:30:31"} +{"current_steps": 167, "total_steps": 2737, "loss": 1.1774, "lr": 1.218978102189781e-05, "epoch": 0.42710997442455245, "percentage": 6.1, "elapsed_time": "1:39:24", "remaining_time": "1 day, 1:29:55"} +{"current_steps": 168, "total_steps": 2737, "loss": 1.1823, "lr": 1.226277372262774e-05, "epoch": 0.4296675191815857, "percentage": 6.14, "elapsed_time": "1:40:00", "remaining_time": "1 day, 1:29:18"} +{"current_steps": 169, "total_steps": 2737, "loss": 1.1808, "lr": 1.2335766423357665e-05, "epoch": 0.4322250639386189, "percentage": 6.17, "elapsed_time": "1:40:36", "remaining_time": "1 day, 1:28:43"} +{"current_steps": 170, "total_steps": 2737, "loss": 1.1646, "lr": 1.2408759124087593e-05, "epoch": 0.43478260869565216, "percentage": 6.21, "elapsed_time": "1:41:11", "remaining_time": "1 day, 1:28:06"} +{"current_steps": 171, "total_steps": 2737, "loss": 1.1742, "lr": 1.2481751824817518e-05, "epoch": 0.4373401534526854, "percentage": 6.25, "elapsed_time": "1:41:47", "remaining_time": "1 day, 1:27:29"} +{"current_steps": 172, "total_steps": 2737, "loss": 1.1741, "lr": 1.2554744525547446e-05, "epoch": 0.4398976982097187, "percentage": 6.28, "elapsed_time": "1:42:23", "remaining_time": "1 day, 1:26:52"} +{"current_steps": 173, "total_steps": 2737, "loss": 1.1927, "lr": 1.2627737226277371e-05, "epoch": 0.4424552429667519, "percentage": 6.32, "elapsed_time": "1:42:58", "remaining_time": "1 day, 1:26:14"} +{"current_steps": 174, "total_steps": 2737, "loss": 1.199, "lr": 1.27007299270073e-05, "epoch": 0.44501278772378516, "percentage": 6.36, "elapsed_time": "1:43:34", "remaining_time": "1 day, 1:25:35"} +{"current_steps": 175, "total_steps": 2737, "loss": 1.1742, "lr": 1.2773722627737228e-05, "epoch": 0.4475703324808184, "percentage": 6.39, "elapsed_time": "1:44:10", "remaining_time": "1 day, 1:25:00"} +{"current_steps": 176, "total_steps": 2737, "loss": 1.2027, "lr": 1.2846715328467154e-05, "epoch": 0.45012787723785164, "percentage": 6.43, "elapsed_time": "1:44:45", "remaining_time": "1 day, 1:24:23"} +{"current_steps": 177, "total_steps": 2737, "loss": 1.1757, "lr": 1.2919708029197083e-05, "epoch": 0.45268542199488493, "percentage": 6.47, "elapsed_time": "1:45:21", "remaining_time": "1 day, 1:23:45"} +{"current_steps": 178, "total_steps": 2737, "loss": 1.1716, "lr": 1.2992700729927009e-05, "epoch": 0.45524296675191817, "percentage": 6.5, "elapsed_time": "1:45:56", "remaining_time": "1 day, 1:23:07"} +{"current_steps": 179, "total_steps": 2737, "loss": 1.1583, "lr": 1.3065693430656936e-05, "epoch": 0.4578005115089514, "percentage": 6.54, "elapsed_time": "1:46:32", "remaining_time": "1 day, 1:22:29"} +{"current_steps": 180, "total_steps": 2737, "loss": 1.1657, "lr": 1.3138686131386862e-05, "epoch": 0.46035805626598464, "percentage": 6.58, "elapsed_time": "1:47:07", "remaining_time": "1 day, 1:21:41"} +{"current_steps": 181, "total_steps": 2737, "loss": 1.1922, "lr": 1.321167883211679e-05, "epoch": 0.4629156010230179, "percentage": 6.61, "elapsed_time": "1:47:42", "remaining_time": "1 day, 1:21:04"} +{"current_steps": 182, "total_steps": 2737, "loss": 1.1717, "lr": 1.3284671532846715e-05, "epoch": 0.46547314578005117, "percentage": 6.65, "elapsed_time": "1:48:18", "remaining_time": "1 day, 1:20:26"} +{"current_steps": 183, "total_steps": 2737, "loss": 1.1801, "lr": 1.3357664233576644e-05, "epoch": 0.4680306905370844, "percentage": 6.69, "elapsed_time": "1:48:54", "remaining_time": "1 day, 1:19:51"} +{"current_steps": 184, "total_steps": 2737, "loss": 1.177, "lr": 1.343065693430657e-05, "epoch": 0.47058823529411764, "percentage": 6.72, "elapsed_time": "1:49:29", "remaining_time": "1 day, 1:19:13"} +{"current_steps": 185, "total_steps": 2737, "loss": 1.1808, "lr": 1.3503649635036497e-05, "epoch": 0.4731457800511509, "percentage": 6.76, "elapsed_time": "1:50:05", "remaining_time": "1 day, 1:18:35"} +{"current_steps": 186, "total_steps": 2737, "loss": 1.1821, "lr": 1.3576642335766423e-05, "epoch": 0.47570332480818417, "percentage": 6.8, "elapsed_time": "1:50:40", "remaining_time": "1 day, 1:17:58"} +{"current_steps": 187, "total_steps": 2737, "loss": 1.1615, "lr": 1.3649635036496352e-05, "epoch": 0.4782608695652174, "percentage": 6.83, "elapsed_time": "1:51:16", "remaining_time": "1 day, 1:17:20"} +{"current_steps": 188, "total_steps": 2737, "loss": 1.1912, "lr": 1.372262773722628e-05, "epoch": 0.48081841432225064, "percentage": 6.87, "elapsed_time": "1:51:51", "remaining_time": "1 day, 1:16:43"} +{"current_steps": 189, "total_steps": 2737, "loss": 1.1678, "lr": 1.3795620437956205e-05, "epoch": 0.4833759590792839, "percentage": 6.91, "elapsed_time": "1:52:27", "remaining_time": "1 day, 1:16:06"} +{"current_steps": 190, "total_steps": 2737, "loss": 1.1745, "lr": 1.3868613138686133e-05, "epoch": 0.4859335038363171, "percentage": 6.94, "elapsed_time": "1:53:03", "remaining_time": "1 day, 1:15:28"} +{"current_steps": 191, "total_steps": 2737, "loss": 1.1193, "lr": 1.3941605839416059e-05, "epoch": 0.4884910485933504, "percentage": 6.98, "elapsed_time": "1:53:38", "remaining_time": "1 day, 1:14:52"} +{"current_steps": 192, "total_steps": 2737, "loss": 1.1622, "lr": 1.4014598540145988e-05, "epoch": 0.49104859335038364, "percentage": 7.01, "elapsed_time": "1:54:14", "remaining_time": "1 day, 1:14:15"} +{"current_steps": 193, "total_steps": 2737, "loss": 1.136, "lr": 1.4087591240875913e-05, "epoch": 0.4936061381074169, "percentage": 7.05, "elapsed_time": "1:54:51", "remaining_time": "1 day, 1:13:54"} +{"current_steps": 194, "total_steps": 2737, "loss": 1.1306, "lr": 1.416058394160584e-05, "epoch": 0.4961636828644501, "percentage": 7.09, "elapsed_time": "1:55:27", "remaining_time": "1 day, 1:13:28"} +{"current_steps": 195, "total_steps": 2737, "loss": 1.2086, "lr": 1.4233576642335767e-05, "epoch": 0.49872122762148335, "percentage": 7.12, "elapsed_time": "1:56:03", "remaining_time": "1 day, 1:12:50"} +{"current_steps": 196, "total_steps": 2737, "loss": 1.1628, "lr": 1.4306569343065696e-05, "epoch": 0.5012787723785166, "percentage": 7.16, "elapsed_time": "1:56:38", "remaining_time": "1 day, 1:12:12"} +{"current_steps": 197, "total_steps": 2737, "loss": 1.1518, "lr": 1.4379562043795621e-05, "epoch": 0.5038363171355499, "percentage": 7.2, "elapsed_time": "1:57:15", "remaining_time": "1 day, 1:11:45"} +{"current_steps": 198, "total_steps": 2737, "loss": 1.1856, "lr": 1.4452554744525549e-05, "epoch": 0.5063938618925832, "percentage": 7.23, "elapsed_time": "1:57:50", "remaining_time": "1 day, 1:11:07"} +{"current_steps": 199, "total_steps": 2737, "loss": 1.1483, "lr": 1.4525547445255475e-05, "epoch": 0.5089514066496164, "percentage": 7.27, "elapsed_time": "1:58:26", "remaining_time": "1 day, 1:10:29"} +{"current_steps": 200, "total_steps": 2737, "loss": 1.1629, "lr": 1.4598540145985402e-05, "epoch": 0.5115089514066496, "percentage": 7.31, "elapsed_time": "1:59:01", "remaining_time": "1 day, 1:09:51"} +{"current_steps": 201, "total_steps": 2737, "loss": 1.1442, "lr": 1.4671532846715331e-05, "epoch": 0.5140664961636828, "percentage": 7.34, "elapsed_time": "1:59:37", "remaining_time": "1 day, 1:09:15"} +{"current_steps": 202, "total_steps": 2737, "loss": 1.1385, "lr": 1.4744525547445257e-05, "epoch": 0.5166240409207161, "percentage": 7.38, "elapsed_time": "2:00:12", "remaining_time": "1 day, 1:08:35"} +{"current_steps": 203, "total_steps": 2737, "loss": 1.171, "lr": 1.4817518248175184e-05, "epoch": 0.5191815856777494, "percentage": 7.42, "elapsed_time": "2:00:48", "remaining_time": "1 day, 1:07:57"} +{"current_steps": 204, "total_steps": 2737, "loss": 1.1418, "lr": 1.489051094890511e-05, "epoch": 0.5217391304347826, "percentage": 7.45, "elapsed_time": "2:01:23", "remaining_time": "1 day, 1:07:20"} +{"current_steps": 205, "total_steps": 2737, "loss": 1.164, "lr": 1.4963503649635038e-05, "epoch": 0.5242966751918159, "percentage": 7.49, "elapsed_time": "2:01:59", "remaining_time": "1 day, 1:06:43"} +{"current_steps": 206, "total_steps": 2737, "loss": 1.1535, "lr": 1.5036496350364965e-05, "epoch": 0.5268542199488491, "percentage": 7.53, "elapsed_time": "2:02:34", "remaining_time": "1 day, 1:05:57"} +{"current_steps": 207, "total_steps": 2737, "loss": 1.1704, "lr": 1.5109489051094892e-05, "epoch": 0.5294117647058824, "percentage": 7.56, "elapsed_time": "2:03:09", "remaining_time": "1 day, 1:05:19"} +{"current_steps": 208, "total_steps": 2737, "loss": 1.1559, "lr": 1.5182481751824818e-05, "epoch": 0.5319693094629157, "percentage": 7.6, "elapsed_time": "2:03:45", "remaining_time": "1 day, 1:04:44"} +{"current_steps": 209, "total_steps": 2737, "loss": 1.1495, "lr": 1.5255474452554746e-05, "epoch": 0.5345268542199488, "percentage": 7.64, "elapsed_time": "2:04:20", "remaining_time": "1 day, 1:04:01"} +{"current_steps": 210, "total_steps": 2737, "loss": 1.1387, "lr": 1.5328467153284673e-05, "epoch": 0.5370843989769821, "percentage": 7.67, "elapsed_time": "2:04:56", "remaining_time": "1 day, 1:03:25"} +{"current_steps": 211, "total_steps": 2737, "loss": 1.1607, "lr": 1.54014598540146e-05, "epoch": 0.5396419437340153, "percentage": 7.71, "elapsed_time": "2:05:31", "remaining_time": "1 day, 1:02:48"} +{"current_steps": 212, "total_steps": 2737, "loss": 1.1286, "lr": 1.5474452554744528e-05, "epoch": 0.5421994884910486, "percentage": 7.75, "elapsed_time": "2:06:07", "remaining_time": "1 day, 1:02:11"} +{"current_steps": 213, "total_steps": 2737, "loss": 1.1701, "lr": 1.5547445255474454e-05, "epoch": 0.5447570332480819, "percentage": 7.78, "elapsed_time": "2:06:43", "remaining_time": "1 day, 1:01:34"} +{"current_steps": 214, "total_steps": 2737, "loss": 1.1236, "lr": 1.5620437956204383e-05, "epoch": 0.5473145780051151, "percentage": 7.82, "elapsed_time": "2:07:18", "remaining_time": "1 day, 1:00:57"} +{"current_steps": 215, "total_steps": 2737, "loss": 1.1289, "lr": 1.569343065693431e-05, "epoch": 0.5498721227621484, "percentage": 7.86, "elapsed_time": "2:07:54", "remaining_time": "1 day, 1:00:20"} +{"current_steps": 216, "total_steps": 2737, "loss": 1.1636, "lr": 1.5766423357664234e-05, "epoch": 0.5524296675191815, "percentage": 7.89, "elapsed_time": "2:08:29", "remaining_time": "1 day, 0:59:43"} +{"current_steps": 217, "total_steps": 2737, "loss": 1.1368, "lr": 1.583941605839416e-05, "epoch": 0.5549872122762148, "percentage": 7.93, "elapsed_time": "2:09:05", "remaining_time": "1 day, 0:59:05"} +{"current_steps": 218, "total_steps": 2737, "loss": 1.1077, "lr": 1.591240875912409e-05, "epoch": 0.5575447570332481, "percentage": 7.96, "elapsed_time": "2:09:40", "remaining_time": "1 day, 0:58:29"} +{"current_steps": 219, "total_steps": 2737, "loss": 1.1333, "lr": 1.5985401459854015e-05, "epoch": 0.5601023017902813, "percentage": 8.0, "elapsed_time": "2:10:16", "remaining_time": "1 day, 0:57:52"} +{"current_steps": 220, "total_steps": 2737, "loss": 1.1865, "lr": 1.6058394160583944e-05, "epoch": 0.5626598465473146, "percentage": 8.04, "elapsed_time": "2:10:52", "remaining_time": "1 day, 0:57:15"} +{"current_steps": 221, "total_steps": 2737, "loss": 1.1293, "lr": 1.613138686131387e-05, "epoch": 0.5652173913043478, "percentage": 8.07, "elapsed_time": "2:11:27", "remaining_time": "1 day, 0:56:39"} +{"current_steps": 222, "total_steps": 2737, "loss": 1.1296, "lr": 1.62043795620438e-05, "epoch": 0.5677749360613811, "percentage": 8.11, "elapsed_time": "2:12:03", "remaining_time": "1 day, 0:56:02"} +{"current_steps": 223, "total_steps": 2737, "loss": 1.1344, "lr": 1.6277372262773725e-05, "epoch": 0.5703324808184144, "percentage": 8.15, "elapsed_time": "2:12:39", "remaining_time": "1 day, 0:55:26"} +{"current_steps": 224, "total_steps": 2737, "loss": 1.1665, "lr": 1.635036496350365e-05, "epoch": 0.5728900255754475, "percentage": 8.18, "elapsed_time": "2:13:15", "remaining_time": "1 day, 0:54:56"} +{"current_steps": 225, "total_steps": 2737, "loss": 1.1616, "lr": 1.642335766423358e-05, "epoch": 0.5754475703324808, "percentage": 8.22, "elapsed_time": "2:13:50", "remaining_time": "1 day, 0:54:20"} +{"current_steps": 226, "total_steps": 2737, "loss": 1.1346, "lr": 1.6496350364963505e-05, "epoch": 0.578005115089514, "percentage": 8.26, "elapsed_time": "2:14:26", "remaining_time": "1 day, 0:53:43"} +{"current_steps": 227, "total_steps": 2737, "loss": 1.1474, "lr": 1.6569343065693434e-05, "epoch": 0.5805626598465473, "percentage": 8.29, "elapsed_time": "2:15:02", "remaining_time": "1 day, 0:53:07"} +{"current_steps": 228, "total_steps": 2737, "loss": 1.1328, "lr": 1.664233576642336e-05, "epoch": 0.5831202046035806, "percentage": 8.33, "elapsed_time": "2:15:37", "remaining_time": "1 day, 0:52:31"} +{"current_steps": 229, "total_steps": 2737, "loss": 1.1507, "lr": 1.6715328467153286e-05, "epoch": 0.5856777493606138, "percentage": 8.37, "elapsed_time": "2:16:13", "remaining_time": "1 day, 0:51:55"} +{"current_steps": 230, "total_steps": 2737, "loss": 1.1556, "lr": 1.678832116788321e-05, "epoch": 0.5882352941176471, "percentage": 8.4, "elapsed_time": "2:16:49", "remaining_time": "1 day, 0:51:18"} +{"current_steps": 231, "total_steps": 2737, "loss": 1.152, "lr": 1.686131386861314e-05, "epoch": 0.5907928388746803, "percentage": 8.44, "elapsed_time": "2:17:24", "remaining_time": "1 day, 0:50:41"} +{"current_steps": 232, "total_steps": 2737, "loss": 1.1398, "lr": 1.6934306569343066e-05, "epoch": 0.5933503836317136, "percentage": 8.48, "elapsed_time": "2:18:00", "remaining_time": "1 day, 0:50:08"} +{"current_steps": 233, "total_steps": 2737, "loss": 1.1447, "lr": 1.7007299270072995e-05, "epoch": 0.5959079283887468, "percentage": 8.51, "elapsed_time": "2:18:36", "remaining_time": "1 day, 0:49:32"} +{"current_steps": 234, "total_steps": 2737, "loss": 1.1005, "lr": 1.708029197080292e-05, "epoch": 0.59846547314578, "percentage": 8.55, "elapsed_time": "2:19:11", "remaining_time": "1 day, 0:48:55"} +{"current_steps": 235, "total_steps": 2737, "loss": 1.1227, "lr": 1.7153284671532847e-05, "epoch": 0.6010230179028133, "percentage": 8.59, "elapsed_time": "2:19:47", "remaining_time": "1 day, 0:48:18"} +{"current_steps": 236, "total_steps": 2737, "loss": 1.1505, "lr": 1.7226277372262773e-05, "epoch": 0.6035805626598465, "percentage": 8.62, "elapsed_time": "2:20:23", "remaining_time": "1 day, 0:47:45"} +{"current_steps": 237, "total_steps": 2737, "loss": 1.1308, "lr": 1.7299270072992702e-05, "epoch": 0.6061381074168798, "percentage": 8.66, "elapsed_time": "2:20:59", "remaining_time": "1 day, 0:47:18"} +{"current_steps": 238, "total_steps": 2737, "loss": 1.1181, "lr": 1.737226277372263e-05, "epoch": 0.6086956521739131, "percentage": 8.7, "elapsed_time": "2:21:35", "remaining_time": "1 day, 0:46:40"} +{"current_steps": 239, "total_steps": 2737, "loss": 1.1584, "lr": 1.7445255474452557e-05, "epoch": 0.6112531969309463, "percentage": 8.73, "elapsed_time": "2:22:10", "remaining_time": "1 day, 0:46:03"} +{"current_steps": 240, "total_steps": 2737, "loss": 1.1264, "lr": 1.7518248175182482e-05, "epoch": 0.6138107416879796, "percentage": 8.77, "elapsed_time": "2:22:46", "remaining_time": "1 day, 0:45:27"} +{"current_steps": 241, "total_steps": 2737, "loss": 1.1234, "lr": 1.7591240875912408e-05, "epoch": 0.6163682864450127, "percentage": 8.81, "elapsed_time": "2:23:21", "remaining_time": "1 day, 0:44:49"} +{"current_steps": 242, "total_steps": 2737, "loss": 1.1473, "lr": 1.7664233576642337e-05, "epoch": 0.618925831202046, "percentage": 8.84, "elapsed_time": "2:23:57", "remaining_time": "1 day, 0:44:12"} +{"current_steps": 243, "total_steps": 2737, "loss": 1.1443, "lr": 1.7737226277372263e-05, "epoch": 0.6214833759590793, "percentage": 8.88, "elapsed_time": "2:24:33", "remaining_time": "1 day, 0:43:35"} +{"current_steps": 244, "total_steps": 2737, "loss": 1.1898, "lr": 1.7810218978102192e-05, "epoch": 0.6240409207161125, "percentage": 8.91, "elapsed_time": "2:25:08", "remaining_time": "1 day, 0:42:55"} +{"current_steps": 245, "total_steps": 2737, "loss": 1.1501, "lr": 1.7883211678832118e-05, "epoch": 0.6265984654731458, "percentage": 8.95, "elapsed_time": "2:25:43", "remaining_time": "1 day, 0:42:15"} +{"current_steps": 246, "total_steps": 2737, "loss": 1.1452, "lr": 1.7956204379562047e-05, "epoch": 0.629156010230179, "percentage": 8.99, "elapsed_time": "2:26:19", "remaining_time": "1 day, 0:41:40"} +{"current_steps": 247, "total_steps": 2737, "loss": 1.1359, "lr": 1.8029197080291973e-05, "epoch": 0.6317135549872123, "percentage": 9.02, "elapsed_time": "2:26:54", "remaining_time": "1 day, 0:41:03"} +{"current_steps": 248, "total_steps": 2737, "loss": 1.1823, "lr": 1.81021897810219e-05, "epoch": 0.6342710997442456, "percentage": 9.06, "elapsed_time": "2:27:30", "remaining_time": "1 day, 0:40:26"} +{"current_steps": 249, "total_steps": 2737, "loss": 1.1632, "lr": 1.8175182481751824e-05, "epoch": 0.6368286445012787, "percentage": 9.1, "elapsed_time": "2:28:06", "remaining_time": "1 day, 0:39:49"} +{"current_steps": 250, "total_steps": 2737, "loss": 1.1409, "lr": 1.8248175182481753e-05, "epoch": 0.639386189258312, "percentage": 9.13, "elapsed_time": "2:28:41", "remaining_time": "1 day, 0:39:13"} +{"current_steps": 251, "total_steps": 2737, "loss": 1.1499, "lr": 1.8321167883211683e-05, "epoch": 0.6419437340153452, "percentage": 9.17, "elapsed_time": "2:29:17", "remaining_time": "1 day, 0:38:38"} +{"current_steps": 252, "total_steps": 2737, "loss": 1.154, "lr": 1.8394160583941608e-05, "epoch": 0.6445012787723785, "percentage": 9.21, "elapsed_time": "2:29:53", "remaining_time": "1 day, 0:38:01"} +{"current_steps": 253, "total_steps": 2737, "loss": 1.1355, "lr": 1.8467153284671534e-05, "epoch": 0.6470588235294118, "percentage": 9.24, "elapsed_time": "2:30:28", "remaining_time": "1 day, 0:37:25"} +{"current_steps": 254, "total_steps": 2737, "loss": 1.1874, "lr": 1.854014598540146e-05, "epoch": 0.649616368286445, "percentage": 9.28, "elapsed_time": "2:31:04", "remaining_time": "1 day, 0:36:49"} +{"current_steps": 255, "total_steps": 2737, "loss": 1.1374, "lr": 1.861313868613139e-05, "epoch": 0.6521739130434783, "percentage": 9.32, "elapsed_time": "2:31:39", "remaining_time": "1 day, 0:36:12"} +{"current_steps": 256, "total_steps": 2737, "loss": 1.1289, "lr": 1.8686131386861315e-05, "epoch": 0.6547314578005116, "percentage": 9.35, "elapsed_time": "2:32:14", "remaining_time": "1 day, 0:35:29"} +{"current_steps": 257, "total_steps": 2737, "loss": 1.1646, "lr": 1.8759124087591244e-05, "epoch": 0.6572890025575447, "percentage": 9.39, "elapsed_time": "2:32:50", "remaining_time": "1 day, 0:34:52"} +{"current_steps": 258, "total_steps": 2737, "loss": 1.1268, "lr": 1.883211678832117e-05, "epoch": 0.659846547314578, "percentage": 9.43, "elapsed_time": "2:33:26", "remaining_time": "1 day, 0:34:16"} +{"current_steps": 259, "total_steps": 2737, "loss": 1.1593, "lr": 1.8905109489051095e-05, "epoch": 0.6624040920716112, "percentage": 9.46, "elapsed_time": "2:34:01", "remaining_time": "1 day, 0:33:40"} +{"current_steps": 260, "total_steps": 2737, "loss": 1.1374, "lr": 1.897810218978102e-05, "epoch": 0.6649616368286445, "percentage": 9.5, "elapsed_time": "2:34:37", "remaining_time": "1 day, 0:33:05"} +{"current_steps": 261, "total_steps": 2737, "loss": 1.1093, "lr": 1.905109489051095e-05, "epoch": 0.6675191815856778, "percentage": 9.54, "elapsed_time": "2:35:12", "remaining_time": "1 day, 0:32:28"} +{"current_steps": 262, "total_steps": 2737, "loss": 1.1232, "lr": 1.912408759124088e-05, "epoch": 0.670076726342711, "percentage": 9.57, "elapsed_time": "2:35:48", "remaining_time": "1 day, 0:31:52"} +{"current_steps": 263, "total_steps": 2737, "loss": 1.1682, "lr": 1.9197080291970805e-05, "epoch": 0.6726342710997443, "percentage": 9.61, "elapsed_time": "2:36:24", "remaining_time": "1 day, 0:31:15"} +{"current_steps": 264, "total_steps": 2737, "loss": 1.1484, "lr": 1.9270072992700734e-05, "epoch": 0.6751918158567775, "percentage": 9.65, "elapsed_time": "2:36:59", "remaining_time": "1 day, 0:30:38"} +{"current_steps": 265, "total_steps": 2737, "loss": 1.1746, "lr": 1.934306569343066e-05, "epoch": 0.6777493606138107, "percentage": 9.68, "elapsed_time": "2:37:35", "remaining_time": "1 day, 0:30:02"} +{"current_steps": 266, "total_steps": 2737, "loss": 1.1414, "lr": 1.9416058394160586e-05, "epoch": 0.680306905370844, "percentage": 9.72, "elapsed_time": "2:38:11", "remaining_time": "1 day, 0:29:27"} +{"current_steps": 267, "total_steps": 2737, "loss": 1.096, "lr": 1.948905109489051e-05, "epoch": 0.6828644501278772, "percentage": 9.76, "elapsed_time": "2:38:46", "remaining_time": "1 day, 0:28:50"} +{"current_steps": 268, "total_steps": 2737, "loss": 1.1383, "lr": 1.956204379562044e-05, "epoch": 0.6854219948849105, "percentage": 9.79, "elapsed_time": "2:39:22", "remaining_time": "1 day, 0:28:16"} +{"current_steps": 269, "total_steps": 2737, "loss": 1.1157, "lr": 1.9635036496350366e-05, "epoch": 0.6879795396419437, "percentage": 9.83, "elapsed_time": "2:39:58", "remaining_time": "1 day, 0:27:40"} +{"current_steps": 270, "total_steps": 2737, "loss": 1.1569, "lr": 1.9708029197080295e-05, "epoch": 0.690537084398977, "percentage": 9.86, "elapsed_time": "2:40:33", "remaining_time": "1 day, 0:27:03"} +{"current_steps": 271, "total_steps": 2737, "loss": 1.1551, "lr": 1.978102189781022e-05, "epoch": 0.6930946291560103, "percentage": 9.9, "elapsed_time": "2:41:09", "remaining_time": "1 day, 0:26:27"} +{"current_steps": 272, "total_steps": 2737, "loss": 1.1155, "lr": 1.9854014598540147e-05, "epoch": 0.6956521739130435, "percentage": 9.94, "elapsed_time": "2:41:44", "remaining_time": "1 day, 0:25:50"} +{"current_steps": 273, "total_steps": 2737, "loss": 1.1293, "lr": 1.9927007299270073e-05, "epoch": 0.6982097186700768, "percentage": 9.97, "elapsed_time": "2:42:20", "remaining_time": "1 day, 0:25:12"} +{"current_steps": 274, "total_steps": 2737, "loss": 1.1495, "lr": 2e-05, "epoch": 0.7007672634271099, "percentage": 10.01, "elapsed_time": "2:42:55", "remaining_time": "1 day, 0:24:35"} +{"current_steps": 275, "total_steps": 2737, "loss": 1.1267, "lr": 1.9999991865312627e-05, "epoch": 0.7033248081841432, "percentage": 10.05, "elapsed_time": "2:43:31", "remaining_time": "1 day, 0:24:00"} +{"current_steps": 276, "total_steps": 2737, "loss": 1.1469, "lr": 1.9999967461263736e-05, "epoch": 0.7058823529411765, "percentage": 10.08, "elapsed_time": "2:44:07", "remaining_time": "1 day, 0:23:23"} +{"current_steps": 277, "total_steps": 2737, "loss": 1.1605, "lr": 1.9999926787893038e-05, "epoch": 0.7084398976982097, "percentage": 10.12, "elapsed_time": "2:44:42", "remaining_time": "1 day, 0:22:47"} +{"current_steps": 278, "total_steps": 2737, "loss": 1.1291, "lr": 1.99998698452667e-05, "epoch": 0.710997442455243, "percentage": 10.16, "elapsed_time": "2:45:18", "remaining_time": "1 day, 0:22:10"} +{"current_steps": 279, "total_steps": 2737, "loss": 1.1594, "lr": 1.999979663347736e-05, "epoch": 0.7135549872122762, "percentage": 10.19, "elapsed_time": "2:45:54", "remaining_time": "1 day, 0:21:43"} +{"current_steps": 280, "total_steps": 2737, "loss": 1.1245, "lr": 1.9999707152644143e-05, "epoch": 0.7161125319693095, "percentage": 10.23, "elapsed_time": "2:46:30", "remaining_time": "1 day, 0:21:05"} +{"current_steps": 281, "total_steps": 2737, "loss": 1.119, "lr": 1.999960140291262e-05, "epoch": 0.7186700767263428, "percentage": 10.27, "elapsed_time": "2:47:06", "remaining_time": "1 day, 0:20:36"} +{"current_steps": 282, "total_steps": 2737, "loss": 1.1468, "lr": 1.9999479384454838e-05, "epoch": 0.7212276214833759, "percentage": 10.3, "elapsed_time": "2:47:41", "remaining_time": "1 day, 0:19:55"} +{"current_steps": 283, "total_steps": 2737, "loss": 1.075, "lr": 1.9999341097469313e-05, "epoch": 0.7237851662404092, "percentage": 10.34, "elapsed_time": "2:48:17", "remaining_time": "1 day, 0:19:18"} +{"current_steps": 284, "total_steps": 2737, "loss": 1.1388, "lr": 1.9999186542181038e-05, "epoch": 0.7263427109974424, "percentage": 10.38, "elapsed_time": "2:48:53", "remaining_time": "1 day, 0:18:42"} +{"current_steps": 285, "total_steps": 2737, "loss": 1.1204, "lr": 1.9999015718841453e-05, "epoch": 0.7289002557544757, "percentage": 10.41, "elapsed_time": "2:49:28", "remaining_time": "1 day, 0:18:05"} +{"current_steps": 286, "total_steps": 2737, "loss": 1.1441, "lr": 1.9998828627728483e-05, "epoch": 0.731457800511509, "percentage": 10.45, "elapsed_time": "2:50:04", "remaining_time": "1 day, 0:17:29"} +{"current_steps": 287, "total_steps": 2737, "loss": 1.1418, "lr": 1.9998625269146515e-05, "epoch": 0.7340153452685422, "percentage": 10.49, "elapsed_time": "2:50:39", "remaining_time": "1 day, 0:16:53"} +{"current_steps": 288, "total_steps": 2737, "loss": 1.107, "lr": 1.9998405643426398e-05, "epoch": 0.7365728900255755, "percentage": 10.52, "elapsed_time": "2:51:16", "remaining_time": "1 day, 0:16:25"} +{"current_steps": 289, "total_steps": 2737, "loss": 1.1386, "lr": 1.999816975092545e-05, "epoch": 0.7391304347826086, "percentage": 10.56, "elapsed_time": "2:51:52", "remaining_time": "1 day, 0:15:48"} +{"current_steps": 290, "total_steps": 2737, "loss": 1.1478, "lr": 1.9997917592027455e-05, "epoch": 0.7416879795396419, "percentage": 10.6, "elapsed_time": "2:52:27", "remaining_time": "1 day, 0:15:12"} +{"current_steps": 291, "total_steps": 2737, "loss": 1.1322, "lr": 1.9997649167142654e-05, "epoch": 0.7442455242966752, "percentage": 10.63, "elapsed_time": "2:53:03", "remaining_time": "1 day, 0:14:35"} +{"current_steps": 292, "total_steps": 2737, "loss": 1.0975, "lr": 1.9997364476707765e-05, "epoch": 0.7468030690537084, "percentage": 10.67, "elapsed_time": "2:53:38", "remaining_time": "1 day, 0:13:58"} +{"current_steps": 293, "total_steps": 2737, "loss": 1.1234, "lr": 1.9997063521185956e-05, "epoch": 0.7493606138107417, "percentage": 10.71, "elapsed_time": "2:54:14", "remaining_time": "1 day, 0:13:23"} +{"current_steps": 294, "total_steps": 2737, "loss": 1.1204, "lr": 1.9996746301066867e-05, "epoch": 0.7519181585677749, "percentage": 10.74, "elapsed_time": "2:54:49", "remaining_time": "1 day, 0:12:46"} +{"current_steps": 295, "total_steps": 2737, "loss": 1.1101, "lr": 1.999641281686659e-05, "epoch": 0.7544757033248082, "percentage": 10.78, "elapsed_time": "2:55:25", "remaining_time": "1 day, 0:12:10"} +{"current_steps": 296, "total_steps": 2737, "loss": 1.1182, "lr": 1.999606306912769e-05, "epoch": 0.7570332480818415, "percentage": 10.81, "elapsed_time": "2:56:01", "remaining_time": "1 day, 0:11:33"} +{"current_steps": 297, "total_steps": 2737, "loss": 1.1576, "lr": 1.999569705841918e-05, "epoch": 0.7595907928388747, "percentage": 10.85, "elapsed_time": "2:56:36", "remaining_time": "1 day, 0:10:57"} +{"current_steps": 298, "total_steps": 2737, "loss": 1.1329, "lr": 1.9995314785336534e-05, "epoch": 0.7621483375959079, "percentage": 10.89, "elapsed_time": "2:57:11", "remaining_time": "1 day, 0:10:16"} +{"current_steps": 299, "total_steps": 2737, "loss": 1.1486, "lr": 1.999491625050169e-05, "epoch": 0.7647058823529411, "percentage": 10.92, "elapsed_time": "2:57:47", "remaining_time": "1 day, 0:09:40"} +{"current_steps": 300, "total_steps": 2737, "loss": 1.1067, "lr": 1.9994501454563046e-05, "epoch": 0.7672634271099744, "percentage": 10.96, "elapsed_time": "2:58:23", "remaining_time": "1 day, 0:09:06"} +{"current_steps": 301, "total_steps": 2737, "loss": 1.1391, "lr": 1.9994070398195437e-05, "epoch": 0.7698209718670077, "percentage": 11.0, "elapsed_time": "2:58:58", "remaining_time": "1 day, 0:08:29"} +{"current_steps": 302, "total_steps": 2737, "loss": 1.1387, "lr": 1.999362308210017e-05, "epoch": 0.7723785166240409, "percentage": 11.03, "elapsed_time": "2:59:35", "remaining_time": "1 day, 0:07:59"} +{"current_steps": 303, "total_steps": 2737, "loss": 1.1084, "lr": 1.9993159507005e-05, "epoch": 0.7749360613810742, "percentage": 11.07, "elapsed_time": "3:00:10", "remaining_time": "1 day, 0:07:23"} +{"current_steps": 304, "total_steps": 2737, "loss": 1.1134, "lr": 1.9992679673664136e-05, "epoch": 0.7774936061381074, "percentage": 11.11, "elapsed_time": "3:00:46", "remaining_time": "1 day, 0:06:48"} +{"current_steps": 305, "total_steps": 2737, "loss": 1.1269, "lr": 1.9992183582858233e-05, "epoch": 0.7800511508951407, "percentage": 11.14, "elapsed_time": "3:01:22", "remaining_time": "1 day, 0:06:11"} +{"current_steps": 306, "total_steps": 2737, "loss": 1.1211, "lr": 1.9991671235394404e-05, "epoch": 0.782608695652174, "percentage": 11.18, "elapsed_time": "3:01:57", "remaining_time": "1 day, 0:05:35"} +{"current_steps": 307, "total_steps": 2737, "loss": 1.0874, "lr": 1.9991142632106205e-05, "epoch": 0.7851662404092071, "percentage": 11.22, "elapsed_time": "3:02:33", "remaining_time": "1 day, 0:04:59"} +{"current_steps": 308, "total_steps": 2737, "loss": 1.1189, "lr": 1.999059777385364e-05, "epoch": 0.7877237851662404, "percentage": 11.25, "elapsed_time": "3:03:09", "remaining_time": "1 day, 0:04:23"} +{"current_steps": 309, "total_steps": 2737, "loss": 1.1368, "lr": 1.9990036661523162e-05, "epoch": 0.7902813299232737, "percentage": 11.29, "elapsed_time": "3:03:45", "remaining_time": "1 day, 0:03:53"} +{"current_steps": 310, "total_steps": 2737, "loss": 1.1041, "lr": 1.998945929602766e-05, "epoch": 0.7928388746803069, "percentage": 11.33, "elapsed_time": "3:04:21", "remaining_time": "1 day, 0:03:16"} +{"current_steps": 311, "total_steps": 2737, "loss": 1.1381, "lr": 1.9988865678306476e-05, "epoch": 0.7953964194373402, "percentage": 11.36, "elapsed_time": "3:04:56", "remaining_time": "1 day, 0:02:40"} +{"current_steps": 312, "total_steps": 2737, "loss": 1.1505, "lr": 1.998825580932539e-05, "epoch": 0.7979539641943734, "percentage": 11.4, "elapsed_time": "3:05:32", "remaining_time": "1 day, 0:02:04"} +{"current_steps": 313, "total_steps": 2737, "loss": 1.116, "lr": 1.9987629690076615e-05, "epoch": 0.8005115089514067, "percentage": 11.44, "elapsed_time": "3:06:07", "remaining_time": "1 day, 0:01:28"} +{"current_steps": 314, "total_steps": 2737, "loss": 1.1233, "lr": 1.998698732157881e-05, "epoch": 0.80306905370844, "percentage": 11.47, "elapsed_time": "3:06:43", "remaining_time": "1 day, 0:00:51"} +{"current_steps": 315, "total_steps": 2737, "loss": 1.1112, "lr": 1.998632870487707e-05, "epoch": 0.8056265984654731, "percentage": 11.51, "elapsed_time": "3:07:19", "remaining_time": "1 day, 0:00:15"} +{"current_steps": 316, "total_steps": 2737, "loss": 1.1089, "lr": 1.9985653841042926e-05, "epoch": 0.8081841432225064, "percentage": 11.55, "elapsed_time": "3:07:54", "remaining_time": "23:59:37"} +{"current_steps": 317, "total_steps": 2737, "loss": 1.1387, "lr": 1.9984962731174336e-05, "epoch": 0.8107416879795396, "percentage": 11.58, "elapsed_time": "3:08:30", "remaining_time": "23:59:01"} +{"current_steps": 318, "total_steps": 2737, "loss": 1.1292, "lr": 1.998425537639569e-05, "epoch": 0.8132992327365729, "percentage": 11.62, "elapsed_time": "3:09:05", "remaining_time": "23:58:25"} +{"current_steps": 319, "total_steps": 2737, "loss": 1.0907, "lr": 1.9983531777857817e-05, "epoch": 0.8158567774936062, "percentage": 11.66, "elapsed_time": "3:09:41", "remaining_time": "23:57:49"} +{"current_steps": 320, "total_steps": 2737, "loss": 1.1157, "lr": 1.998279193673796e-05, "epoch": 0.8184143222506394, "percentage": 11.69, "elapsed_time": "3:10:16", "remaining_time": "23:57:13"} +{"current_steps": 321, "total_steps": 2737, "loss": 1.0971, "lr": 1.9982035854239793e-05, "epoch": 0.8209718670076727, "percentage": 11.73, "elapsed_time": "3:10:52", "remaining_time": "23:56:36"} +{"current_steps": 322, "total_steps": 2737, "loss": 1.1236, "lr": 1.9981263531593422e-05, "epoch": 0.8235294117647058, "percentage": 11.76, "elapsed_time": "3:11:28", "remaining_time": "23:56:01"} +{"current_steps": 323, "total_steps": 2737, "loss": 1.1438, "lr": 1.9980474970055367e-05, "epoch": 0.8260869565217391, "percentage": 11.8, "elapsed_time": "3:12:03", "remaining_time": "23:55:24"} +{"current_steps": 324, "total_steps": 2737, "loss": 1.1465, "lr": 1.997967017090856e-05, "epoch": 0.8286445012787724, "percentage": 11.84, "elapsed_time": "3:12:39", "remaining_time": "23:54:48"} +{"current_steps": 325, "total_steps": 2737, "loss": 1.1061, "lr": 1.9978849135462367e-05, "epoch": 0.8312020460358056, "percentage": 11.87, "elapsed_time": "3:13:14", "remaining_time": "23:54:12"} +{"current_steps": 326, "total_steps": 2737, "loss": 1.1146, "lr": 1.9978011865052554e-05, "epoch": 0.8337595907928389, "percentage": 11.91, "elapsed_time": "3:13:50", "remaining_time": "23:53:35"} +{"current_steps": 327, "total_steps": 2737, "loss": 1.1554, "lr": 1.9977158361041317e-05, "epoch": 0.8363171355498721, "percentage": 11.95, "elapsed_time": "3:14:26", "remaining_time": "23:52:59"} +{"current_steps": 328, "total_steps": 2737, "loss": 1.1274, "lr": 1.997628862481725e-05, "epoch": 0.8388746803069054, "percentage": 11.98, "elapsed_time": "3:15:01", "remaining_time": "23:52:22"} +{"current_steps": 329, "total_steps": 2737, "loss": 1.1669, "lr": 1.9975402657795355e-05, "epoch": 0.8414322250639387, "percentage": 12.02, "elapsed_time": "3:15:37", "remaining_time": "23:51:46"} +{"current_steps": 330, "total_steps": 2737, "loss": 1.1361, "lr": 1.997450046141705e-05, "epoch": 0.8439897698209718, "percentage": 12.06, "elapsed_time": "3:16:12", "remaining_time": "23:51:11"} +{"current_steps": 331, "total_steps": 2737, "loss": 1.1095, "lr": 1.997358203715015e-05, "epoch": 0.8465473145780051, "percentage": 12.09, "elapsed_time": "3:16:48", "remaining_time": "23:50:32"} +{"current_steps": 332, "total_steps": 2737, "loss": 1.1016, "lr": 1.9972647386488873e-05, "epoch": 0.8491048593350383, "percentage": 12.13, "elapsed_time": "3:17:24", "remaining_time": "23:49:57"} +{"current_steps": 333, "total_steps": 2737, "loss": 1.1475, "lr": 1.997169651095384e-05, "epoch": 0.8516624040920716, "percentage": 12.17, "elapsed_time": "3:17:59", "remaining_time": "23:49:21"} +{"current_steps": 334, "total_steps": 2737, "loss": 1.0813, "lr": 1.9970729412092064e-05, "epoch": 0.8542199488491049, "percentage": 12.2, "elapsed_time": "3:18:35", "remaining_time": "23:48:45"} +{"current_steps": 335, "total_steps": 2737, "loss": 1.1067, "lr": 1.9969746091476955e-05, "epoch": 0.8567774936061381, "percentage": 12.24, "elapsed_time": "3:19:10", "remaining_time": "23:48:04"} +{"current_steps": 336, "total_steps": 2737, "loss": 1.1069, "lr": 1.9968746550708313e-05, "epoch": 0.8593350383631714, "percentage": 12.28, "elapsed_time": "3:19:45", "remaining_time": "23:47:28"} +{"current_steps": 337, "total_steps": 2737, "loss": 1.1279, "lr": 1.996773079141233e-05, "epoch": 0.8618925831202046, "percentage": 12.31, "elapsed_time": "3:20:20", "remaining_time": "23:46:47"} +{"current_steps": 338, "total_steps": 2737, "loss": 1.1339, "lr": 1.9966698815241583e-05, "epoch": 0.8644501278772379, "percentage": 12.35, "elapsed_time": "3:20:56", "remaining_time": "23:46:10"} +{"current_steps": 339, "total_steps": 2737, "loss": 1.1039, "lr": 1.9965650623875034e-05, "epoch": 0.8670076726342711, "percentage": 12.39, "elapsed_time": "3:21:31", "remaining_time": "23:45:34"} +{"current_steps": 340, "total_steps": 2737, "loss": 1.1425, "lr": 1.9964586219018018e-05, "epoch": 0.8695652173913043, "percentage": 12.42, "elapsed_time": "3:22:07", "remaining_time": "23:44:57"} +{"current_steps": 341, "total_steps": 2737, "loss": 1.0978, "lr": 1.9963505602402263e-05, "epoch": 0.8721227621483376, "percentage": 12.46, "elapsed_time": "3:22:43", "remaining_time": "23:44:22"} +{"current_steps": 342, "total_steps": 2737, "loss": 1.1242, "lr": 1.996240877578586e-05, "epoch": 0.8746803069053708, "percentage": 12.5, "elapsed_time": "3:23:18", "remaining_time": "23:43:47"} +{"current_steps": 343, "total_steps": 2737, "loss": 1.1191, "lr": 1.996129574095328e-05, "epoch": 0.8772378516624041, "percentage": 12.53, "elapsed_time": "3:23:54", "remaining_time": "23:43:11"} +{"current_steps": 344, "total_steps": 2737, "loss": 1.1253, "lr": 1.996016649971536e-05, "epoch": 0.8797953964194374, "percentage": 12.57, "elapsed_time": "3:24:29", "remaining_time": "23:42:34"} +{"current_steps": 345, "total_steps": 2737, "loss": 1.1097, "lr": 1.9959021053909304e-05, "epoch": 0.8823529411764706, "percentage": 12.61, "elapsed_time": "3:25:05", "remaining_time": "23:41:59"} +{"current_steps": 346, "total_steps": 2737, "loss": 1.1751, "lr": 1.995785940539868e-05, "epoch": 0.8849104859335039, "percentage": 12.64, "elapsed_time": "3:25:41", "remaining_time": "23:41:23"} +{"current_steps": 347, "total_steps": 2737, "loss": 1.06, "lr": 1.995668155607342e-05, "epoch": 0.887468030690537, "percentage": 12.68, "elapsed_time": "3:26:17", "remaining_time": "23:40:47"} +{"current_steps": 348, "total_steps": 2737, "loss": 1.1217, "lr": 1.9955487507849815e-05, "epoch": 0.8900255754475703, "percentage": 12.71, "elapsed_time": "3:26:52", "remaining_time": "23:40:13"} +{"current_steps": 349, "total_steps": 2737, "loss": 1.1016, "lr": 1.9954277262670497e-05, "epoch": 0.8925831202046036, "percentage": 12.75, "elapsed_time": "3:27:28", "remaining_time": "23:39:37"} +{"current_steps": 350, "total_steps": 2737, "loss": 1.1259, "lr": 1.9953050822504466e-05, "epoch": 0.8951406649616368, "percentage": 12.79, "elapsed_time": "3:28:04", "remaining_time": "23:39:01"} +{"current_steps": 351, "total_steps": 2737, "loss": 1.1449, "lr": 1.995180818934706e-05, "epoch": 0.8976982097186701, "percentage": 12.82, "elapsed_time": "3:28:39", "remaining_time": "23:38:24"} +{"current_steps": 352, "total_steps": 2737, "loss": 1.1, "lr": 1.995054936521997e-05, "epoch": 0.9002557544757033, "percentage": 12.86, "elapsed_time": "3:29:15", "remaining_time": "23:37:48"} +{"current_steps": 353, "total_steps": 2737, "loss": 1.1215, "lr": 1.9949274352171218e-05, "epoch": 0.9028132992327366, "percentage": 12.9, "elapsed_time": "3:29:50", "remaining_time": "23:37:12"} +{"current_steps": 354, "total_steps": 2737, "loss": 1.1151, "lr": 1.9947983152275175e-05, "epoch": 0.9053708439897699, "percentage": 12.93, "elapsed_time": "3:30:26", "remaining_time": "23:36:35"} +{"current_steps": 355, "total_steps": 2737, "loss": 1.0909, "lr": 1.9946675767632545e-05, "epoch": 0.907928388746803, "percentage": 12.97, "elapsed_time": "3:31:02", "remaining_time": "23:36:04"} +{"current_steps": 356, "total_steps": 2737, "loss": 1.1065, "lr": 1.9945352200370352e-05, "epoch": 0.9104859335038363, "percentage": 13.01, "elapsed_time": "3:31:39", "remaining_time": "23:35:33"} +{"current_steps": 357, "total_steps": 2737, "loss": 1.1187, "lr": 1.9944012452641966e-05, "epoch": 0.9130434782608695, "percentage": 13.04, "elapsed_time": "3:32:14", "remaining_time": "23:34:59"} +{"current_steps": 358, "total_steps": 2737, "loss": 1.1402, "lr": 1.994265652662707e-05, "epoch": 0.9156010230179028, "percentage": 13.08, "elapsed_time": "3:32:50", "remaining_time": "23:34:24"} +{"current_steps": 359, "total_steps": 2737, "loss": 1.1232, "lr": 1.9941284424531668e-05, "epoch": 0.9181585677749361, "percentage": 13.12, "elapsed_time": "3:33:26", "remaining_time": "23:33:47"} +{"current_steps": 360, "total_steps": 2737, "loss": 1.0879, "lr": 1.9939896148588086e-05, "epoch": 0.9207161125319693, "percentage": 13.15, "elapsed_time": "3:34:01", "remaining_time": "23:33:11"} +{"current_steps": 361, "total_steps": 2737, "loss": 1.1384, "lr": 1.9938491701054965e-05, "epoch": 0.9232736572890026, "percentage": 13.19, "elapsed_time": "3:34:37", "remaining_time": "23:32:35"} +{"current_steps": 362, "total_steps": 2737, "loss": 1.0616, "lr": 1.9937071084217254e-05, "epoch": 0.9258312020460358, "percentage": 13.23, "elapsed_time": "3:35:13", "remaining_time": "23:32:00"} +{"current_steps": 363, "total_steps": 2737, "loss": 1.127, "lr": 1.99356343003862e-05, "epoch": 0.928388746803069, "percentage": 13.26, "elapsed_time": "3:35:48", "remaining_time": "23:31:23"} +{"current_steps": 364, "total_steps": 2737, "loss": 1.1075, "lr": 1.9934181351899365e-05, "epoch": 0.9309462915601023, "percentage": 13.3, "elapsed_time": "3:36:24", "remaining_time": "23:30:47"} +{"current_steps": 365, "total_steps": 2737, "loss": 1.1272, "lr": 1.9932712241120606e-05, "epoch": 0.9335038363171355, "percentage": 13.34, "elapsed_time": "3:36:59", "remaining_time": "23:30:11"} +{"current_steps": 366, "total_steps": 2737, "loss": 1.1469, "lr": 1.9931226970440075e-05, "epoch": 0.9360613810741688, "percentage": 13.37, "elapsed_time": "3:37:35", "remaining_time": "23:29:35"} +{"current_steps": 367, "total_steps": 2737, "loss": 1.1278, "lr": 1.9929725542274215e-05, "epoch": 0.9386189258312021, "percentage": 13.41, "elapsed_time": "3:38:11", "remaining_time": "23:28:59"} +{"current_steps": 368, "total_steps": 2737, "loss": 1.1187, "lr": 1.992820795906575e-05, "epoch": 0.9411764705882353, "percentage": 13.45, "elapsed_time": "3:38:46", "remaining_time": "23:28:23"} +{"current_steps": 369, "total_steps": 2737, "loss": 1.1126, "lr": 1.99266742232837e-05, "epoch": 0.9437340153452686, "percentage": 13.48, "elapsed_time": "3:39:22", "remaining_time": "23:27:46"} +{"current_steps": 370, "total_steps": 2737, "loss": 1.1139, "lr": 1.9925124337423356e-05, "epoch": 0.9462915601023018, "percentage": 13.52, "elapsed_time": "3:39:57", "remaining_time": "23:27:11"} +{"current_steps": 371, "total_steps": 2737, "loss": 1.138, "lr": 1.9923558304006283e-05, "epoch": 0.948849104859335, "percentage": 13.55, "elapsed_time": "3:40:33", "remaining_time": "23:26:31"} +{"current_steps": 372, "total_steps": 2737, "loss": 1.1176, "lr": 1.992197612558032e-05, "epoch": 0.9514066496163683, "percentage": 13.59, "elapsed_time": "3:41:08", "remaining_time": "23:25:55"} +{"current_steps": 373, "total_steps": 2737, "loss": 1.1221, "lr": 1.9920377804719573e-05, "epoch": 0.9539641943734015, "percentage": 13.63, "elapsed_time": "3:41:44", "remaining_time": "23:25:19"} +{"current_steps": 374, "total_steps": 2737, "loss": 1.1198, "lr": 1.991876334402441e-05, "epoch": 0.9565217391304348, "percentage": 13.66, "elapsed_time": "3:42:19", "remaining_time": "23:24:43"} +{"current_steps": 375, "total_steps": 2737, "loss": 1.1438, "lr": 1.9917132746121454e-05, "epoch": 0.959079283887468, "percentage": 13.7, "elapsed_time": "3:42:55", "remaining_time": "23:24:07"} +{"current_steps": 376, "total_steps": 2737, "loss": 1.0946, "lr": 1.9915486013663595e-05, "epoch": 0.9616368286445013, "percentage": 13.74, "elapsed_time": "3:43:31", "remaining_time": "23:23:31"} +{"current_steps": 377, "total_steps": 2737, "loss": 1.1257, "lr": 1.9913823149329952e-05, "epoch": 0.9641943734015346, "percentage": 13.77, "elapsed_time": "3:44:06", "remaining_time": "23:22:55"} +{"current_steps": 378, "total_steps": 2737, "loss": 1.1315, "lr": 1.9912144155825913e-05, "epoch": 0.9667519181585678, "percentage": 13.81, "elapsed_time": "3:44:42", "remaining_time": "23:22:19"} +{"current_steps": 379, "total_steps": 2737, "loss": 1.1005, "lr": 1.9910449035883086e-05, "epoch": 0.969309462915601, "percentage": 13.85, "elapsed_time": "3:45:17", "remaining_time": "23:21:43"} +{"current_steps": 380, "total_steps": 2737, "loss": 1.0831, "lr": 1.990873779225933e-05, "epoch": 0.9718670076726342, "percentage": 13.88, "elapsed_time": "3:45:53", "remaining_time": "23:21:08"} +{"current_steps": 381, "total_steps": 2737, "loss": 1.1116, "lr": 1.990701042773873e-05, "epoch": 0.9744245524296675, "percentage": 13.92, "elapsed_time": "3:46:29", "remaining_time": "23:20:35"} +{"current_steps": 382, "total_steps": 2737, "loss": 1.1172, "lr": 1.99052669451316e-05, "epoch": 0.9769820971867008, "percentage": 13.96, "elapsed_time": "3:47:05", "remaining_time": "23:19:59"} +{"current_steps": 383, "total_steps": 2737, "loss": 1.1243, "lr": 1.9903507347274478e-05, "epoch": 0.979539641943734, "percentage": 13.99, "elapsed_time": "3:47:41", "remaining_time": "23:19:23"} +{"current_steps": 384, "total_steps": 2737, "loss": 1.0751, "lr": 1.9901731637030123e-05, "epoch": 0.9820971867007673, "percentage": 14.03, "elapsed_time": "3:48:16", "remaining_time": "23:18:47"} +{"current_steps": 385, "total_steps": 2737, "loss": 1.1572, "lr": 1.9899939817287494e-05, "epoch": 0.9846547314578005, "percentage": 14.07, "elapsed_time": "3:48:52", "remaining_time": "23:18:12"} +{"current_steps": 386, "total_steps": 2737, "loss": 1.1109, "lr": 1.989813189096178e-05, "epoch": 0.9872122762148338, "percentage": 14.1, "elapsed_time": "3:49:28", "remaining_time": "23:17:36"} +{"current_steps": 387, "total_steps": 2737, "loss": 1.1243, "lr": 1.989630786099436e-05, "epoch": 0.989769820971867, "percentage": 14.14, "elapsed_time": "3:50:03", "remaining_time": "23:17:00"} +{"current_steps": 388, "total_steps": 2737, "loss": 1.1379, "lr": 1.9894467730352817e-05, "epoch": 0.9923273657289002, "percentage": 14.18, "elapsed_time": "3:50:39", "remaining_time": "23:16:24"} +{"current_steps": 389, "total_steps": 2737, "loss": 1.1183, "lr": 1.9892611502030932e-05, "epoch": 0.9948849104859335, "percentage": 14.21, "elapsed_time": "3:51:14", "remaining_time": "23:15:48"} +{"current_steps": 390, "total_steps": 2737, "loss": 1.1019, "lr": 1.9890739179048666e-05, "epoch": 0.9974424552429667, "percentage": 14.25, "elapsed_time": "3:51:50", "remaining_time": "23:15:15"} +{"current_steps": 391, "total_steps": 2737, "loss": 1.1315, "lr": 1.9888850764452177e-05, "epoch": 1.0, "percentage": 14.29, "elapsed_time": "3:52:26", "remaining_time": "23:14:39"} +{"current_steps": 392, "total_steps": 2737, "loss": 1.1027, "lr": 1.988694626131379e-05, "epoch": 1.0025575447570332, "percentage": 14.32, "elapsed_time": "3:53:13", "remaining_time": "23:15:09"} +{"current_steps": 393, "total_steps": 2737, "loss": 1.1255, "lr": 1.9885025672732024e-05, "epoch": 1.0051150895140666, "percentage": 14.36, "elapsed_time": "3:53:48", "remaining_time": "23:14:32"} +{"current_steps": 394, "total_steps": 2737, "loss": 1.0926, "lr": 1.9883089001831545e-05, "epoch": 1.0076726342710998, "percentage": 14.4, "elapsed_time": "3:54:24", "remaining_time": "23:13:55"} +{"current_steps": 395, "total_steps": 2737, "loss": 1.1024, "lr": 1.9881136251763203e-05, "epoch": 1.010230179028133, "percentage": 14.43, "elapsed_time": "3:54:59", "remaining_time": "23:13:19"} +{"current_steps": 396, "total_steps": 2737, "loss": 1.1177, "lr": 1.9879167425703998e-05, "epoch": 1.0127877237851663, "percentage": 14.47, "elapsed_time": "3:55:35", "remaining_time": "23:12:42"} +{"current_steps": 397, "total_steps": 2737, "loss": 1.1194, "lr": 1.9877182526857086e-05, "epoch": 1.0153452685421995, "percentage": 14.5, "elapsed_time": "3:56:10", "remaining_time": "23:12:06"} +{"current_steps": 398, "total_steps": 2737, "loss": 1.1126, "lr": 1.9875181558451774e-05, "epoch": 1.0179028132992327, "percentage": 14.54, "elapsed_time": "3:56:46", "remaining_time": "23:11:30"} +{"current_steps": 399, "total_steps": 2737, "loss": 1.0826, "lr": 1.9873164523743517e-05, "epoch": 1.020460358056266, "percentage": 14.58, "elapsed_time": "3:57:22", "remaining_time": "23:10:54"} +{"current_steps": 400, "total_steps": 2737, "loss": 1.137, "lr": 1.9871131426013894e-05, "epoch": 1.0230179028132993, "percentage": 14.61, "elapsed_time": "3:57:57", "remaining_time": "23:10:18"} +{"current_steps": 401, "total_steps": 2737, "loss": 1.1135, "lr": 1.9869082268570637e-05, "epoch": 1.0255754475703325, "percentage": 14.65, "elapsed_time": "3:58:33", "remaining_time": "23:09:41"} +{"current_steps": 402, "total_steps": 2737, "loss": 1.1316, "lr": 1.9867017054747593e-05, "epoch": 1.0281329923273657, "percentage": 14.69, "elapsed_time": "3:59:08", "remaining_time": "23:09:05"} +{"current_steps": 403, "total_steps": 2737, "loss": 1.1009, "lr": 1.9864935787904734e-05, "epoch": 1.030690537084399, "percentage": 14.72, "elapsed_time": "3:59:44", "remaining_time": "23:08:28"} +{"current_steps": 404, "total_steps": 2737, "loss": 1.1047, "lr": 1.986283847142816e-05, "epoch": 1.0332480818414322, "percentage": 14.76, "elapsed_time": "4:00:20", "remaining_time": "23:07:52"} +{"current_steps": 405, "total_steps": 2737, "loss": 1.1031, "lr": 1.9860725108730065e-05, "epoch": 1.0358056265984654, "percentage": 14.8, "elapsed_time": "4:00:55", "remaining_time": "23:07:16"} +{"current_steps": 406, "total_steps": 2737, "loss": 1.137, "lr": 1.9858595703248755e-05, "epoch": 1.0383631713554988, "percentage": 14.83, "elapsed_time": "4:01:31", "remaining_time": "23:06:40"} +{"current_steps": 407, "total_steps": 2737, "loss": 1.0707, "lr": 1.985645025844865e-05, "epoch": 1.040920716112532, "percentage": 14.87, "elapsed_time": "4:02:06", "remaining_time": "23:06:04"} +{"current_steps": 408, "total_steps": 2737, "loss": 1.1033, "lr": 1.9854288777820246e-05, "epoch": 1.0434782608695652, "percentage": 14.91, "elapsed_time": "4:02:42", "remaining_time": "23:05:28"} +{"current_steps": 409, "total_steps": 2737, "loss": 1.0806, "lr": 1.9852111264880145e-05, "epoch": 1.0460358056265984, "percentage": 14.94, "elapsed_time": "4:03:18", "remaining_time": "23:04:52"} +{"current_steps": 410, "total_steps": 2737, "loss": 1.0756, "lr": 1.984991772317102e-05, "epoch": 1.0485933503836318, "percentage": 14.98, "elapsed_time": "4:03:53", "remaining_time": "23:04:15"} +{"current_steps": 411, "total_steps": 2737, "loss": 1.1055, "lr": 1.9847708156261622e-05, "epoch": 1.051150895140665, "percentage": 15.02, "elapsed_time": "4:04:29", "remaining_time": "23:03:40"} +{"current_steps": 412, "total_steps": 2737, "loss": 1.0836, "lr": 1.9845482567746783e-05, "epoch": 1.0537084398976981, "percentage": 15.05, "elapsed_time": "4:05:05", "remaining_time": "23:03:03"} +{"current_steps": 413, "total_steps": 2737, "loss": 1.0904, "lr": 1.9843240961247398e-05, "epoch": 1.0562659846547315, "percentage": 15.09, "elapsed_time": "4:05:40", "remaining_time": "23:02:27"} +{"current_steps": 414, "total_steps": 2737, "loss": 1.1402, "lr": 1.9840983340410414e-05, "epoch": 1.0588235294117647, "percentage": 15.13, "elapsed_time": "4:06:16", "remaining_time": "23:01:51"} +{"current_steps": 415, "total_steps": 2737, "loss": 1.1108, "lr": 1.9838709708908848e-05, "epoch": 1.061381074168798, "percentage": 15.16, "elapsed_time": "4:06:52", "remaining_time": "23:01:19"} +{"current_steps": 416, "total_steps": 2737, "loss": 1.0894, "lr": 1.983642007044175e-05, "epoch": 1.0639386189258313, "percentage": 15.2, "elapsed_time": "4:07:28", "remaining_time": "23:00:42"} +{"current_steps": 417, "total_steps": 2737, "loss": 1.0751, "lr": 1.983411442873422e-05, "epoch": 1.0664961636828645, "percentage": 15.24, "elapsed_time": "4:08:03", "remaining_time": "23:00:06"} +{"current_steps": 418, "total_steps": 2737, "loss": 1.0867, "lr": 1.983179278753739e-05, "epoch": 1.0690537084398977, "percentage": 15.27, "elapsed_time": "4:08:39", "remaining_time": "22:59:30"} +{"current_steps": 419, "total_steps": 2737, "loss": 1.1428, "lr": 1.9829455150628432e-05, "epoch": 1.0716112531969308, "percentage": 15.31, "elapsed_time": "4:09:14", "remaining_time": "22:58:53"} +{"current_steps": 420, "total_steps": 2737, "loss": 1.0877, "lr": 1.982710152181053e-05, "epoch": 1.0741687979539642, "percentage": 15.35, "elapsed_time": "4:09:50", "remaining_time": "22:58:17"} +{"current_steps": 421, "total_steps": 2737, "loss": 1.1025, "lr": 1.982473190491289e-05, "epoch": 1.0767263427109974, "percentage": 15.38, "elapsed_time": "4:10:26", "remaining_time": "22:57:40"} +{"current_steps": 422, "total_steps": 2737, "loss": 1.0954, "lr": 1.9822346303790732e-05, "epoch": 1.0792838874680306, "percentage": 15.42, "elapsed_time": "4:11:01", "remaining_time": "22:57:04"} +{"current_steps": 423, "total_steps": 2737, "loss": 1.0799, "lr": 1.9819944722325283e-05, "epoch": 1.081841432225064, "percentage": 15.45, "elapsed_time": "4:11:37", "remaining_time": "22:56:28"} +{"current_steps": 424, "total_steps": 2737, "loss": 1.1239, "lr": 1.981752716442376e-05, "epoch": 1.0843989769820972, "percentage": 15.49, "elapsed_time": "4:12:12", "remaining_time": "22:55:52"} +{"current_steps": 425, "total_steps": 2737, "loss": 1.0885, "lr": 1.9815093634019384e-05, "epoch": 1.0869565217391304, "percentage": 15.53, "elapsed_time": "4:12:48", "remaining_time": "22:55:15"} +{"current_steps": 426, "total_steps": 2737, "loss": 1.0789, "lr": 1.9812644135071358e-05, "epoch": 1.0895140664961638, "percentage": 15.56, "elapsed_time": "4:13:23", "remaining_time": "22:54:39"} +{"current_steps": 427, "total_steps": 2737, "loss": 1.1051, "lr": 1.9810178671564853e-05, "epoch": 1.092071611253197, "percentage": 15.6, "elapsed_time": "4:13:59", "remaining_time": "22:54:03"} +{"current_steps": 428, "total_steps": 2737, "loss": 1.0838, "lr": 1.980769724751104e-05, "epoch": 1.0946291560102301, "percentage": 15.64, "elapsed_time": "4:14:35", "remaining_time": "22:53:26"} +{"current_steps": 429, "total_steps": 2737, "loss": 1.114, "lr": 1.9805199866947026e-05, "epoch": 1.0971867007672633, "percentage": 15.67, "elapsed_time": "4:15:10", "remaining_time": "22:52:50"} +{"current_steps": 430, "total_steps": 2737, "loss": 1.0909, "lr": 1.9802686533935903e-05, "epoch": 1.0997442455242967, "percentage": 15.71, "elapsed_time": "4:15:46", "remaining_time": "22:52:13"} +{"current_steps": 431, "total_steps": 2737, "loss": 1.119, "lr": 1.9800157252566698e-05, "epoch": 1.10230179028133, "percentage": 15.75, "elapsed_time": "4:16:22", "remaining_time": "22:51:41"} +{"current_steps": 432, "total_steps": 2737, "loss": 1.1357, "lr": 1.97976120269544e-05, "epoch": 1.104859335038363, "percentage": 15.78, "elapsed_time": "4:16:58", "remaining_time": "22:51:05"} +{"current_steps": 433, "total_steps": 2737, "loss": 1.1153, "lr": 1.9795050861239932e-05, "epoch": 1.1074168797953965, "percentage": 15.82, "elapsed_time": "4:17:33", "remaining_time": "22:50:29"} +{"current_steps": 434, "total_steps": 2737, "loss": 1.1051, "lr": 1.9792473759590148e-05, "epoch": 1.1099744245524297, "percentage": 15.86, "elapsed_time": "4:18:09", "remaining_time": "22:49:52"} +{"current_steps": 435, "total_steps": 2737, "loss": 1.0943, "lr": 1.978988072619783e-05, "epoch": 1.1125319693094629, "percentage": 15.89, "elapsed_time": "4:18:44", "remaining_time": "22:49:17"} +{"current_steps": 436, "total_steps": 2737, "loss": 1.0947, "lr": 1.9787271765281684e-05, "epoch": 1.1150895140664963, "percentage": 15.93, "elapsed_time": "4:19:20", "remaining_time": "22:48:41"} +{"current_steps": 437, "total_steps": 2737, "loss": 1.0743, "lr": 1.9784646881086327e-05, "epoch": 1.1176470588235294, "percentage": 15.97, "elapsed_time": "4:19:56", "remaining_time": "22:48:05"} +{"current_steps": 438, "total_steps": 2737, "loss": 1.0861, "lr": 1.9782006077882282e-05, "epoch": 1.1202046035805626, "percentage": 16.0, "elapsed_time": "4:20:32", "remaining_time": "22:47:34"} +{"current_steps": 439, "total_steps": 2737, "loss": 1.1069, "lr": 1.9779349359965966e-05, "epoch": 1.1227621483375958, "percentage": 16.04, "elapsed_time": "4:21:08", "remaining_time": "22:46:58"} +{"current_steps": 440, "total_steps": 2737, "loss": 1.0849, "lr": 1.9776676731659695e-05, "epoch": 1.1253196930946292, "percentage": 16.08, "elapsed_time": "4:21:43", "remaining_time": "22:46:21"} +{"current_steps": 441, "total_steps": 2737, "loss": 1.1053, "lr": 1.977398819731167e-05, "epoch": 1.1278772378516624, "percentage": 16.11, "elapsed_time": "4:22:19", "remaining_time": "22:45:45"} +{"current_steps": 442, "total_steps": 2737, "loss": 1.0848, "lr": 1.9771283761295966e-05, "epoch": 1.1304347826086956, "percentage": 16.15, "elapsed_time": "4:22:55", "remaining_time": "22:45:09"} +{"current_steps": 443, "total_steps": 2737, "loss": 1.1091, "lr": 1.9768563428012536e-05, "epoch": 1.132992327365729, "percentage": 16.19, "elapsed_time": "4:23:31", "remaining_time": "22:44:36"} +{"current_steps": 444, "total_steps": 2737, "loss": 1.0767, "lr": 1.9765827201887183e-05, "epoch": 1.1355498721227621, "percentage": 16.22, "elapsed_time": "4:24:06", "remaining_time": "22:44:00"} +{"current_steps": 445, "total_steps": 2737, "loss": 1.0996, "lr": 1.9763075087371583e-05, "epoch": 1.1381074168797953, "percentage": 16.26, "elapsed_time": "4:24:42", "remaining_time": "22:43:23"} +{"current_steps": 446, "total_steps": 2737, "loss": 1.0713, "lr": 1.9760307088943254e-05, "epoch": 1.1406649616368287, "percentage": 16.3, "elapsed_time": "4:25:17", "remaining_time": "22:42:45"} +{"current_steps": 447, "total_steps": 2737, "loss": 1.0564, "lr": 1.9757523211105555e-05, "epoch": 1.143222506393862, "percentage": 16.33, "elapsed_time": "4:25:53", "remaining_time": "22:42:09"} +{"current_steps": 448, "total_steps": 2737, "loss": 1.0907, "lr": 1.975472345838768e-05, "epoch": 1.145780051150895, "percentage": 16.37, "elapsed_time": "4:26:28", "remaining_time": "22:41:33"} +{"current_steps": 449, "total_steps": 2737, "loss": 1.0817, "lr": 1.9751907835344654e-05, "epoch": 1.1483375959079285, "percentage": 16.4, "elapsed_time": "4:27:04", "remaining_time": "22:40:57"} +{"current_steps": 450, "total_steps": 2737, "loss": 1.129, "lr": 1.9749076346557318e-05, "epoch": 1.1508951406649617, "percentage": 16.44, "elapsed_time": "4:27:39", "remaining_time": "22:40:19"} +{"current_steps": 451, "total_steps": 2737, "loss": 1.1034, "lr": 1.9746228996632326e-05, "epoch": 1.1534526854219949, "percentage": 16.48, "elapsed_time": "4:28:15", "remaining_time": "22:39:43"} +{"current_steps": 452, "total_steps": 2737, "loss": 1.1076, "lr": 1.974336579020214e-05, "epoch": 1.156010230179028, "percentage": 16.51, "elapsed_time": "4:28:51", "remaining_time": "22:39:07"} +{"current_steps": 453, "total_steps": 2737, "loss": 1.1224, "lr": 1.9740486731925022e-05, "epoch": 1.1585677749360614, "percentage": 16.55, "elapsed_time": "4:29:26", "remaining_time": "22:38:30"} +{"current_steps": 454, "total_steps": 2737, "loss": 1.0962, "lr": 1.9737591826485013e-05, "epoch": 1.1611253196930946, "percentage": 16.59, "elapsed_time": "4:30:02", "remaining_time": "22:37:54"} +{"current_steps": 455, "total_steps": 2737, "loss": 1.0905, "lr": 1.9734681078591943e-05, "epoch": 1.1636828644501278, "percentage": 16.62, "elapsed_time": "4:30:37", "remaining_time": "22:37:19"} +{"current_steps": 456, "total_steps": 2737, "loss": 1.0812, "lr": 1.9731754492981423e-05, "epoch": 1.1662404092071612, "percentage": 16.66, "elapsed_time": "4:31:13", "remaining_time": "22:36:42"} +{"current_steps": 457, "total_steps": 2737, "loss": 1.0729, "lr": 1.9728812074414822e-05, "epoch": 1.1687979539641944, "percentage": 16.7, "elapsed_time": "4:31:48", "remaining_time": "22:36:06"} +{"current_steps": 458, "total_steps": 2737, "loss": 1.078, "lr": 1.9725853827679266e-05, "epoch": 1.1713554987212276, "percentage": 16.73, "elapsed_time": "4:32:24", "remaining_time": "22:35:30"} +{"current_steps": 459, "total_steps": 2737, "loss": 1.0864, "lr": 1.9722879757587647e-05, "epoch": 1.1739130434782608, "percentage": 16.77, "elapsed_time": "4:33:00", "remaining_time": "22:34:53"} +{"current_steps": 460, "total_steps": 2737, "loss": 1.1135, "lr": 1.9719889868978582e-05, "epoch": 1.1764705882352942, "percentage": 16.81, "elapsed_time": "4:33:35", "remaining_time": "22:34:17"} +{"current_steps": 461, "total_steps": 2737, "loss": 1.1363, "lr": 1.971688416671644e-05, "epoch": 1.1790281329923273, "percentage": 16.84, "elapsed_time": "4:34:11", "remaining_time": "22:33:41"} +{"current_steps": 462, "total_steps": 2737, "loss": 1.0791, "lr": 1.9713862655691302e-05, "epoch": 1.1815856777493605, "percentage": 16.88, "elapsed_time": "4:34:46", "remaining_time": "22:33:02"} +{"current_steps": 463, "total_steps": 2737, "loss": 1.0718, "lr": 1.971082534081899e-05, "epoch": 1.184143222506394, "percentage": 16.92, "elapsed_time": "4:35:21", "remaining_time": "22:32:26"} +{"current_steps": 464, "total_steps": 2737, "loss": 1.0961, "lr": 1.970777222704101e-05, "epoch": 1.186700767263427, "percentage": 16.95, "elapsed_time": "4:35:57", "remaining_time": "22:31:50"} +{"current_steps": 465, "total_steps": 2737, "loss": 1.1038, "lr": 1.97047033193246e-05, "epoch": 1.1892583120204603, "percentage": 16.99, "elapsed_time": "4:36:33", "remaining_time": "22:31:14"} +{"current_steps": 466, "total_steps": 2737, "loss": 1.1054, "lr": 1.970161862266268e-05, "epoch": 1.1918158567774937, "percentage": 17.03, "elapsed_time": "4:37:08", "remaining_time": "22:30:38"} +{"current_steps": 467, "total_steps": 2737, "loss": 1.0807, "lr": 1.969851814207385e-05, "epoch": 1.1943734015345269, "percentage": 17.06, "elapsed_time": "4:37:43", "remaining_time": "22:29:59"} +{"current_steps": 468, "total_steps": 2737, "loss": 1.1296, "lr": 1.9695401882602406e-05, "epoch": 1.19693094629156, "percentage": 17.1, "elapsed_time": "4:38:19", "remaining_time": "22:29:23"} +{"current_steps": 469, "total_steps": 2737, "loss": 1.0936, "lr": 1.9692269849318303e-05, "epoch": 1.1994884910485935, "percentage": 17.14, "elapsed_time": "4:38:54", "remaining_time": "22:28:43"} +{"current_steps": 470, "total_steps": 2737, "loss": 1.1155, "lr": 1.9689122047317166e-05, "epoch": 1.2020460358056266, "percentage": 17.17, "elapsed_time": "4:39:29", "remaining_time": "22:28:07"} +{"current_steps": 471, "total_steps": 2737, "loss": 1.0896, "lr": 1.968595848172027e-05, "epoch": 1.2046035805626598, "percentage": 17.21, "elapsed_time": "4:40:05", "remaining_time": "22:27:31"} +{"current_steps": 472, "total_steps": 2737, "loss": 1.0452, "lr": 1.968277915767454e-05, "epoch": 1.207161125319693, "percentage": 17.25, "elapsed_time": "4:40:40", "remaining_time": "22:26:55"} +{"current_steps": 473, "total_steps": 2737, "loss": 1.1045, "lr": 1.9679584080352537e-05, "epoch": 1.2097186700767264, "percentage": 17.28, "elapsed_time": "4:41:16", "remaining_time": "22:26:19"} +{"current_steps": 474, "total_steps": 2737, "loss": 1.0855, "lr": 1.967637325495245e-05, "epoch": 1.2122762148337596, "percentage": 17.32, "elapsed_time": "4:41:52", "remaining_time": "22:25:43"} +{"current_steps": 475, "total_steps": 2737, "loss": 1.1001, "lr": 1.9673146686698093e-05, "epoch": 1.2148337595907928, "percentage": 17.35, "elapsed_time": "4:42:27", "remaining_time": "22:25:07"} +{"current_steps": 476, "total_steps": 2737, "loss": 1.0729, "lr": 1.9669904380838892e-05, "epoch": 1.2173913043478262, "percentage": 17.39, "elapsed_time": "4:43:03", "remaining_time": "22:24:31"} +{"current_steps": 477, "total_steps": 2737, "loss": 1.1242, "lr": 1.966664634264987e-05, "epoch": 1.2199488491048593, "percentage": 17.43, "elapsed_time": "4:43:39", "remaining_time": "22:23:55"} +{"current_steps": 478, "total_steps": 2737, "loss": 1.1087, "lr": 1.9663372577431663e-05, "epoch": 1.2225063938618925, "percentage": 17.46, "elapsed_time": "4:44:14", "remaining_time": "22:23:17"} +{"current_steps": 479, "total_steps": 2737, "loss": 1.1167, "lr": 1.966008309051047e-05, "epoch": 1.2250639386189257, "percentage": 17.5, "elapsed_time": "4:44:50", "remaining_time": "22:22:45"} +{"current_steps": 480, "total_steps": 2737, "loss": 1.07, "lr": 1.965677788723809e-05, "epoch": 1.227621483375959, "percentage": 17.54, "elapsed_time": "4:45:26", "remaining_time": "22:22:10"} +{"current_steps": 481, "total_steps": 2737, "loss": 1.0775, "lr": 1.9653456972991877e-05, "epoch": 1.2301790281329923, "percentage": 17.57, "elapsed_time": "4:46:02", "remaining_time": "22:21:34"} +{"current_steps": 482, "total_steps": 2737, "loss": 1.0967, "lr": 1.965012035317475e-05, "epoch": 1.2327365728900257, "percentage": 17.61, "elapsed_time": "4:46:37", "remaining_time": "22:20:57"} +{"current_steps": 483, "total_steps": 2737, "loss": 1.0879, "lr": 1.9646768033215183e-05, "epoch": 1.2352941176470589, "percentage": 17.65, "elapsed_time": "4:47:13", "remaining_time": "22:20:21"} +{"current_steps": 484, "total_steps": 2737, "loss": 1.1019, "lr": 1.9643400018567195e-05, "epoch": 1.237851662404092, "percentage": 17.68, "elapsed_time": "4:47:48", "remaining_time": "22:19:46"} +{"current_steps": 485, "total_steps": 2737, "loss": 1.1084, "lr": 1.9640016314710323e-05, "epoch": 1.2404092071611252, "percentage": 17.72, "elapsed_time": "4:48:24", "remaining_time": "22:19:10"} +{"current_steps": 486, "total_steps": 2737, "loss": 1.1029, "lr": 1.9636616927149655e-05, "epoch": 1.2429667519181586, "percentage": 17.76, "elapsed_time": "4:49:00", "remaining_time": "22:18:34"} +{"current_steps": 487, "total_steps": 2737, "loss": 1.0735, "lr": 1.9633201861415773e-05, "epoch": 1.2455242966751918, "percentage": 17.79, "elapsed_time": "4:49:35", "remaining_time": "22:17:57"} +{"current_steps": 488, "total_steps": 2737, "loss": 1.0948, "lr": 1.9629771123064784e-05, "epoch": 1.248081841432225, "percentage": 17.83, "elapsed_time": "4:50:11", "remaining_time": "22:17:21"} +{"current_steps": 489, "total_steps": 2737, "loss": 1.0984, "lr": 1.9626324717678275e-05, "epoch": 1.2506393861892584, "percentage": 17.87, "elapsed_time": "4:50:46", "remaining_time": "22:16:45"} +{"current_steps": 490, "total_steps": 2737, "loss": 1.1213, "lr": 1.962286265086334e-05, "epoch": 1.2531969309462916, "percentage": 17.9, "elapsed_time": "4:51:22", "remaining_time": "22:16:09"} +{"current_steps": 491, "total_steps": 2737, "loss": 1.0909, "lr": 1.961938492825254e-05, "epoch": 1.2557544757033248, "percentage": 17.94, "elapsed_time": "4:51:58", "remaining_time": "22:15:33"} +{"current_steps": 492, "total_steps": 2737, "loss": 1.1164, "lr": 1.9615891555503914e-05, "epoch": 1.258312020460358, "percentage": 17.98, "elapsed_time": "4:52:33", "remaining_time": "22:14:57"} +{"current_steps": 493, "total_steps": 2737, "loss": 1.0834, "lr": 1.961238253830096e-05, "epoch": 1.2608695652173914, "percentage": 18.01, "elapsed_time": "4:53:09", "remaining_time": "22:14:21"} +{"current_steps": 494, "total_steps": 2737, "loss": 1.0823, "lr": 1.9608857882352636e-05, "epoch": 1.2634271099744245, "percentage": 18.05, "elapsed_time": "4:53:44", "remaining_time": "22:13:45"} +{"current_steps": 495, "total_steps": 2737, "loss": 1.1084, "lr": 1.9605317593393326e-05, "epoch": 1.265984654731458, "percentage": 18.09, "elapsed_time": "4:54:20", "remaining_time": "22:13:11"} +{"current_steps": 496, "total_steps": 2737, "loss": 1.0978, "lr": 1.9601761677182868e-05, "epoch": 1.2685421994884911, "percentage": 18.12, "elapsed_time": "4:54:56", "remaining_time": "22:12:37"} +{"current_steps": 497, "total_steps": 2737, "loss": 1.0889, "lr": 1.959819013950651e-05, "epoch": 1.2710997442455243, "percentage": 18.16, "elapsed_time": "4:55:32", "remaining_time": "22:12:01"} +{"current_steps": 498, "total_steps": 2737, "loss": 1.0792, "lr": 1.9594602986174923e-05, "epoch": 1.2736572890025575, "percentage": 18.2, "elapsed_time": "4:56:08", "remaining_time": "22:11:25"} +{"current_steps": 499, "total_steps": 2737, "loss": 1.092, "lr": 1.959100022302418e-05, "epoch": 1.2762148337595907, "percentage": 18.23, "elapsed_time": "4:56:43", "remaining_time": "22:10:49"} +{"current_steps": 500, "total_steps": 2737, "loss": 1.0652, "lr": 1.9587381855915754e-05, "epoch": 1.278772378516624, "percentage": 18.27, "elapsed_time": "4:57:19", "remaining_time": "22:10:13"} +{"current_steps": 501, "total_steps": 2737, "loss": 1.0859, "lr": 1.95837478907365e-05, "epoch": 1.2813299232736572, "percentage": 18.3, "elapsed_time": "4:57:55", "remaining_time": "22:09:37"} +{"current_steps": 502, "total_steps": 2737, "loss": 1.0912, "lr": 1.958009833339865e-05, "epoch": 1.2838874680306906, "percentage": 18.34, "elapsed_time": "4:58:31", "remaining_time": "22:09:05"} +{"current_steps": 503, "total_steps": 2737, "loss": 1.1088, "lr": 1.9576433189839807e-05, "epoch": 1.2864450127877238, "percentage": 18.38, "elapsed_time": "4:59:07", "remaining_time": "22:08:29"} +{"current_steps": 504, "total_steps": 2737, "loss": 1.0837, "lr": 1.957275246602293e-05, "epoch": 1.289002557544757, "percentage": 18.41, "elapsed_time": "4:59:42", "remaining_time": "22:07:53"} +{"current_steps": 505, "total_steps": 2737, "loss": 1.105, "lr": 1.9569056167936332e-05, "epoch": 1.2915601023017902, "percentage": 18.45, "elapsed_time": "5:00:18", "remaining_time": "22:07:18"} +{"current_steps": 506, "total_steps": 2737, "loss": 1.0726, "lr": 1.956534430159365e-05, "epoch": 1.2941176470588236, "percentage": 18.49, "elapsed_time": "5:00:54", "remaining_time": "22:06:41"} +{"current_steps": 507, "total_steps": 2737, "loss": 1.1079, "lr": 1.9561616873033867e-05, "epoch": 1.2966751918158568, "percentage": 18.52, "elapsed_time": "5:01:29", "remaining_time": "22:06:05"} +{"current_steps": 508, "total_steps": 2737, "loss": 1.0697, "lr": 1.955787388832127e-05, "epoch": 1.29923273657289, "percentage": 18.56, "elapsed_time": "5:02:05", "remaining_time": "22:05:29"} +{"current_steps": 509, "total_steps": 2737, "loss": 1.1016, "lr": 1.9554115353545464e-05, "epoch": 1.3017902813299234, "percentage": 18.6, "elapsed_time": "5:02:40", "remaining_time": "22:04:53"} +{"current_steps": 510, "total_steps": 2737, "loss": 1.0727, "lr": 1.9550341274821348e-05, "epoch": 1.3043478260869565, "percentage": 18.63, "elapsed_time": "5:03:16", "remaining_time": "22:04:16"} +{"current_steps": 511, "total_steps": 2737, "loss": 1.0792, "lr": 1.9546551658289113e-05, "epoch": 1.3069053708439897, "percentage": 18.67, "elapsed_time": "5:03:51", "remaining_time": "22:03:40"} +{"current_steps": 512, "total_steps": 2737, "loss": 1.1143, "lr": 1.954274651011423e-05, "epoch": 1.309462915601023, "percentage": 18.71, "elapsed_time": "5:04:27", "remaining_time": "22:03:04"} +{"current_steps": 513, "total_steps": 2737, "loss": 1.0674, "lr": 1.9538925836487436e-05, "epoch": 1.3120204603580563, "percentage": 18.74, "elapsed_time": "5:05:02", "remaining_time": "22:02:28"} +{"current_steps": 514, "total_steps": 2737, "loss": 1.0885, "lr": 1.953508964362473e-05, "epoch": 1.3145780051150895, "percentage": 18.78, "elapsed_time": "5:05:38", "remaining_time": "22:01:52"} +{"current_steps": 515, "total_steps": 2737, "loss": 1.0807, "lr": 1.9531237937767352e-05, "epoch": 1.317135549872123, "percentage": 18.82, "elapsed_time": "5:06:14", "remaining_time": "22:01:20"} +{"current_steps": 516, "total_steps": 2737, "loss": 1.0766, "lr": 1.9527370725181793e-05, "epoch": 1.319693094629156, "percentage": 18.85, "elapsed_time": "5:06:50", "remaining_time": "22:00:43"} +{"current_steps": 517, "total_steps": 2737, "loss": 1.0712, "lr": 1.9523488012159762e-05, "epoch": 1.3222506393861893, "percentage": 18.89, "elapsed_time": "5:07:26", "remaining_time": "22:00:08"} +{"current_steps": 518, "total_steps": 2737, "loss": 1.0888, "lr": 1.9519589805018187e-05, "epoch": 1.3248081841432224, "percentage": 18.93, "elapsed_time": "5:08:01", "remaining_time": "21:59:31"} +{"current_steps": 519, "total_steps": 2737, "loss": 1.0801, "lr": 1.951567611009922e-05, "epoch": 1.3273657289002558, "percentage": 18.96, "elapsed_time": "5:08:37", "remaining_time": "21:58:55"} +{"current_steps": 520, "total_steps": 2737, "loss": 1.1149, "lr": 1.9511746933770186e-05, "epoch": 1.329923273657289, "percentage": 19.0, "elapsed_time": "5:09:12", "remaining_time": "21:58:19"} +{"current_steps": 521, "total_steps": 2737, "loss": 1.1202, "lr": 1.9507802282423612e-05, "epoch": 1.3324808184143222, "percentage": 19.04, "elapsed_time": "5:09:48", "remaining_time": "21:57:42"} +{"current_steps": 522, "total_steps": 2737, "loss": 1.1006, "lr": 1.9503842162477205e-05, "epoch": 1.3350383631713556, "percentage": 19.07, "elapsed_time": "5:10:23", "remaining_time": "21:57:06"} +{"current_steps": 523, "total_steps": 2737, "loss": 1.0873, "lr": 1.9499866580373826e-05, "epoch": 1.3375959079283888, "percentage": 19.11, "elapsed_time": "5:11:00", "remaining_time": "21:56:33"} +{"current_steps": 524, "total_steps": 2737, "loss": 1.1154, "lr": 1.94958755425815e-05, "epoch": 1.340153452685422, "percentage": 19.15, "elapsed_time": "5:11:35", "remaining_time": "21:55:56"} +{"current_steps": 525, "total_steps": 2737, "loss": 1.0867, "lr": 1.9491869055593392e-05, "epoch": 1.3427109974424551, "percentage": 19.18, "elapsed_time": "5:12:11", "remaining_time": "21:55:21"} +{"current_steps": 526, "total_steps": 2737, "loss": 1.1126, "lr": 1.9487847125927814e-05, "epoch": 1.3452685421994885, "percentage": 19.22, "elapsed_time": "5:12:47", "remaining_time": "21:54:46"} +{"current_steps": 527, "total_steps": 2737, "loss": 1.0625, "lr": 1.948380976012819e-05, "epoch": 1.3478260869565217, "percentage": 19.25, "elapsed_time": "5:13:23", "remaining_time": "21:54:11"} +{"current_steps": 528, "total_steps": 2737, "loss": 1.1262, "lr": 1.9479756964763062e-05, "epoch": 1.350383631713555, "percentage": 19.29, "elapsed_time": "5:13:58", "remaining_time": "21:53:35"} +{"current_steps": 529, "total_steps": 2737, "loss": 1.0865, "lr": 1.9475688746426075e-05, "epoch": 1.3529411764705883, "percentage": 19.33, "elapsed_time": "5:14:34", "remaining_time": "21:52:59"} +{"current_steps": 530, "total_steps": 2737, "loss": 1.0594, "lr": 1.9471605111735964e-05, "epoch": 1.3554987212276215, "percentage": 19.36, "elapsed_time": "5:15:09", "remaining_time": "21:52:23"} +{"current_steps": 531, "total_steps": 2737, "loss": 1.0955, "lr": 1.9467506067336554e-05, "epoch": 1.3580562659846547, "percentage": 19.4, "elapsed_time": "5:15:45", "remaining_time": "21:51:47"} +{"current_steps": 532, "total_steps": 2737, "loss": 1.0824, "lr": 1.946339161989672e-05, "epoch": 1.3606138107416879, "percentage": 19.44, "elapsed_time": "5:16:21", "remaining_time": "21:51:11"} +{"current_steps": 533, "total_steps": 2737, "loss": 1.1215, "lr": 1.9459261776110426e-05, "epoch": 1.3631713554987213, "percentage": 19.47, "elapsed_time": "5:16:56", "remaining_time": "21:50:35"} +{"current_steps": 534, "total_steps": 2737, "loss": 1.086, "lr": 1.945511654269666e-05, "epoch": 1.3657289002557544, "percentage": 19.51, "elapsed_time": "5:17:32", "remaining_time": "21:49:59"} +{"current_steps": 535, "total_steps": 2737, "loss": 1.0992, "lr": 1.945095592639946e-05, "epoch": 1.3682864450127878, "percentage": 19.55, "elapsed_time": "5:18:07", "remaining_time": "21:49:23"} +{"current_steps": 536, "total_steps": 2737, "loss": 1.1311, "lr": 1.944677993398789e-05, "epoch": 1.370843989769821, "percentage": 19.58, "elapsed_time": "5:18:44", "remaining_time": "21:48:51"} +{"current_steps": 537, "total_steps": 2737, "loss": 1.0869, "lr": 1.944258857225603e-05, "epoch": 1.3734015345268542, "percentage": 19.62, "elapsed_time": "5:19:19", "remaining_time": "21:48:15"} +{"current_steps": 538, "total_steps": 2737, "loss": 1.1034, "lr": 1.943838184802296e-05, "epoch": 1.3759590792838874, "percentage": 19.66, "elapsed_time": "5:19:55", "remaining_time": "21:47:38"} +{"current_steps": 539, "total_steps": 2737, "loss": 1.0834, "lr": 1.9434159768132762e-05, "epoch": 1.3785166240409208, "percentage": 19.69, "elapsed_time": "5:20:31", "remaining_time": "21:47:03"} +{"current_steps": 540, "total_steps": 2737, "loss": 1.0952, "lr": 1.9429922339454486e-05, "epoch": 1.381074168797954, "percentage": 19.73, "elapsed_time": "5:21:06", "remaining_time": "21:46:28"} +{"current_steps": 541, "total_steps": 2737, "loss": 1.1195, "lr": 1.9425669568882175e-05, "epoch": 1.3836317135549872, "percentage": 19.77, "elapsed_time": "5:21:42", "remaining_time": "21:45:52"} +{"current_steps": 542, "total_steps": 2737, "loss": 1.1082, "lr": 1.942140146333481e-05, "epoch": 1.3861892583120206, "percentage": 19.8, "elapsed_time": "5:22:18", "remaining_time": "21:45:16"} +{"current_steps": 543, "total_steps": 2737, "loss": 1.0664, "lr": 1.9417118029756342e-05, "epoch": 1.3887468030690537, "percentage": 19.84, "elapsed_time": "5:22:53", "remaining_time": "21:44:40"} +{"current_steps": 544, "total_steps": 2737, "loss": 1.087, "lr": 1.9412819275115648e-05, "epoch": 1.391304347826087, "percentage": 19.88, "elapsed_time": "5:23:29", "remaining_time": "21:44:03"} +{"current_steps": 545, "total_steps": 2737, "loss": 1.078, "lr": 1.9408505206406526e-05, "epoch": 1.39386189258312, "percentage": 19.91, "elapsed_time": "5:24:04", "remaining_time": "21:43:28"} +{"current_steps": 546, "total_steps": 2737, "loss": 1.0549, "lr": 1.9404175830647703e-05, "epoch": 1.3964194373401535, "percentage": 19.95, "elapsed_time": "5:24:40", "remaining_time": "21:42:52"} +{"current_steps": 547, "total_steps": 2737, "loss": 1.0946, "lr": 1.93998311548828e-05, "epoch": 1.3989769820971867, "percentage": 19.99, "elapsed_time": "5:25:16", "remaining_time": "21:42:16"} +{"current_steps": 548, "total_steps": 2737, "loss": 1.0898, "lr": 1.939547118618033e-05, "epoch": 1.40153452685422, "percentage": 20.02, "elapsed_time": "5:25:51", "remaining_time": "21:41:40"} +{"current_steps": 549, "total_steps": 2737, "loss": 1.1098, "lr": 1.9391095931633694e-05, "epoch": 1.4040920716112533, "percentage": 20.06, "elapsed_time": "5:26:27", "remaining_time": "21:41:04"} +{"current_steps": 550, "total_steps": 2737, "loss": 1.0469, "lr": 1.9386705398361156e-05, "epoch": 1.4066496163682864, "percentage": 20.09, "elapsed_time": "5:27:02", "remaining_time": "21:40:28"} +{"current_steps": 551, "total_steps": 2737, "loss": 1.0616, "lr": 1.938229959350584e-05, "epoch": 1.4092071611253196, "percentage": 20.13, "elapsed_time": "5:27:38", "remaining_time": "21:39:52"} +{"current_steps": 552, "total_steps": 2737, "loss": 1.1083, "lr": 1.937787852423571e-05, "epoch": 1.4117647058823528, "percentage": 20.17, "elapsed_time": "5:28:14", "remaining_time": "21:39:16"} +{"current_steps": 553, "total_steps": 2737, "loss": 1.0908, "lr": 1.937344219774358e-05, "epoch": 1.4143222506393862, "percentage": 20.2, "elapsed_time": "5:28:50", "remaining_time": "21:38:41"} +{"current_steps": 554, "total_steps": 2737, "loss": 1.102, "lr": 1.9368990621247062e-05, "epoch": 1.4168797953964194, "percentage": 20.24, "elapsed_time": "5:29:25", "remaining_time": "21:38:03"} +{"current_steps": 555, "total_steps": 2737, "loss": 1.1147, "lr": 1.9364523801988606e-05, "epoch": 1.4194373401534528, "percentage": 20.28, "elapsed_time": "5:30:00", "remaining_time": "21:37:27"} +{"current_steps": 556, "total_steps": 2737, "loss": 1.0962, "lr": 1.9360041747235437e-05, "epoch": 1.421994884910486, "percentage": 20.31, "elapsed_time": "5:30:36", "remaining_time": "21:36:51"} +{"current_steps": 557, "total_steps": 2737, "loss": 1.0864, "lr": 1.9355544464279587e-05, "epoch": 1.4245524296675192, "percentage": 20.35, "elapsed_time": "5:31:12", "remaining_time": "21:36:15"} +{"current_steps": 558, "total_steps": 2737, "loss": 1.0747, "lr": 1.9351031960437848e-05, "epoch": 1.4271099744245523, "percentage": 20.39, "elapsed_time": "5:31:47", "remaining_time": "21:35:39"} +{"current_steps": 559, "total_steps": 2737, "loss": 1.0731, "lr": 1.934650424305178e-05, "epoch": 1.4296675191815857, "percentage": 20.42, "elapsed_time": "5:32:23", "remaining_time": "21:35:03"} +{"current_steps": 560, "total_steps": 2737, "loss": 1.0598, "lr": 1.9341961319487704e-05, "epoch": 1.432225063938619, "percentage": 20.46, "elapsed_time": "5:32:58", "remaining_time": "21:34:27"} +{"current_steps": 561, "total_steps": 2737, "loss": 1.0712, "lr": 1.9337403197136663e-05, "epoch": 1.434782608695652, "percentage": 20.5, "elapsed_time": "5:33:34", "remaining_time": "21:33:53"} +{"current_steps": 562, "total_steps": 2737, "loss": 1.0883, "lr": 1.9332829883414444e-05, "epoch": 1.4373401534526855, "percentage": 20.53, "elapsed_time": "5:34:10", "remaining_time": "21:33:17"} +{"current_steps": 563, "total_steps": 2737, "loss": 1.1141, "lr": 1.932824138576154e-05, "epoch": 1.4398976982097187, "percentage": 20.57, "elapsed_time": "5:34:45", "remaining_time": "21:32:41"} +{"current_steps": 564, "total_steps": 2737, "loss": 1.1109, "lr": 1.9323637711643147e-05, "epoch": 1.4424552429667519, "percentage": 20.61, "elapsed_time": "5:35:21", "remaining_time": "21:32:05"} +{"current_steps": 565, "total_steps": 2737, "loss": 1.1192, "lr": 1.9319018868549165e-05, "epoch": 1.445012787723785, "percentage": 20.64, "elapsed_time": "5:35:57", "remaining_time": "21:31:29"} +{"current_steps": 566, "total_steps": 2737, "loss": 1.0817, "lr": 1.931438486399415e-05, "epoch": 1.4475703324808185, "percentage": 20.68, "elapsed_time": "5:36:32", "remaining_time": "21:30:53"} +{"current_steps": 567, "total_steps": 2737, "loss": 1.0607, "lr": 1.930973570551735e-05, "epoch": 1.4501278772378516, "percentage": 20.72, "elapsed_time": "5:37:08", "remaining_time": "21:30:17"} +{"current_steps": 568, "total_steps": 2737, "loss": 1.0914, "lr": 1.9305071400682644e-05, "epoch": 1.452685421994885, "percentage": 20.75, "elapsed_time": "5:37:44", "remaining_time": "21:29:41"} +{"current_steps": 569, "total_steps": 2737, "loss": 1.0834, "lr": 1.9300391957078564e-05, "epoch": 1.4552429667519182, "percentage": 20.79, "elapsed_time": "5:38:19", "remaining_time": "21:29:06"} +{"current_steps": 570, "total_steps": 2737, "loss": 1.0733, "lr": 1.9295697382318286e-05, "epoch": 1.4578005115089514, "percentage": 20.83, "elapsed_time": "5:38:55", "remaining_time": "21:28:30"} +{"current_steps": 571, "total_steps": 2737, "loss": 1.0955, "lr": 1.9290987684039576e-05, "epoch": 1.4603580562659846, "percentage": 20.86, "elapsed_time": "5:39:31", "remaining_time": "21:27:54"} +{"current_steps": 572, "total_steps": 2737, "loss": 1.0977, "lr": 1.9286262869904827e-05, "epoch": 1.4629156010230178, "percentage": 20.9, "elapsed_time": "5:40:07", "remaining_time": "21:27:20"} +{"current_steps": 573, "total_steps": 2737, "loss": 1.0826, "lr": 1.928152294760101e-05, "epoch": 1.4654731457800512, "percentage": 20.94, "elapsed_time": "5:40:42", "remaining_time": "21:26:44"} +{"current_steps": 574, "total_steps": 2737, "loss": 1.0693, "lr": 1.9276767924839687e-05, "epoch": 1.4680306905370843, "percentage": 20.97, "elapsed_time": "5:41:18", "remaining_time": "21:26:08"} +{"current_steps": 575, "total_steps": 2737, "loss": 1.1031, "lr": 1.927199780935698e-05, "epoch": 1.4705882352941178, "percentage": 21.01, "elapsed_time": "5:41:53", "remaining_time": "21:25:32"} +{"current_steps": 576, "total_steps": 2737, "loss": 1.1081, "lr": 1.926721260891357e-05, "epoch": 1.473145780051151, "percentage": 21.04, "elapsed_time": "5:42:29", "remaining_time": "21:24:57"} +{"current_steps": 577, "total_steps": 2737, "loss": 1.0984, "lr": 1.9262412331294677e-05, "epoch": 1.4757033248081841, "percentage": 21.08, "elapsed_time": "5:43:04", "remaining_time": "21:24:18"} +{"current_steps": 578, "total_steps": 2737, "loss": 1.0907, "lr": 1.9257596984310055e-05, "epoch": 1.4782608695652173, "percentage": 21.12, "elapsed_time": "5:43:40", "remaining_time": "21:23:42"} +{"current_steps": 579, "total_steps": 2737, "loss": 1.0667, "lr": 1.925276657579397e-05, "epoch": 1.4808184143222507, "percentage": 21.15, "elapsed_time": "5:44:15", "remaining_time": "21:23:06"} +{"current_steps": 580, "total_steps": 2737, "loss": 1.0814, "lr": 1.9247921113605197e-05, "epoch": 1.4833759590792839, "percentage": 21.19, "elapsed_time": "5:44:51", "remaining_time": "21:22:30"} +{"current_steps": 581, "total_steps": 2737, "loss": 1.0984, "lr": 1.9243060605626995e-05, "epoch": 1.485933503836317, "percentage": 21.23, "elapsed_time": "5:45:26", "remaining_time": "21:21:54"} +{"current_steps": 582, "total_steps": 2737, "loss": 1.0903, "lr": 1.9238185059767116e-05, "epoch": 1.4884910485933505, "percentage": 21.26, "elapsed_time": "5:46:02", "remaining_time": "21:21:18"} +{"current_steps": 583, "total_steps": 2737, "loss": 1.1135, "lr": 1.9233294483957758e-05, "epoch": 1.4910485933503836, "percentage": 21.3, "elapsed_time": "5:46:38", "remaining_time": "21:20:42"} +{"current_steps": 584, "total_steps": 2737, "loss": 1.1228, "lr": 1.922838888615559e-05, "epoch": 1.4936061381074168, "percentage": 21.34, "elapsed_time": "5:47:13", "remaining_time": "21:20:07"} +{"current_steps": 585, "total_steps": 2737, "loss": 1.0595, "lr": 1.922346827434171e-05, "epoch": 1.49616368286445, "percentage": 21.37, "elapsed_time": "5:47:49", "remaining_time": "21:19:31"} +{"current_steps": 586, "total_steps": 2737, "loss": 1.0742, "lr": 1.921853265652164e-05, "epoch": 1.4987212276214834, "percentage": 21.41, "elapsed_time": "5:48:25", "remaining_time": "21:18:55"} +{"current_steps": 587, "total_steps": 2737, "loss": 1.0823, "lr": 1.9213582040725333e-05, "epoch": 1.5012787723785166, "percentage": 21.45, "elapsed_time": "5:49:00", "remaining_time": "21:18:19"} +{"current_steps": 588, "total_steps": 2737, "loss": 1.1113, "lr": 1.9208616435007124e-05, "epoch": 1.50383631713555, "percentage": 21.48, "elapsed_time": "5:49:36", "remaining_time": "21:17:43"} +{"current_steps": 589, "total_steps": 2737, "loss": 1.0495, "lr": 1.9203635847445743e-05, "epoch": 1.5063938618925832, "percentage": 21.52, "elapsed_time": "5:50:11", "remaining_time": "21:17:07"} +{"current_steps": 590, "total_steps": 2737, "loss": 1.0778, "lr": 1.9198640286144296e-05, "epoch": 1.5089514066496164, "percentage": 21.56, "elapsed_time": "5:50:47", "remaining_time": "21:16:30"} +{"current_steps": 591, "total_steps": 2737, "loss": 1.0526, "lr": 1.9193629759230252e-05, "epoch": 1.5115089514066495, "percentage": 21.59, "elapsed_time": "5:51:22", "remaining_time": "21:15:54"} +{"current_steps": 592, "total_steps": 2737, "loss": 1.1082, "lr": 1.9188604274855417e-05, "epoch": 1.5140664961636827, "percentage": 21.63, "elapsed_time": "5:51:58", "remaining_time": "21:15:18"} +{"current_steps": 593, "total_steps": 2737, "loss": 1.0358, "lr": 1.9183563841195948e-05, "epoch": 1.5166240409207161, "percentage": 21.67, "elapsed_time": "5:52:33", "remaining_time": "21:14:42"} +{"current_steps": 594, "total_steps": 2737, "loss": 1.1016, "lr": 1.917850846645231e-05, "epoch": 1.5191815856777495, "percentage": 21.7, "elapsed_time": "5:53:09", "remaining_time": "21:14:06"} +{"current_steps": 595, "total_steps": 2737, "loss": 1.0723, "lr": 1.917343815884929e-05, "epoch": 1.5217391304347827, "percentage": 21.74, "elapsed_time": "5:53:45", "remaining_time": "21:13:31"} +{"current_steps": 596, "total_steps": 2737, "loss": 1.0842, "lr": 1.9168352926635948e-05, "epoch": 1.5242966751918159, "percentage": 21.78, "elapsed_time": "5:54:20", "remaining_time": "21:12:55"} +{"current_steps": 597, "total_steps": 2737, "loss": 1.0928, "lr": 1.9163252778085646e-05, "epoch": 1.526854219948849, "percentage": 21.81, "elapsed_time": "5:54:56", "remaining_time": "21:12:19"} +{"current_steps": 598, "total_steps": 2737, "loss": 1.0954, "lr": 1.9158137721496014e-05, "epoch": 1.5294117647058822, "percentage": 21.85, "elapsed_time": "5:55:32", "remaining_time": "21:11:43"} +{"current_steps": 599, "total_steps": 2737, "loss": 1.0703, "lr": 1.9153007765188918e-05, "epoch": 1.5319693094629157, "percentage": 21.89, "elapsed_time": "5:56:07", "remaining_time": "21:11:07"} +{"current_steps": 600, "total_steps": 2737, "loss": 1.1178, "lr": 1.914786291751048e-05, "epoch": 1.5345268542199488, "percentage": 21.92, "elapsed_time": "5:56:43", "remaining_time": "21:10:32"} +{"current_steps": 601, "total_steps": 2737, "loss": 1.0711, "lr": 1.9142703186831044e-05, "epoch": 1.5370843989769822, "percentage": 21.96, "elapsed_time": "5:57:19", "remaining_time": "21:09:56"} +{"current_steps": 602, "total_steps": 2737, "loss": 1.0669, "lr": 1.9137528581545172e-05, "epoch": 1.5396419437340154, "percentage": 21.99, "elapsed_time": "5:57:54", "remaining_time": "21:09:20"} +{"current_steps": 603, "total_steps": 2737, "loss": 1.0738, "lr": 1.9132339110071623e-05, "epoch": 1.5421994884910486, "percentage": 22.03, "elapsed_time": "5:58:31", "remaining_time": "21:08:47"} +{"current_steps": 604, "total_steps": 2737, "loss": 1.0891, "lr": 1.9127134780853343e-05, "epoch": 1.5447570332480818, "percentage": 22.07, "elapsed_time": "5:59:06", "remaining_time": "21:08:11"} +{"current_steps": 605, "total_steps": 2737, "loss": 1.0752, "lr": 1.9121915602357447e-05, "epoch": 1.547314578005115, "percentage": 22.1, "elapsed_time": "5:59:42", "remaining_time": "21:07:35"} +{"current_steps": 606, "total_steps": 2737, "loss": 1.0531, "lr": 1.9116681583075215e-05, "epoch": 1.5498721227621484, "percentage": 22.14, "elapsed_time": "6:00:18", "remaining_time": "21:06:59"} +{"current_steps": 607, "total_steps": 2737, "loss": 1.0775, "lr": 1.9111432731522067e-05, "epoch": 1.5524296675191815, "percentage": 22.18, "elapsed_time": "6:00:53", "remaining_time": "21:06:23"} +{"current_steps": 608, "total_steps": 2737, "loss": 1.0989, "lr": 1.910616905623756e-05, "epoch": 1.554987212276215, "percentage": 22.21, "elapsed_time": "6:01:29", "remaining_time": "21:05:47"} +{"current_steps": 609, "total_steps": 2737, "loss": 1.0942, "lr": 1.910089056578536e-05, "epoch": 1.5575447570332481, "percentage": 22.25, "elapsed_time": "6:02:04", "remaining_time": "21:05:12"} +{"current_steps": 610, "total_steps": 2737, "loss": 1.0639, "lr": 1.9095597268753243e-05, "epoch": 1.5601023017902813, "percentage": 22.29, "elapsed_time": "6:02:41", "remaining_time": "21:04:39"} +{"current_steps": 611, "total_steps": 2737, "loss": 1.1013, "lr": 1.9090289173753077e-05, "epoch": 1.5626598465473145, "percentage": 22.32, "elapsed_time": "6:03:16", "remaining_time": "21:04:03"} +{"current_steps": 612, "total_steps": 2737, "loss": 1.0904, "lr": 1.908496628942079e-05, "epoch": 1.5652173913043477, "percentage": 22.36, "elapsed_time": "6:03:52", "remaining_time": "21:03:28"} +{"current_steps": 613, "total_steps": 2737, "loss": 1.0789, "lr": 1.907962862441639e-05, "epoch": 1.567774936061381, "percentage": 22.4, "elapsed_time": "6:04:28", "remaining_time": "21:02:51"} +{"current_steps": 614, "total_steps": 2737, "loss": 1.083, "lr": 1.9074276187423925e-05, "epoch": 1.5703324808184145, "percentage": 22.43, "elapsed_time": "6:05:03", "remaining_time": "21:02:15"} +{"current_steps": 615, "total_steps": 2737, "loss": 1.1052, "lr": 1.906890898715147e-05, "epoch": 1.5728900255754477, "percentage": 22.47, "elapsed_time": "6:05:39", "remaining_time": "21:01:39"} +{"current_steps": 616, "total_steps": 2737, "loss": 1.0587, "lr": 1.9063527032331128e-05, "epoch": 1.5754475703324808, "percentage": 22.51, "elapsed_time": "6:06:15", "remaining_time": "21:01:04"} +{"current_steps": 617, "total_steps": 2737, "loss": 1.0906, "lr": 1.9058130331719002e-05, "epoch": 1.578005115089514, "percentage": 22.54, "elapsed_time": "6:06:50", "remaining_time": "21:00:28"} +{"current_steps": 618, "total_steps": 2737, "loss": 1.0828, "lr": 1.9052718894095183e-05, "epoch": 1.5805626598465472, "percentage": 22.58, "elapsed_time": "6:07:26", "remaining_time": "20:59:52"} +{"current_steps": 619, "total_steps": 2737, "loss": 1.0697, "lr": 1.904729272826375e-05, "epoch": 1.5831202046035806, "percentage": 22.62, "elapsed_time": "6:08:01", "remaining_time": "20:59:16"} +{"current_steps": 620, "total_steps": 2737, "loss": 1.0556, "lr": 1.9041851843052727e-05, "epoch": 1.5856777493606138, "percentage": 22.65, "elapsed_time": "6:08:37", "remaining_time": "20:58:40"} +{"current_steps": 621, "total_steps": 2737, "loss": 1.0888, "lr": 1.90363962473141e-05, "epoch": 1.5882352941176472, "percentage": 22.69, "elapsed_time": "6:09:13", "remaining_time": "20:58:04"} +{"current_steps": 622, "total_steps": 2737, "loss": 1.0991, "lr": 1.9030925949923777e-05, "epoch": 1.5907928388746804, "percentage": 22.73, "elapsed_time": "6:09:49", "remaining_time": "20:57:29"} +{"current_steps": 623, "total_steps": 2737, "loss": 1.0721, "lr": 1.9025440959781593e-05, "epoch": 1.5933503836317136, "percentage": 22.76, "elapsed_time": "6:10:24", "remaining_time": "20:56:53"} +{"current_steps": 624, "total_steps": 2737, "loss": 1.1146, "lr": 1.9019941285811284e-05, "epoch": 1.5959079283887467, "percentage": 22.8, "elapsed_time": "6:11:00", "remaining_time": "20:56:18"} +{"current_steps": 625, "total_steps": 2737, "loss": 1.1386, "lr": 1.9014426936960477e-05, "epoch": 1.59846547314578, "percentage": 22.84, "elapsed_time": "6:11:35", "remaining_time": "20:55:42"} +{"current_steps": 626, "total_steps": 2737, "loss": 1.0651, "lr": 1.900889792220067e-05, "epoch": 1.6010230179028133, "percentage": 22.87, "elapsed_time": "6:12:11", "remaining_time": "20:55:07"} +{"current_steps": 627, "total_steps": 2737, "loss": 1.0737, "lr": 1.9003354250527225e-05, "epoch": 1.6035805626598465, "percentage": 22.91, "elapsed_time": "6:12:47", "remaining_time": "20:54:31"} +{"current_steps": 628, "total_steps": 2737, "loss": 1.1093, "lr": 1.899779593095935e-05, "epoch": 1.60613810741688, "percentage": 22.94, "elapsed_time": "6:13:22", "remaining_time": "20:53:55"} +{"current_steps": 629, "total_steps": 2737, "loss": 1.0631, "lr": 1.8992222972540083e-05, "epoch": 1.608695652173913, "percentage": 22.98, "elapsed_time": "6:13:58", "remaining_time": "20:53:19"} +{"current_steps": 630, "total_steps": 2737, "loss": 1.0684, "lr": 1.8986635384336275e-05, "epoch": 1.6112531969309463, "percentage": 23.02, "elapsed_time": "6:14:34", "remaining_time": "20:52:44"} +{"current_steps": 631, "total_steps": 2737, "loss": 1.0793, "lr": 1.8981033175438593e-05, "epoch": 1.6138107416879794, "percentage": 23.05, "elapsed_time": "6:15:09", "remaining_time": "20:52:08"} +{"current_steps": 632, "total_steps": 2737, "loss": 1.0741, "lr": 1.897541635496147e-05, "epoch": 1.6163682864450126, "percentage": 23.09, "elapsed_time": "6:15:45", "remaining_time": "20:51:32"} +{"current_steps": 633, "total_steps": 2737, "loss": 1.0536, "lr": 1.896978493204313e-05, "epoch": 1.618925831202046, "percentage": 23.13, "elapsed_time": "6:16:21", "remaining_time": "20:50:56"} +{"current_steps": 634, "total_steps": 2737, "loss": 1.1041, "lr": 1.896413891584554e-05, "epoch": 1.6214833759590794, "percentage": 23.16, "elapsed_time": "6:16:56", "remaining_time": "20:50:20"} +{"current_steps": 635, "total_steps": 2737, "loss": 1.0554, "lr": 1.8958478315554414e-05, "epoch": 1.6240409207161126, "percentage": 23.2, "elapsed_time": "6:17:32", "remaining_time": "20:49:44"} +{"current_steps": 636, "total_steps": 2737, "loss": 1.105, "lr": 1.8952803140379198e-05, "epoch": 1.6265984654731458, "percentage": 23.24, "elapsed_time": "6:18:08", "remaining_time": "20:49:11"} +{"current_steps": 637, "total_steps": 2737, "loss": 1.0966, "lr": 1.894711339955305e-05, "epoch": 1.629156010230179, "percentage": 23.27, "elapsed_time": "6:18:44", "remaining_time": "20:48:35"} +{"current_steps": 638, "total_steps": 2737, "loss": 1.0801, "lr": 1.8941409102332818e-05, "epoch": 1.6317135549872122, "percentage": 23.31, "elapsed_time": "6:19:20", "remaining_time": "20:48:00"} +{"current_steps": 639, "total_steps": 2737, "loss": 1.1168, "lr": 1.893569025799904e-05, "epoch": 1.6342710997442456, "percentage": 23.35, "elapsed_time": "6:19:55", "remaining_time": "20:47:24"} +{"current_steps": 640, "total_steps": 2737, "loss": 1.044, "lr": 1.8929956875855913e-05, "epoch": 1.6368286445012787, "percentage": 23.38, "elapsed_time": "6:20:31", "remaining_time": "20:46:48"} +{"current_steps": 641, "total_steps": 2737, "loss": 1.0678, "lr": 1.89242089652313e-05, "epoch": 1.6393861892583121, "percentage": 23.42, "elapsed_time": "6:21:07", "remaining_time": "20:46:12"} +{"current_steps": 642, "total_steps": 2737, "loss": 1.0713, "lr": 1.8918446535476683e-05, "epoch": 1.6419437340153453, "percentage": 23.46, "elapsed_time": "6:21:42", "remaining_time": "20:45:37"} +{"current_steps": 643, "total_steps": 2737, "loss": 1.0845, "lr": 1.8912669595967182e-05, "epoch": 1.6445012787723785, "percentage": 23.49, "elapsed_time": "6:22:18", "remaining_time": "20:45:01"} +{"current_steps": 644, "total_steps": 2737, "loss": 1.1325, "lr": 1.890687815610151e-05, "epoch": 1.6470588235294117, "percentage": 23.53, "elapsed_time": "6:22:53", "remaining_time": "20:44:25"} +{"current_steps": 645, "total_steps": 2737, "loss": 1.0417, "lr": 1.8901072225301983e-05, "epoch": 1.6496163682864449, "percentage": 23.57, "elapsed_time": "6:23:29", "remaining_time": "20:43:49"} +{"current_steps": 646, "total_steps": 2737, "loss": 1.0985, "lr": 1.8895251813014486e-05, "epoch": 1.6521739130434783, "percentage": 23.6, "elapsed_time": "6:24:05", "remaining_time": "20:43:13"} +{"current_steps": 647, "total_steps": 2737, "loss": 1.0579, "lr": 1.8889416928708465e-05, "epoch": 1.6547314578005117, "percentage": 23.64, "elapsed_time": "6:24:40", "remaining_time": "20:42:37"} +{"current_steps": 648, "total_steps": 2737, "loss": 1.075, "lr": 1.8883567581876913e-05, "epoch": 1.6572890025575449, "percentage": 23.68, "elapsed_time": "6:25:16", "remaining_time": "20:42:00"} +{"current_steps": 649, "total_steps": 2737, "loss": 1.082, "lr": 1.887770378203635e-05, "epoch": 1.659846547314578, "percentage": 23.71, "elapsed_time": "6:25:51", "remaining_time": "20:41:24"} +{"current_steps": 650, "total_steps": 2737, "loss": 1.0618, "lr": 1.8871825538726815e-05, "epoch": 1.6624040920716112, "percentage": 23.75, "elapsed_time": "6:26:27", "remaining_time": "20:40:49"} +{"current_steps": 651, "total_steps": 2737, "loss": 1.0883, "lr": 1.8865932861511836e-05, "epoch": 1.6649616368286444, "percentage": 23.79, "elapsed_time": "6:27:02", "remaining_time": "20:40:13"} +{"current_steps": 652, "total_steps": 2737, "loss": 1.1136, "lr": 1.8860025759978436e-05, "epoch": 1.6675191815856778, "percentage": 23.82, "elapsed_time": "6:27:39", "remaining_time": "20:39:39"} +{"current_steps": 653, "total_steps": 2737, "loss": 1.0876, "lr": 1.8854104243737096e-05, "epoch": 1.670076726342711, "percentage": 23.86, "elapsed_time": "6:28:14", "remaining_time": "20:39:03"} +{"current_steps": 654, "total_steps": 2737, "loss": 1.0921, "lr": 1.8848168322421756e-05, "epoch": 1.6726342710997444, "percentage": 23.89, "elapsed_time": "6:28:50", "remaining_time": "20:38:27"} +{"current_steps": 655, "total_steps": 2737, "loss": 1.0817, "lr": 1.884221800568979e-05, "epoch": 1.6751918158567776, "percentage": 23.93, "elapsed_time": "6:29:25", "remaining_time": "20:37:51"} +{"current_steps": 656, "total_steps": 2737, "loss": 1.0676, "lr": 1.8836253303221985e-05, "epoch": 1.6777493606138107, "percentage": 23.97, "elapsed_time": "6:30:01", "remaining_time": "20:37:15"} +{"current_steps": 657, "total_steps": 2737, "loss": 1.0694, "lr": 1.8830274224722544e-05, "epoch": 1.680306905370844, "percentage": 24.0, "elapsed_time": "6:30:37", "remaining_time": "20:36:39"} +{"current_steps": 658, "total_steps": 2737, "loss": 1.0939, "lr": 1.8824280779919055e-05, "epoch": 1.682864450127877, "percentage": 24.04, "elapsed_time": "6:31:12", "remaining_time": "20:36:03"} +{"current_steps": 659, "total_steps": 2737, "loss": 1.0949, "lr": 1.8818272978562472e-05, "epoch": 1.6854219948849105, "percentage": 24.08, "elapsed_time": "6:31:48", "remaining_time": "20:35:27"} +{"current_steps": 660, "total_steps": 2737, "loss": 1.1071, "lr": 1.8812250830427116e-05, "epoch": 1.6879795396419437, "percentage": 24.11, "elapsed_time": "6:32:23", "remaining_time": "20:34:51"} +{"current_steps": 661, "total_steps": 2737, "loss": 1.0884, "lr": 1.8806214345310648e-05, "epoch": 1.690537084398977, "percentage": 24.15, "elapsed_time": "6:32:59", "remaining_time": "20:34:15"} +{"current_steps": 662, "total_steps": 2737, "loss": 1.0786, "lr": 1.8800163533034048e-05, "epoch": 1.6930946291560103, "percentage": 24.19, "elapsed_time": "6:33:35", "remaining_time": "20:33:40"} +{"current_steps": 663, "total_steps": 2737, "loss": 1.1025, "lr": 1.879409840344161e-05, "epoch": 1.6956521739130435, "percentage": 24.22, "elapsed_time": "6:34:10", "remaining_time": "20:33:04"} +{"current_steps": 664, "total_steps": 2737, "loss": 1.0862, "lr": 1.8788018966400923e-05, "epoch": 1.6982097186700766, "percentage": 24.26, "elapsed_time": "6:34:46", "remaining_time": "20:32:28"} +{"current_steps": 665, "total_steps": 2737, "loss": 1.0903, "lr": 1.878192523180285e-05, "epoch": 1.7007672634271098, "percentage": 24.3, "elapsed_time": "6:35:21", "remaining_time": "20:31:50"} +{"current_steps": 666, "total_steps": 2737, "loss": 1.0659, "lr": 1.877581720956151e-05, "epoch": 1.7033248081841432, "percentage": 24.33, "elapsed_time": "6:35:56", "remaining_time": "20:31:14"} +{"current_steps": 667, "total_steps": 2737, "loss": 1.0803, "lr": 1.876969490961428e-05, "epoch": 1.7058823529411766, "percentage": 24.37, "elapsed_time": "6:36:32", "remaining_time": "20:30:38"} +{"current_steps": 668, "total_steps": 2737, "loss": 1.0729, "lr": 1.8763558341921762e-05, "epoch": 1.7084398976982098, "percentage": 24.41, "elapsed_time": "6:37:08", "remaining_time": "20:30:05"} +{"current_steps": 669, "total_steps": 2737, "loss": 1.1017, "lr": 1.8757407516467762e-05, "epoch": 1.710997442455243, "percentage": 24.44, "elapsed_time": "6:37:44", "remaining_time": "20:29:29"} +{"current_steps": 670, "total_steps": 2737, "loss": 1.0771, "lr": 1.8751242443259286e-05, "epoch": 1.7135549872122762, "percentage": 24.48, "elapsed_time": "6:38:20", "remaining_time": "20:28:53"} +{"current_steps": 671, "total_steps": 2737, "loss": 1.0972, "lr": 1.874506313232653e-05, "epoch": 1.7161125319693094, "percentage": 24.52, "elapsed_time": "6:38:56", "remaining_time": "20:28:20"} +{"current_steps": 672, "total_steps": 2737, "loss": 1.0948, "lr": 1.873886959372284e-05, "epoch": 1.7186700767263428, "percentage": 24.55, "elapsed_time": "6:39:32", "remaining_time": "20:27:44"} +{"current_steps": 673, "total_steps": 2737, "loss": 1.0726, "lr": 1.8732661837524722e-05, "epoch": 1.721227621483376, "percentage": 24.59, "elapsed_time": "6:40:07", "remaining_time": "20:27:08"} +{"current_steps": 674, "total_steps": 2737, "loss": 1.1154, "lr": 1.8726439873831803e-05, "epoch": 1.7237851662404093, "percentage": 24.63, "elapsed_time": "6:40:43", "remaining_time": "20:26:33"} +{"current_steps": 675, "total_steps": 2737, "loss": 1.0855, "lr": 1.8720203712766833e-05, "epoch": 1.7263427109974425, "percentage": 24.66, "elapsed_time": "6:41:19", "remaining_time": "20:25:58"} +{"current_steps": 676, "total_steps": 2737, "loss": 1.0561, "lr": 1.8713953364475654e-05, "epoch": 1.7289002557544757, "percentage": 24.7, "elapsed_time": "6:41:55", "remaining_time": "20:25:22"} +{"current_steps": 677, "total_steps": 2737, "loss": 1.0717, "lr": 1.8707688839127187e-05, "epoch": 1.7314578005115089, "percentage": 24.74, "elapsed_time": "6:42:30", "remaining_time": "20:24:46"} +{"current_steps": 678, "total_steps": 2737, "loss": 1.0733, "lr": 1.8701410146913427e-05, "epoch": 1.734015345268542, "percentage": 24.77, "elapsed_time": "6:43:06", "remaining_time": "20:24:10"} +{"current_steps": 679, "total_steps": 2737, "loss": 1.0736, "lr": 1.869511729804942e-05, "epoch": 1.7365728900255755, "percentage": 24.81, "elapsed_time": "6:43:41", "remaining_time": "20:23:34"} +{"current_steps": 680, "total_steps": 2737, "loss": 1.0718, "lr": 1.8688810302773225e-05, "epoch": 1.7391304347826086, "percentage": 24.84, "elapsed_time": "6:44:17", "remaining_time": "20:22:59"} +{"current_steps": 681, "total_steps": 2737, "loss": 1.0633, "lr": 1.8682489171345942e-05, "epoch": 1.741687979539642, "percentage": 24.88, "elapsed_time": "6:44:53", "remaining_time": "20:22:23"} +{"current_steps": 682, "total_steps": 2737, "loss": 1.1055, "lr": 1.8676153914051648e-05, "epoch": 1.7442455242966752, "percentage": 24.92, "elapsed_time": "6:45:28", "remaining_time": "20:21:47"} +{"current_steps": 683, "total_steps": 2737, "loss": 1.1019, "lr": 1.866980454119741e-05, "epoch": 1.7468030690537084, "percentage": 24.95, "elapsed_time": "6:46:04", "remaining_time": "20:21:11"} +{"current_steps": 684, "total_steps": 2737, "loss": 1.0856, "lr": 1.8663441063113266e-05, "epoch": 1.7493606138107416, "percentage": 24.99, "elapsed_time": "6:46:40", "remaining_time": "20:20:36"} +{"current_steps": 685, "total_steps": 2737, "loss": 1.0797, "lr": 1.8657063490152193e-05, "epoch": 1.7519181585677748, "percentage": 25.03, "elapsed_time": "6:47:15", "remaining_time": "20:20:00"} +{"current_steps": 686, "total_steps": 2737, "loss": 1.1068, "lr": 1.8650671832690106e-05, "epoch": 1.7544757033248082, "percentage": 25.06, "elapsed_time": "6:47:51", "remaining_time": "20:19:24"} +{"current_steps": 687, "total_steps": 2737, "loss": 1.0801, "lr": 1.864426610112583e-05, "epoch": 1.7570332480818416, "percentage": 25.1, "elapsed_time": "6:48:26", "remaining_time": "20:18:48"} +{"current_steps": 688, "total_steps": 2737, "loss": 1.0712, "lr": 1.8637846305881092e-05, "epoch": 1.7595907928388748, "percentage": 25.14, "elapsed_time": "6:49:02", "remaining_time": "20:18:12"} +{"current_steps": 689, "total_steps": 2737, "loss": 1.0518, "lr": 1.8631412457400494e-05, "epoch": 1.762148337595908, "percentage": 25.17, "elapsed_time": "6:49:38", "remaining_time": "20:17:37"} +{"current_steps": 690, "total_steps": 2737, "loss": 1.0802, "lr": 1.862496456615151e-05, "epoch": 1.7647058823529411, "percentage": 25.21, "elapsed_time": "6:50:13", "remaining_time": "20:17:01"} +{"current_steps": 691, "total_steps": 2737, "loss": 1.0921, "lr": 1.861850264262445e-05, "epoch": 1.7672634271099743, "percentage": 25.25, "elapsed_time": "6:50:49", "remaining_time": "20:16:25"} +{"current_steps": 692, "total_steps": 2737, "loss": 1.0824, "lr": 1.8612026697332466e-05, "epoch": 1.7698209718670077, "percentage": 25.28, "elapsed_time": "6:51:25", "remaining_time": "20:15:49"} +{"current_steps": 693, "total_steps": 2737, "loss": 1.0958, "lr": 1.860553674081151e-05, "epoch": 1.772378516624041, "percentage": 25.32, "elapsed_time": "6:52:00", "remaining_time": "20:15:13"} +{"current_steps": 694, "total_steps": 2737, "loss": 1.0511, "lr": 1.859903278362034e-05, "epoch": 1.7749360613810743, "percentage": 25.36, "elapsed_time": "6:52:36", "remaining_time": "20:14:37"} +{"current_steps": 695, "total_steps": 2737, "loss": 1.064, "lr": 1.8592514836340485e-05, "epoch": 1.7774936061381075, "percentage": 25.39, "elapsed_time": "6:53:11", "remaining_time": "20:14:01"} +{"current_steps": 696, "total_steps": 2737, "loss": 1.0974, "lr": 1.8585982909576243e-05, "epoch": 1.7800511508951407, "percentage": 25.43, "elapsed_time": "6:53:47", "remaining_time": "20:13:25"} +{"current_steps": 697, "total_steps": 2737, "loss": 1.0745, "lr": 1.857943701395464e-05, "epoch": 1.7826086956521738, "percentage": 25.47, "elapsed_time": "6:54:22", "remaining_time": "20:12:49"} +{"current_steps": 698, "total_steps": 2737, "loss": 1.094, "lr": 1.857287716012545e-05, "epoch": 1.785166240409207, "percentage": 25.5, "elapsed_time": "6:54:58", "remaining_time": "20:12:13"} +{"current_steps": 699, "total_steps": 2737, "loss": 1.0764, "lr": 1.8566303358761134e-05, "epoch": 1.7877237851662404, "percentage": 25.54, "elapsed_time": "6:55:34", "remaining_time": "20:11:37"} +{"current_steps": 700, "total_steps": 2737, "loss": 1.095, "lr": 1.8559715620556865e-05, "epoch": 1.7902813299232738, "percentage": 25.58, "elapsed_time": "6:56:09", "remaining_time": "20:11:01"} +{"current_steps": 701, "total_steps": 2737, "loss": 1.0636, "lr": 1.855311395623048e-05, "epoch": 1.792838874680307, "percentage": 25.61, "elapsed_time": "6:56:45", "remaining_time": "20:10:25"} +{"current_steps": 702, "total_steps": 2737, "loss": 1.0836, "lr": 1.854649837652247e-05, "epoch": 1.7953964194373402, "percentage": 25.65, "elapsed_time": "6:57:20", "remaining_time": "20:09:49"} +{"current_steps": 703, "total_steps": 2737, "loss": 1.0848, "lr": 1.8539868892195972e-05, "epoch": 1.7979539641943734, "percentage": 25.69, "elapsed_time": "6:57:56", "remaining_time": "20:09:14"} +{"current_steps": 704, "total_steps": 2737, "loss": 1.0663, "lr": 1.8533225514036742e-05, "epoch": 1.8005115089514065, "percentage": 25.72, "elapsed_time": "6:58:31", "remaining_time": "20:08:36"} +{"current_steps": 705, "total_steps": 2737, "loss": 1.094, "lr": 1.852656825285314e-05, "epoch": 1.80306905370844, "percentage": 25.76, "elapsed_time": "6:59:06", "remaining_time": "20:08:00"} +{"current_steps": 706, "total_steps": 2737, "loss": 1.0455, "lr": 1.8519897119476115e-05, "epoch": 1.8056265984654731, "percentage": 25.79, "elapsed_time": "6:59:42", "remaining_time": "20:07:24"} +{"current_steps": 707, "total_steps": 2737, "loss": 1.0525, "lr": 1.8513212124759185e-05, "epoch": 1.8081841432225065, "percentage": 25.83, "elapsed_time": "7:00:18", "remaining_time": "20:06:48"} +{"current_steps": 708, "total_steps": 2737, "loss": 1.0708, "lr": 1.8506513279578415e-05, "epoch": 1.8107416879795397, "percentage": 25.87, "elapsed_time": "7:00:53", "remaining_time": "20:06:12"} +{"current_steps": 709, "total_steps": 2737, "loss": 1.0269, "lr": 1.849980059483241e-05, "epoch": 1.813299232736573, "percentage": 25.9, "elapsed_time": "7:01:29", "remaining_time": "20:05:36"} +{"current_steps": 710, "total_steps": 2737, "loss": 1.0742, "lr": 1.849307408144229e-05, "epoch": 1.815856777493606, "percentage": 25.94, "elapsed_time": "7:02:04", "remaining_time": "20:05:00"} +{"current_steps": 711, "total_steps": 2737, "loss": 1.1291, "lr": 1.8486333750351668e-05, "epoch": 1.8184143222506393, "percentage": 25.98, "elapsed_time": "7:02:40", "remaining_time": "20:04:25"} +{"current_steps": 712, "total_steps": 2737, "loss": 1.0754, "lr": 1.8479579612526642e-05, "epoch": 1.8209718670076727, "percentage": 26.01, "elapsed_time": "7:03:16", "remaining_time": "20:03:49"} +{"current_steps": 713, "total_steps": 2737, "loss": 1.083, "lr": 1.8472811678955773e-05, "epoch": 1.8235294117647058, "percentage": 26.05, "elapsed_time": "7:03:51", "remaining_time": "20:03:13"} +{"current_steps": 714, "total_steps": 2737, "loss": 1.0749, "lr": 1.8466029960650066e-05, "epoch": 1.8260869565217392, "percentage": 26.09, "elapsed_time": "7:04:27", "remaining_time": "20:02:37"} +{"current_steps": 715, "total_steps": 2737, "loss": 1.0549, "lr": 1.845923446864295e-05, "epoch": 1.8286445012787724, "percentage": 26.12, "elapsed_time": "7:05:02", "remaining_time": "20:02:01"} +{"current_steps": 716, "total_steps": 2737, "loss": 1.0721, "lr": 1.845242521399027e-05, "epoch": 1.8312020460358056, "percentage": 26.16, "elapsed_time": "7:05:38", "remaining_time": "20:01:26"} +{"current_steps": 717, "total_steps": 2737, "loss": 1.0449, "lr": 1.8445602207770254e-05, "epoch": 1.8337595907928388, "percentage": 26.2, "elapsed_time": "7:06:14", "remaining_time": "20:00:50"} +{"current_steps": 718, "total_steps": 2737, "loss": 1.0905, "lr": 1.8438765461083504e-05, "epoch": 1.836317135549872, "percentage": 26.23, "elapsed_time": "7:06:49", "remaining_time": "20:00:14"} +{"current_steps": 719, "total_steps": 2737, "loss": 1.0901, "lr": 1.843191498505299e-05, "epoch": 1.8388746803069054, "percentage": 26.27, "elapsed_time": "7:07:25", "remaining_time": "19:59:37"} +{"current_steps": 720, "total_steps": 2737, "loss": 1.0964, "lr": 1.8425050790823994e-05, "epoch": 1.8414322250639388, "percentage": 26.31, "elapsed_time": "7:08:00", "remaining_time": "19:59:01"} +{"current_steps": 721, "total_steps": 2737, "loss": 1.0962, "lr": 1.8418172889564145e-05, "epoch": 1.843989769820972, "percentage": 26.34, "elapsed_time": "7:08:36", "remaining_time": "19:58:25"} +{"current_steps": 722, "total_steps": 2737, "loss": 1.0545, "lr": 1.8411281292463345e-05, "epoch": 1.8465473145780051, "percentage": 26.38, "elapsed_time": "7:09:11", "remaining_time": "19:57:49"} +{"current_steps": 723, "total_steps": 2737, "loss": 1.0815, "lr": 1.8404376010733802e-05, "epoch": 1.8491048593350383, "percentage": 26.42, "elapsed_time": "7:09:47", "remaining_time": "19:57:14"} +{"current_steps": 724, "total_steps": 2737, "loss": 1.0759, "lr": 1.8397457055609973e-05, "epoch": 1.8516624040920715, "percentage": 26.45, "elapsed_time": "7:10:23", "remaining_time": "19:56:38"} +{"current_steps": 725, "total_steps": 2737, "loss": 1.1021, "lr": 1.8390524438348565e-05, "epoch": 1.854219948849105, "percentage": 26.49, "elapsed_time": "7:10:59", "remaining_time": "19:56:05"} +{"current_steps": 726, "total_steps": 2737, "loss": 1.0248, "lr": 1.8383578170228514e-05, "epoch": 1.856777493606138, "percentage": 26.53, "elapsed_time": "7:11:35", "remaining_time": "19:55:29"} +{"current_steps": 727, "total_steps": 2737, "loss": 1.0528, "lr": 1.8376618262550966e-05, "epoch": 1.8593350383631715, "percentage": 26.56, "elapsed_time": "7:12:10", "remaining_time": "19:54:53"} +{"current_steps": 728, "total_steps": 2737, "loss": 1.058, "lr": 1.836964472663925e-05, "epoch": 1.8618925831202047, "percentage": 26.6, "elapsed_time": "7:12:46", "remaining_time": "19:54:17"} +{"current_steps": 729, "total_steps": 2737, "loss": 1.1157, "lr": 1.8362657573838874e-05, "epoch": 1.8644501278772379, "percentage": 26.64, "elapsed_time": "7:13:22", "remaining_time": "19:53:41"} +{"current_steps": 730, "total_steps": 2737, "loss": 1.0711, "lr": 1.8355656815517505e-05, "epoch": 1.867007672634271, "percentage": 26.67, "elapsed_time": "7:13:57", "remaining_time": "19:53:05"} +{"current_steps": 731, "total_steps": 2737, "loss": 1.0414, "lr": 1.8348642463064937e-05, "epoch": 1.8695652173913042, "percentage": 26.71, "elapsed_time": "7:14:33", "remaining_time": "19:52:29"} +{"current_steps": 732, "total_steps": 2737, "loss": 1.0791, "lr": 1.8341614527893077e-05, "epoch": 1.8721227621483376, "percentage": 26.74, "elapsed_time": "7:15:08", "remaining_time": "19:51:53"} +{"current_steps": 733, "total_steps": 2737, "loss": 1.0878, "lr": 1.833457302143594e-05, "epoch": 1.8746803069053708, "percentage": 26.78, "elapsed_time": "7:15:44", "remaining_time": "19:51:18"} +{"current_steps": 734, "total_steps": 2737, "loss": 1.0484, "lr": 1.832751795514962e-05, "epoch": 1.8772378516624042, "percentage": 26.82, "elapsed_time": "7:16:19", "remaining_time": "19:50:42"} +{"current_steps": 735, "total_steps": 2737, "loss": 1.0762, "lr": 1.832044934051226e-05, "epoch": 1.8797953964194374, "percentage": 26.85, "elapsed_time": "7:16:55", "remaining_time": "19:50:06"} +{"current_steps": 736, "total_steps": 2737, "loss": 1.1082, "lr": 1.8313367189024065e-05, "epoch": 1.8823529411764706, "percentage": 26.89, "elapsed_time": "7:17:31", "remaining_time": "19:49:30"} +{"current_steps": 737, "total_steps": 2737, "loss": 1.0834, "lr": 1.8306271512207242e-05, "epoch": 1.8849104859335037, "percentage": 26.93, "elapsed_time": "7:18:06", "remaining_time": "19:48:54"} +{"current_steps": 738, "total_steps": 2737, "loss": 1.087, "lr": 1.829916232160602e-05, "epoch": 1.887468030690537, "percentage": 26.96, "elapsed_time": "7:18:42", "remaining_time": "19:48:19"} +{"current_steps": 739, "total_steps": 2737, "loss": 1.0718, "lr": 1.829203962878661e-05, "epoch": 1.8900255754475703, "percentage": 27.0, "elapsed_time": "7:19:18", "remaining_time": "19:47:42"} +{"current_steps": 740, "total_steps": 2737, "loss": 1.0435, "lr": 1.8284903445337184e-05, "epoch": 1.8925831202046037, "percentage": 27.04, "elapsed_time": "7:19:53", "remaining_time": "19:47:06"} +{"current_steps": 741, "total_steps": 2737, "loss": 1.0569, "lr": 1.8277753782867865e-05, "epoch": 1.895140664961637, "percentage": 27.07, "elapsed_time": "7:20:29", "remaining_time": "19:46:30"} +{"current_steps": 742, "total_steps": 2737, "loss": 1.0623, "lr": 1.8270590653010706e-05, "epoch": 1.89769820971867, "percentage": 27.11, "elapsed_time": "7:21:04", "remaining_time": "19:45:55"} +{"current_steps": 743, "total_steps": 2737, "loss": 1.101, "lr": 1.8263414067419676e-05, "epoch": 1.9002557544757033, "percentage": 27.15, "elapsed_time": "7:21:40", "remaining_time": "19:45:19"} +{"current_steps": 744, "total_steps": 2737, "loss": 1.0524, "lr": 1.8256224037770628e-05, "epoch": 1.9028132992327365, "percentage": 27.18, "elapsed_time": "7:22:16", "remaining_time": "19:44:43"} +{"current_steps": 745, "total_steps": 2737, "loss": 1.0511, "lr": 1.824902057576129e-05, "epoch": 1.9053708439897699, "percentage": 27.22, "elapsed_time": "7:22:51", "remaining_time": "19:44:08"} +{"current_steps": 746, "total_steps": 2737, "loss": 1.075, "lr": 1.8241803693111245e-05, "epoch": 1.907928388746803, "percentage": 27.26, "elapsed_time": "7:23:27", "remaining_time": "19:43:32"} +{"current_steps": 747, "total_steps": 2737, "loss": 1.0665, "lr": 1.8234573401561914e-05, "epoch": 1.9104859335038364, "percentage": 27.29, "elapsed_time": "7:24:02", "remaining_time": "19:42:56"} +{"current_steps": 748, "total_steps": 2737, "loss": 1.0802, "lr": 1.8227329712876525e-05, "epoch": 1.9130434782608696, "percentage": 27.33, "elapsed_time": "7:24:38", "remaining_time": "19:42:20"} +{"current_steps": 749, "total_steps": 2737, "loss": 1.1035, "lr": 1.8220072638840105e-05, "epoch": 1.9156010230179028, "percentage": 27.37, "elapsed_time": "7:25:14", "remaining_time": "19:41:44"} +{"current_steps": 750, "total_steps": 2737, "loss": 1.0571, "lr": 1.8212802191259465e-05, "epoch": 1.918158567774936, "percentage": 27.4, "elapsed_time": "7:25:49", "remaining_time": "19:41:08"} +{"current_steps": 751, "total_steps": 2737, "loss": 1.1095, "lr": 1.8205518381963165e-05, "epoch": 1.9207161125319692, "percentage": 27.44, "elapsed_time": "7:26:25", "remaining_time": "19:40:33"} +{"current_steps": 752, "total_steps": 2737, "loss": 1.06, "lr": 1.8198221222801506e-05, "epoch": 1.9232736572890026, "percentage": 27.48, "elapsed_time": "7:27:01", "remaining_time": "19:39:57"} +{"current_steps": 753, "total_steps": 2737, "loss": 1.0772, "lr": 1.8190910725646512e-05, "epoch": 1.9258312020460358, "percentage": 27.51, "elapsed_time": "7:27:36", "remaining_time": "19:39:21"} +{"current_steps": 754, "total_steps": 2737, "loss": 1.0708, "lr": 1.8183586902391905e-05, "epoch": 1.9283887468030692, "percentage": 27.55, "elapsed_time": "7:28:12", "remaining_time": "19:38:46"} +{"current_steps": 755, "total_steps": 2737, "loss": 1.0393, "lr": 1.8176249764953088e-05, "epoch": 1.9309462915601023, "percentage": 27.58, "elapsed_time": "7:28:48", "remaining_time": "19:38:10"} +{"current_steps": 756, "total_steps": 2737, "loss": 1.0777, "lr": 1.8168899325267122e-05, "epoch": 1.9335038363171355, "percentage": 27.62, "elapsed_time": "7:29:23", "remaining_time": "19:37:35"} +{"current_steps": 757, "total_steps": 2737, "loss": 1.0738, "lr": 1.8161535595292717e-05, "epoch": 1.9360613810741687, "percentage": 27.66, "elapsed_time": "7:29:59", "remaining_time": "19:37:00"} +{"current_steps": 758, "total_steps": 2737, "loss": 1.0552, "lr": 1.8154158587010195e-05, "epoch": 1.938618925831202, "percentage": 27.69, "elapsed_time": "7:30:35", "remaining_time": "19:36:24"} +{"current_steps": 759, "total_steps": 2737, "loss": 1.1049, "lr": 1.8146768312421495e-05, "epoch": 1.9411764705882353, "percentage": 27.73, "elapsed_time": "7:31:10", "remaining_time": "19:35:48"} +{"current_steps": 760, "total_steps": 2737, "loss": 1.11, "lr": 1.8139364783550128e-05, "epoch": 1.9437340153452687, "percentage": 27.77, "elapsed_time": "7:31:46", "remaining_time": "19:35:12"} +{"current_steps": 761, "total_steps": 2737, "loss": 1.1085, "lr": 1.813194801244117e-05, "epoch": 1.9462915601023019, "percentage": 27.8, "elapsed_time": "7:32:21", "remaining_time": "19:34:36"} +{"current_steps": 762, "total_steps": 2737, "loss": 1.0817, "lr": 1.8124518011161246e-05, "epoch": 1.948849104859335, "percentage": 27.84, "elapsed_time": "7:32:57", "remaining_time": "19:34:00"} +{"current_steps": 763, "total_steps": 2737, "loss": 1.0723, "lr": 1.8117074791798503e-05, "epoch": 1.9514066496163682, "percentage": 27.88, "elapsed_time": "7:33:33", "remaining_time": "19:33:26"} +{"current_steps": 764, "total_steps": 2737, "loss": 1.0878, "lr": 1.8109618366462597e-05, "epoch": 1.9539641943734014, "percentage": 27.91, "elapsed_time": "7:34:09", "remaining_time": "19:32:50"} +{"current_steps": 765, "total_steps": 2737, "loss": 1.0194, "lr": 1.8102148747284662e-05, "epoch": 1.9565217391304348, "percentage": 27.95, "elapsed_time": "7:34:45", "remaining_time": "19:32:14"} +{"current_steps": 766, "total_steps": 2737, "loss": 1.0818, "lr": 1.8094665946417304e-05, "epoch": 1.959079283887468, "percentage": 27.99, "elapsed_time": "7:35:20", "remaining_time": "19:31:39"} +{"current_steps": 767, "total_steps": 2737, "loss": 1.0524, "lr": 1.8087169976034568e-05, "epoch": 1.9616368286445014, "percentage": 28.02, "elapsed_time": "7:35:56", "remaining_time": "19:31:03"} +{"current_steps": 768, "total_steps": 2737, "loss": 1.0804, "lr": 1.807966084833193e-05, "epoch": 1.9641943734015346, "percentage": 28.06, "elapsed_time": "7:36:31", "remaining_time": "19:30:27"} +{"current_steps": 769, "total_steps": 2737, "loss": 1.0876, "lr": 1.8072138575526277e-05, "epoch": 1.9667519181585678, "percentage": 28.1, "elapsed_time": "7:37:07", "remaining_time": "19:29:51"} +{"current_steps": 770, "total_steps": 2737, "loss": 1.0674, "lr": 1.806460316985587e-05, "epoch": 1.969309462915601, "percentage": 28.13, "elapsed_time": "7:37:42", "remaining_time": "19:29:15"} +{"current_steps": 771, "total_steps": 2737, "loss": 1.059, "lr": 1.8057054643580347e-05, "epoch": 1.9718670076726341, "percentage": 28.17, "elapsed_time": "7:38:18", "remaining_time": "19:28:39"} +{"current_steps": 772, "total_steps": 2737, "loss": 1.076, "lr": 1.8049493008980685e-05, "epoch": 1.9744245524296675, "percentage": 28.21, "elapsed_time": "7:38:54", "remaining_time": "19:28:03"} +{"current_steps": 773, "total_steps": 2737, "loss": 1.0884, "lr": 1.8041918278359194e-05, "epoch": 1.976982097186701, "percentage": 28.24, "elapsed_time": "7:39:30", "remaining_time": "19:27:28"} +{"current_steps": 774, "total_steps": 2737, "loss": 1.0564, "lr": 1.8034330464039485e-05, "epoch": 1.979539641943734, "percentage": 28.28, "elapsed_time": "7:40:05", "remaining_time": "19:26:53"} +{"current_steps": 775, "total_steps": 2737, "loss": 1.0653, "lr": 1.8026729578366457e-05, "epoch": 1.9820971867007673, "percentage": 28.32, "elapsed_time": "7:40:41", "remaining_time": "19:26:17"} +{"current_steps": 776, "total_steps": 2737, "loss": 1.0847, "lr": 1.801911563370628e-05, "epoch": 1.9846547314578005, "percentage": 28.35, "elapsed_time": "7:41:17", "remaining_time": "19:25:41"} +{"current_steps": 777, "total_steps": 2737, "loss": 1.0617, "lr": 1.801148864244636e-05, "epoch": 1.9872122762148337, "percentage": 28.39, "elapsed_time": "7:41:52", "remaining_time": "19:25:05"} +{"current_steps": 778, "total_steps": 2737, "loss": 1.1046, "lr": 1.8003848616995333e-05, "epoch": 1.989769820971867, "percentage": 28.43, "elapsed_time": "7:42:28", "remaining_time": "19:24:30"} +{"current_steps": 779, "total_steps": 2737, "loss": 1.0841, "lr": 1.7996195569783053e-05, "epoch": 1.9923273657289002, "percentage": 28.46, "elapsed_time": "7:43:04", "remaining_time": "19:23:54"} +{"current_steps": 780, "total_steps": 2737, "loss": 1.064, "lr": 1.798852951326054e-05, "epoch": 1.9948849104859336, "percentage": 28.5, "elapsed_time": "7:43:39", "remaining_time": "19:23:18"} +{"current_steps": 781, "total_steps": 2737, "loss": 1.0748, "lr": 1.7980850459899997e-05, "epoch": 1.9974424552429668, "percentage": 28.53, "elapsed_time": "7:44:15", "remaining_time": "19:22:44"} +{"current_steps": 782, "total_steps": 2737, "loss": 1.0504, "lr": 1.7973158422194754e-05, "epoch": 2.0, "percentage": 28.57, "elapsed_time": "7:44:51", "remaining_time": "19:22:09"} +{"current_steps": 783, "total_steps": 2737, "loss": 1.0561, "lr": 1.7965453412659284e-05, "epoch": 2.002557544757033, "percentage": 28.61, "elapsed_time": "7:45:36", "remaining_time": "19:21:57"} +{"current_steps": 784, "total_steps": 2737, "loss": 1.0484, "lr": 1.795773544382915e-05, "epoch": 2.0051150895140664, "percentage": 28.64, "elapsed_time": "7:46:12", "remaining_time": "19:21:21"} +{"current_steps": 785, "total_steps": 2737, "loss": 1.0494, "lr": 1.795000452826101e-05, "epoch": 2.0076726342710995, "percentage": 28.68, "elapsed_time": "7:46:48", "remaining_time": "19:20:46"} +{"current_steps": 786, "total_steps": 2737, "loss": 1.1343, "lr": 1.794226067853257e-05, "epoch": 2.010230179028133, "percentage": 28.72, "elapsed_time": "7:47:23", "remaining_time": "19:20:10"} +{"current_steps": 787, "total_steps": 2737, "loss": 1.0648, "lr": 1.79345039072426e-05, "epoch": 2.0127877237851663, "percentage": 28.75, "elapsed_time": "7:47:59", "remaining_time": "19:19:34"} +{"current_steps": 788, "total_steps": 2737, "loss": 1.0801, "lr": 1.7926734227010876e-05, "epoch": 2.0153452685421995, "percentage": 28.79, "elapsed_time": "7:48:34", "remaining_time": "19:18:58"} +{"current_steps": 789, "total_steps": 2737, "loss": 1.0613, "lr": 1.7918951650478188e-05, "epoch": 2.0179028132992327, "percentage": 28.83, "elapsed_time": "7:49:10", "remaining_time": "19:18:22"} +{"current_steps": 790, "total_steps": 2737, "loss": 1.0476, "lr": 1.7911156190306296e-05, "epoch": 2.020460358056266, "percentage": 28.86, "elapsed_time": "7:49:46", "remaining_time": "19:17:46"} +{"current_steps": 791, "total_steps": 2737, "loss": 1.0486, "lr": 1.7903347859177926e-05, "epoch": 2.023017902813299, "percentage": 28.9, "elapsed_time": "7:50:21", "remaining_time": "19:17:11"} +{"current_steps": 792, "total_steps": 2737, "loss": 1.0543, "lr": 1.7895526669796747e-05, "epoch": 2.0255754475703327, "percentage": 28.94, "elapsed_time": "7:50:57", "remaining_time": "19:16:35"} +{"current_steps": 793, "total_steps": 2737, "loss": 1.0434, "lr": 1.7887692634887345e-05, "epoch": 2.028132992327366, "percentage": 28.97, "elapsed_time": "7:51:33", "remaining_time": "19:15:59"} +{"current_steps": 794, "total_steps": 2737, "loss": 1.0443, "lr": 1.7879845767195204e-05, "epoch": 2.030690537084399, "percentage": 29.01, "elapsed_time": "7:52:09", "remaining_time": "19:15:25"} +{"current_steps": 795, "total_steps": 2737, "loss": 1.0516, "lr": 1.787198607948669e-05, "epoch": 2.0332480818414322, "percentage": 29.05, "elapsed_time": "7:52:45", "remaining_time": "19:14:49"} +{"current_steps": 796, "total_steps": 2737, "loss": 1.0588, "lr": 1.786411358454902e-05, "epoch": 2.0358056265984654, "percentage": 29.08, "elapsed_time": "7:53:20", "remaining_time": "19:14:13"} +{"current_steps": 797, "total_steps": 2737, "loss": 1.0835, "lr": 1.785622829519025e-05, "epoch": 2.0383631713554986, "percentage": 29.12, "elapsed_time": "7:53:56", "remaining_time": "19:13:38"} +{"current_steps": 798, "total_steps": 2737, "loss": 1.0563, "lr": 1.7848330224239256e-05, "epoch": 2.040920716112532, "percentage": 29.16, "elapsed_time": "7:54:32", "remaining_time": "19:13:02"} +{"current_steps": 799, "total_steps": 2737, "loss": 1.0579, "lr": 1.7840419384545706e-05, "epoch": 2.0434782608695654, "percentage": 29.19, "elapsed_time": "7:55:07", "remaining_time": "19:12:26"} +{"current_steps": 800, "total_steps": 2737, "loss": 1.1015, "lr": 1.7832495788980035e-05, "epoch": 2.0460358056265986, "percentage": 29.23, "elapsed_time": "7:55:43", "remaining_time": "19:11:50"} +{"current_steps": 801, "total_steps": 2737, "loss": 1.0537, "lr": 1.7824559450433446e-05, "epoch": 2.0485933503836318, "percentage": 29.27, "elapsed_time": "7:56:19", "remaining_time": "19:11:15"} +{"current_steps": 802, "total_steps": 2737, "loss": 1.0604, "lr": 1.7816610381817864e-05, "epoch": 2.051150895140665, "percentage": 29.3, "elapsed_time": "7:56:54", "remaining_time": "19:10:39"} +{"current_steps": 803, "total_steps": 2737, "loss": 1.0664, "lr": 1.780864859606592e-05, "epoch": 2.053708439897698, "percentage": 29.34, "elapsed_time": "7:57:30", "remaining_time": "19:10:03"} +{"current_steps": 804, "total_steps": 2737, "loss": 1.0937, "lr": 1.780067410613095e-05, "epoch": 2.0562659846547313, "percentage": 29.38, "elapsed_time": "7:58:06", "remaining_time": "19:09:27"} +{"current_steps": 805, "total_steps": 2737, "loss": 1.0441, "lr": 1.7792686924986946e-05, "epoch": 2.0588235294117645, "percentage": 29.41, "elapsed_time": "7:58:41", "remaining_time": "19:08:52"} +{"current_steps": 806, "total_steps": 2737, "loss": 1.058, "lr": 1.7784687065628554e-05, "epoch": 2.061381074168798, "percentage": 29.45, "elapsed_time": "7:59:17", "remaining_time": "19:08:16"} +{"current_steps": 807, "total_steps": 2737, "loss": 1.0992, "lr": 1.777667454107104e-05, "epoch": 2.0639386189258313, "percentage": 29.48, "elapsed_time": "7:59:52", "remaining_time": "19:07:40"} +{"current_steps": 808, "total_steps": 2737, "loss": 1.0735, "lr": 1.776864936435029e-05, "epoch": 2.0664961636828645, "percentage": 29.52, "elapsed_time": "8:00:28", "remaining_time": "19:07:04"} +{"current_steps": 809, "total_steps": 2737, "loss": 1.0498, "lr": 1.7760611548522755e-05, "epoch": 2.0690537084398977, "percentage": 29.56, "elapsed_time": "8:01:04", "remaining_time": "19:06:28"} +{"current_steps": 810, "total_steps": 2737, "loss": 1.0548, "lr": 1.7752561106665463e-05, "epoch": 2.071611253196931, "percentage": 29.59, "elapsed_time": "8:01:39", "remaining_time": "19:05:52"} +{"current_steps": 811, "total_steps": 2737, "loss": 1.077, "lr": 1.7744498051875984e-05, "epoch": 2.074168797953964, "percentage": 29.63, "elapsed_time": "8:02:15", "remaining_time": "19:05:16"} +{"current_steps": 812, "total_steps": 2737, "loss": 1.0494, "lr": 1.7736422397272396e-05, "epoch": 2.0767263427109977, "percentage": 29.67, "elapsed_time": "8:02:50", "remaining_time": "19:04:41"} +{"current_steps": 813, "total_steps": 2737, "loss": 1.0511, "lr": 1.772833415599329e-05, "epoch": 2.079283887468031, "percentage": 29.7, "elapsed_time": "8:03:26", "remaining_time": "19:04:05"} +{"current_steps": 814, "total_steps": 2737, "loss": 1.1121, "lr": 1.7720233341197726e-05, "epoch": 2.081841432225064, "percentage": 29.74, "elapsed_time": "8:04:02", "remaining_time": "19:03:30"} +{"current_steps": 815, "total_steps": 2737, "loss": 1.0383, "lr": 1.7712119966065225e-05, "epoch": 2.084398976982097, "percentage": 29.78, "elapsed_time": "8:04:38", "remaining_time": "19:02:54"} +{"current_steps": 816, "total_steps": 2737, "loss": 1.0498, "lr": 1.770399404379574e-05, "epoch": 2.0869565217391304, "percentage": 29.81, "elapsed_time": "8:05:13", "remaining_time": "19:02:18"} +{"current_steps": 817, "total_steps": 2737, "loss": 1.0594, "lr": 1.7695855587609637e-05, "epoch": 2.0895140664961636, "percentage": 29.85, "elapsed_time": "8:05:49", "remaining_time": "19:01:42"} +{"current_steps": 818, "total_steps": 2737, "loss": 1.0419, "lr": 1.7687704610747676e-05, "epoch": 2.0920716112531967, "percentage": 29.89, "elapsed_time": "8:06:24", "remaining_time": "19:01:06"} +{"current_steps": 819, "total_steps": 2737, "loss": 1.0435, "lr": 1.767954112647099e-05, "epoch": 2.0946291560102304, "percentage": 29.92, "elapsed_time": "8:07:00", "remaining_time": "19:00:30"} +{"current_steps": 820, "total_steps": 2737, "loss": 1.0458, "lr": 1.7671365148061053e-05, "epoch": 2.0971867007672635, "percentage": 29.96, "elapsed_time": "8:07:36", "remaining_time": "18:59:55"} +{"current_steps": 821, "total_steps": 2737, "loss": 1.0566, "lr": 1.7663176688819673e-05, "epoch": 2.0997442455242967, "percentage": 30.0, "elapsed_time": "8:08:12", "remaining_time": "18:59:21"} +{"current_steps": 822, "total_steps": 2737, "loss": 1.0422, "lr": 1.765497576206896e-05, "epoch": 2.10230179028133, "percentage": 30.03, "elapsed_time": "8:08:48", "remaining_time": "18:58:45"} +{"current_steps": 823, "total_steps": 2737, "loss": 1.0776, "lr": 1.764676238115131e-05, "epoch": 2.104859335038363, "percentage": 30.07, "elapsed_time": "8:09:23", "remaining_time": "18:58:09"} +{"current_steps": 824, "total_steps": 2737, "loss": 1.0674, "lr": 1.763853655942938e-05, "epoch": 2.1074168797953963, "percentage": 30.11, "elapsed_time": "8:09:59", "remaining_time": "18:57:34"} +{"current_steps": 825, "total_steps": 2737, "loss": 1.0699, "lr": 1.7630298310286065e-05, "epoch": 2.10997442455243, "percentage": 30.14, "elapsed_time": "8:10:35", "remaining_time": "18:56:58"} +{"current_steps": 826, "total_steps": 2737, "loss": 1.0634, "lr": 1.7622047647124488e-05, "epoch": 2.112531969309463, "percentage": 30.18, "elapsed_time": "8:11:10", "remaining_time": "18:56:22"} +{"current_steps": 827, "total_steps": 2737, "loss": 1.0548, "lr": 1.761378458336796e-05, "epoch": 2.1150895140664963, "percentage": 30.22, "elapsed_time": "8:11:46", "remaining_time": "18:55:46"} +{"current_steps": 828, "total_steps": 2737, "loss": 1.0621, "lr": 1.760550913245996e-05, "epoch": 2.1176470588235294, "percentage": 30.25, "elapsed_time": "8:12:22", "remaining_time": "18:55:10"} +{"current_steps": 829, "total_steps": 2737, "loss": 1.0704, "lr": 1.7597221307864142e-05, "epoch": 2.1202046035805626, "percentage": 30.29, "elapsed_time": "8:12:57", "remaining_time": "18:54:34"} +{"current_steps": 830, "total_steps": 2737, "loss": 1.0961, "lr": 1.7588921123064273e-05, "epoch": 2.122762148337596, "percentage": 30.33, "elapsed_time": "8:13:33", "remaining_time": "18:53:59"} +{"current_steps": 831, "total_steps": 2737, "loss": 1.0916, "lr": 1.7580608591564233e-05, "epoch": 2.125319693094629, "percentage": 30.36, "elapsed_time": "8:14:08", "remaining_time": "18:53:23"} +{"current_steps": 832, "total_steps": 2737, "loss": 1.0848, "lr": 1.757228372688799e-05, "epoch": 2.1278772378516626, "percentage": 30.4, "elapsed_time": "8:14:44", "remaining_time": "18:52:47"} +{"current_steps": 833, "total_steps": 2737, "loss": 1.0824, "lr": 1.7563946542579584e-05, "epoch": 2.130434782608696, "percentage": 30.43, "elapsed_time": "8:15:20", "remaining_time": "18:52:11"} +{"current_steps": 834, "total_steps": 2737, "loss": 1.0424, "lr": 1.7555597052203088e-05, "epoch": 2.132992327365729, "percentage": 30.47, "elapsed_time": "8:15:55", "remaining_time": "18:51:36"} +{"current_steps": 835, "total_steps": 2737, "loss": 1.0749, "lr": 1.7547235269342602e-05, "epoch": 2.135549872122762, "percentage": 30.51, "elapsed_time": "8:16:31", "remaining_time": "18:51:00"} +{"current_steps": 836, "total_steps": 2737, "loss": 1.0871, "lr": 1.7538861207602225e-05, "epoch": 2.1381074168797953, "percentage": 30.54, "elapsed_time": "8:17:07", "remaining_time": "18:50:26"} +{"current_steps": 837, "total_steps": 2737, "loss": 1.0257, "lr": 1.753047488060603e-05, "epoch": 2.1406649616368285, "percentage": 30.58, "elapsed_time": "8:17:43", "remaining_time": "18:49:50"} +{"current_steps": 838, "total_steps": 2737, "loss": 1.0907, "lr": 1.7522076301998048e-05, "epoch": 2.1432225063938617, "percentage": 30.62, "elapsed_time": "8:18:19", "remaining_time": "18:49:14"} +{"current_steps": 839, "total_steps": 2737, "loss": 1.067, "lr": 1.7513665485442238e-05, "epoch": 2.1457800511508953, "percentage": 30.65, "elapsed_time": "8:18:54", "remaining_time": "18:48:38"} +{"current_steps": 840, "total_steps": 2737, "loss": 1.0893, "lr": 1.750524244462248e-05, "epoch": 2.1483375959079285, "percentage": 30.69, "elapsed_time": "8:19:30", "remaining_time": "18:48:03"} +{"current_steps": 841, "total_steps": 2737, "loss": 1.0638, "lr": 1.7496807193242528e-05, "epoch": 2.1508951406649617, "percentage": 30.73, "elapsed_time": "8:20:05", "remaining_time": "18:47:27"} +{"current_steps": 842, "total_steps": 2737, "loss": 1.0825, "lr": 1.748835974502601e-05, "epoch": 2.153452685421995, "percentage": 30.76, "elapsed_time": "8:20:41", "remaining_time": "18:46:51"} +{"current_steps": 843, "total_steps": 2737, "loss": 1.0537, "lr": 1.7479900113716398e-05, "epoch": 2.156010230179028, "percentage": 30.8, "elapsed_time": "8:21:17", "remaining_time": "18:46:15"} +{"current_steps": 844, "total_steps": 2737, "loss": 1.1031, "lr": 1.7471428313076984e-05, "epoch": 2.1585677749360612, "percentage": 30.84, "elapsed_time": "8:21:52", "remaining_time": "18:45:39"} +{"current_steps": 845, "total_steps": 2737, "loss": 1.0589, "lr": 1.7462944356890853e-05, "epoch": 2.1611253196930944, "percentage": 30.87, "elapsed_time": "8:22:28", "remaining_time": "18:45:03"} +{"current_steps": 846, "total_steps": 2737, "loss": 1.0561, "lr": 1.7454448258960877e-05, "epoch": 2.163682864450128, "percentage": 30.91, "elapsed_time": "8:23:04", "remaining_time": "18:44:28"} +{"current_steps": 847, "total_steps": 2737, "loss": 1.0186, "lr": 1.744594003310967e-05, "epoch": 2.166240409207161, "percentage": 30.95, "elapsed_time": "8:23:39", "remaining_time": "18:43:51"} +{"current_steps": 848, "total_steps": 2737, "loss": 1.1099, "lr": 1.743741969317959e-05, "epoch": 2.1687979539641944, "percentage": 30.98, "elapsed_time": "8:24:14", "remaining_time": "18:43:15"} +{"current_steps": 849, "total_steps": 2737, "loss": 1.0691, "lr": 1.7428887253032695e-05, "epoch": 2.1713554987212276, "percentage": 31.02, "elapsed_time": "8:24:50", "remaining_time": "18:42:39"} +{"current_steps": 850, "total_steps": 2737, "loss": 1.0701, "lr": 1.7420342726550728e-05, "epoch": 2.1739130434782608, "percentage": 31.06, "elapsed_time": "8:25:26", "remaining_time": "18:42:03"} +{"current_steps": 851, "total_steps": 2737, "loss": 1.0716, "lr": 1.74117861276351e-05, "epoch": 2.176470588235294, "percentage": 31.09, "elapsed_time": "8:26:01", "remaining_time": "18:41:28"} +{"current_steps": 852, "total_steps": 2737, "loss": 1.0893, "lr": 1.740321747020687e-05, "epoch": 2.1790281329923276, "percentage": 31.13, "elapsed_time": "8:26:37", "remaining_time": "18:40:52"} +{"current_steps": 853, "total_steps": 2737, "loss": 1.0266, "lr": 1.7394636768206702e-05, "epoch": 2.1815856777493607, "percentage": 31.17, "elapsed_time": "8:27:13", "remaining_time": "18:40:16"} +{"current_steps": 854, "total_steps": 2737, "loss": 1.0085, "lr": 1.738604403559486e-05, "epoch": 2.184143222506394, "percentage": 31.2, "elapsed_time": "8:27:48", "remaining_time": "18:39:41"} +{"current_steps": 855, "total_steps": 2737, "loss": 1.0622, "lr": 1.7377439286351184e-05, "epoch": 2.186700767263427, "percentage": 31.24, "elapsed_time": "8:28:24", "remaining_time": "18:39:05"} +{"current_steps": 856, "total_steps": 2737, "loss": 1.083, "lr": 1.736882253447506e-05, "epoch": 2.1892583120204603, "percentage": 31.28, "elapsed_time": "8:29:00", "remaining_time": "18:38:29"} +{"current_steps": 857, "total_steps": 2737, "loss": 1.1006, "lr": 1.736019379398542e-05, "epoch": 2.1918158567774935, "percentage": 31.31, "elapsed_time": "8:29:35", "remaining_time": "18:37:53"} +{"current_steps": 858, "total_steps": 2737, "loss": 1.0914, "lr": 1.7351553078920665e-05, "epoch": 2.1943734015345266, "percentage": 31.35, "elapsed_time": "8:30:11", "remaining_time": "18:37:18"} +{"current_steps": 859, "total_steps": 2737, "loss": 1.0873, "lr": 1.734290040333871e-05, "epoch": 2.1969309462915603, "percentage": 31.38, "elapsed_time": "8:30:47", "remaining_time": "18:36:42"} +{"current_steps": 860, "total_steps": 2737, "loss": 1.0835, "lr": 1.733423578131691e-05, "epoch": 2.1994884910485935, "percentage": 31.42, "elapsed_time": "8:31:22", "remaining_time": "18:36:06"} +{"current_steps": 861, "total_steps": 2737, "loss": 1.0743, "lr": 1.732555922695207e-05, "epoch": 2.2020460358056266, "percentage": 31.46, "elapsed_time": "8:31:58", "remaining_time": "18:35:31"} +{"current_steps": 862, "total_steps": 2737, "loss": 1.0482, "lr": 1.73168707543604e-05, "epoch": 2.20460358056266, "percentage": 31.49, "elapsed_time": "8:32:34", "remaining_time": "18:34:57"} +{"current_steps": 863, "total_steps": 2737, "loss": 1.0686, "lr": 1.73081703776775e-05, "epoch": 2.207161125319693, "percentage": 31.53, "elapsed_time": "8:33:11", "remaining_time": "18:34:23"} +{"current_steps": 864, "total_steps": 2737, "loss": 1.0738, "lr": 1.7299458111058336e-05, "epoch": 2.209718670076726, "percentage": 31.57, "elapsed_time": "8:33:47", "remaining_time": "18:33:47"} +{"current_steps": 865, "total_steps": 2737, "loss": 1.0313, "lr": 1.7290733968677226e-05, "epoch": 2.21227621483376, "percentage": 31.6, "elapsed_time": "8:34:22", "remaining_time": "18:33:12"} +{"current_steps": 866, "total_steps": 2737, "loss": 1.0602, "lr": 1.7281997964727803e-05, "epoch": 2.214833759590793, "percentage": 31.64, "elapsed_time": "8:34:58", "remaining_time": "18:32:36"} +{"current_steps": 867, "total_steps": 2737, "loss": 1.1046, "lr": 1.7273250113423e-05, "epoch": 2.217391304347826, "percentage": 31.68, "elapsed_time": "8:35:35", "remaining_time": "18:32:02"} +{"current_steps": 868, "total_steps": 2737, "loss": 1.0437, "lr": 1.726449042899502e-05, "epoch": 2.2199488491048593, "percentage": 31.71, "elapsed_time": "8:36:10", "remaining_time": "18:31:26"} +{"current_steps": 869, "total_steps": 2737, "loss": 1.0809, "lr": 1.725571892569533e-05, "epoch": 2.2225063938618925, "percentage": 31.75, "elapsed_time": "8:36:46", "remaining_time": "18:30:51"} +{"current_steps": 870, "total_steps": 2737, "loss": 1.0664, "lr": 1.7246935617794608e-05, "epoch": 2.2250639386189257, "percentage": 31.79, "elapsed_time": "8:37:22", "remaining_time": "18:30:15"} +{"current_steps": 871, "total_steps": 2737, "loss": 1.045, "lr": 1.723814051958275e-05, "epoch": 2.227621483375959, "percentage": 31.82, "elapsed_time": "8:37:57", "remaining_time": "18:29:39"} +{"current_steps": 872, "total_steps": 2737, "loss": 1.0661, "lr": 1.7229333645368834e-05, "epoch": 2.2301790281329925, "percentage": 31.86, "elapsed_time": "8:38:33", "remaining_time": "18:29:03"} +{"current_steps": 873, "total_steps": 2737, "loss": 1.0846, "lr": 1.722051500948109e-05, "epoch": 2.2327365728900257, "percentage": 31.9, "elapsed_time": "8:39:08", "remaining_time": "18:28:28"} +{"current_steps": 874, "total_steps": 2737, "loss": 1.0718, "lr": 1.7211684626266887e-05, "epoch": 2.235294117647059, "percentage": 31.93, "elapsed_time": "8:39:44", "remaining_time": "18:27:52"} +{"current_steps": 875, "total_steps": 2737, "loss": 1.0428, "lr": 1.7202842510092706e-05, "epoch": 2.237851662404092, "percentage": 31.97, "elapsed_time": "8:40:20", "remaining_time": "18:27:16"} +{"current_steps": 876, "total_steps": 2737, "loss": 1.0598, "lr": 1.7193988675344125e-05, "epoch": 2.2404092071611252, "percentage": 32.01, "elapsed_time": "8:40:55", "remaining_time": "18:26:40"} +{"current_steps": 877, "total_steps": 2737, "loss": 1.0486, "lr": 1.7185123136425775e-05, "epoch": 2.2429667519181584, "percentage": 32.04, "elapsed_time": "8:41:31", "remaining_time": "18:26:04"} +{"current_steps": 878, "total_steps": 2737, "loss": 1.0567, "lr": 1.7176245907761327e-05, "epoch": 2.2455242966751916, "percentage": 32.08, "elapsed_time": "8:42:07", "remaining_time": "18:25:29"} +{"current_steps": 879, "total_steps": 2737, "loss": 1.0567, "lr": 1.7167357003793485e-05, "epoch": 2.2480818414322252, "percentage": 32.12, "elapsed_time": "8:42:42", "remaining_time": "18:24:53"} +{"current_steps": 880, "total_steps": 2737, "loss": 1.0299, "lr": 1.7158456438983934e-05, "epoch": 2.2506393861892584, "percentage": 32.15, "elapsed_time": "8:43:18", "remaining_time": "18:24:18"} +{"current_steps": 881, "total_steps": 2737, "loss": 1.05, "lr": 1.7149544227813343e-05, "epoch": 2.2531969309462916, "percentage": 32.19, "elapsed_time": "8:43:54", "remaining_time": "18:23:42"} +{"current_steps": 882, "total_steps": 2737, "loss": 1.0166, "lr": 1.7140620384781316e-05, "epoch": 2.2557544757033248, "percentage": 32.23, "elapsed_time": "8:44:29", "remaining_time": "18:23:06"} +{"current_steps": 883, "total_steps": 2737, "loss": 1.0561, "lr": 1.7131684924406392e-05, "epoch": 2.258312020460358, "percentage": 32.26, "elapsed_time": "8:45:05", "remaining_time": "18:22:30"} +{"current_steps": 884, "total_steps": 2737, "loss": 1.0536, "lr": 1.7122737861226007e-05, "epoch": 2.260869565217391, "percentage": 32.3, "elapsed_time": "8:45:41", "remaining_time": "18:21:54"} +{"current_steps": 885, "total_steps": 2737, "loss": 1.0717, "lr": 1.711377920979647e-05, "epoch": 2.2634271099744243, "percentage": 32.33, "elapsed_time": "8:46:17", "remaining_time": "18:21:19"} +{"current_steps": 886, "total_steps": 2737, "loss": 1.0788, "lr": 1.7104808984692946e-05, "epoch": 2.265984654731458, "percentage": 32.37, "elapsed_time": "8:46:52", "remaining_time": "18:20:44"} +{"current_steps": 887, "total_steps": 2737, "loss": 1.0358, "lr": 1.7095827200509436e-05, "epoch": 2.268542199488491, "percentage": 32.41, "elapsed_time": "8:47:28", "remaining_time": "18:20:08"} +{"current_steps": 888, "total_steps": 2737, "loss": 1.0405, "lr": 1.7086833871858735e-05, "epoch": 2.2710997442455243, "percentage": 32.44, "elapsed_time": "8:48:04", "remaining_time": "18:19:33"} +{"current_steps": 889, "total_steps": 2737, "loss": 1.0635, "lr": 1.707782901337243e-05, "epoch": 2.2736572890025575, "percentage": 32.48, "elapsed_time": "8:48:39", "remaining_time": "18:18:57"} +{"current_steps": 890, "total_steps": 2737, "loss": 1.0995, "lr": 1.7068812639700862e-05, "epoch": 2.2762148337595907, "percentage": 32.52, "elapsed_time": "8:49:15", "remaining_time": "18:18:21"} +{"current_steps": 891, "total_steps": 2737, "loss": 1.0772, "lr": 1.7059784765513106e-05, "epoch": 2.2787723785166243, "percentage": 32.55, "elapsed_time": "8:49:50", "remaining_time": "18:17:45"} +{"current_steps": 892, "total_steps": 2737, "loss": 1.0609, "lr": 1.705074540549695e-05, "epoch": 2.2813299232736575, "percentage": 32.59, "elapsed_time": "8:50:26", "remaining_time": "18:17:10"} +{"current_steps": 893, "total_steps": 2737, "loss": 1.0661, "lr": 1.704169457435887e-05, "epoch": 2.2838874680306906, "percentage": 32.63, "elapsed_time": "8:51:02", "remaining_time": "18:16:34"} +{"current_steps": 894, "total_steps": 2737, "loss": 1.0853, "lr": 1.7032632286823995e-05, "epoch": 2.286445012787724, "percentage": 32.66, "elapsed_time": "8:51:38", "remaining_time": "18:15:58"} +{"current_steps": 895, "total_steps": 2737, "loss": 1.0723, "lr": 1.702355855763611e-05, "epoch": 2.289002557544757, "percentage": 32.7, "elapsed_time": "8:52:13", "remaining_time": "18:15:23"} +{"current_steps": 896, "total_steps": 2737, "loss": 1.0619, "lr": 1.70144734015576e-05, "epoch": 2.29156010230179, "percentage": 32.74, "elapsed_time": "8:52:49", "remaining_time": "18:14:47"} +{"current_steps": 897, "total_steps": 2737, "loss": 1.0589, "lr": 1.700537683336944e-05, "epoch": 2.2941176470588234, "percentage": 32.77, "elapsed_time": "8:53:25", "remaining_time": "18:14:11"} +{"current_steps": 898, "total_steps": 2737, "loss": 1.0361, "lr": 1.699626886787119e-05, "epoch": 2.296675191815857, "percentage": 32.81, "elapsed_time": "8:54:00", "remaining_time": "18:13:35"} +{"current_steps": 899, "total_steps": 2737, "loss": 1.071, "lr": 1.698714951988093e-05, "epoch": 2.29923273657289, "percentage": 32.85, "elapsed_time": "8:54:36", "remaining_time": "18:12:59"} +{"current_steps": 900, "total_steps": 2737, "loss": 1.0555, "lr": 1.6978018804235278e-05, "epoch": 2.3017902813299234, "percentage": 32.88, "elapsed_time": "8:55:12", "remaining_time": "18:12:24"} +{"current_steps": 901, "total_steps": 2737, "loss": 1.0483, "lr": 1.6968876735789326e-05, "epoch": 2.3043478260869565, "percentage": 32.92, "elapsed_time": "8:55:48", "remaining_time": "18:11:49"} +{"current_steps": 902, "total_steps": 2737, "loss": 1.0551, "lr": 1.695972332941666e-05, "epoch": 2.3069053708439897, "percentage": 32.96, "elapsed_time": "8:56:24", "remaining_time": "18:11:14"} +{"current_steps": 903, "total_steps": 2737, "loss": 1.0743, "lr": 1.695055860000929e-05, "epoch": 2.309462915601023, "percentage": 32.99, "elapsed_time": "8:57:00", "remaining_time": "18:10:40"} +{"current_steps": 904, "total_steps": 2737, "loss": 1.0003, "lr": 1.6941382562477664e-05, "epoch": 2.312020460358056, "percentage": 33.03, "elapsed_time": "8:57:36", "remaining_time": "18:10:05"} +{"current_steps": 905, "total_steps": 2737, "loss": 1.0351, "lr": 1.6932195231750616e-05, "epoch": 2.3145780051150897, "percentage": 33.07, "elapsed_time": "8:58:12", "remaining_time": "18:09:29"} +{"current_steps": 906, "total_steps": 2737, "loss": 1.0445, "lr": 1.6922996622775363e-05, "epoch": 2.317135549872123, "percentage": 33.1, "elapsed_time": "8:58:47", "remaining_time": "18:08:53"} +{"current_steps": 907, "total_steps": 2737, "loss": 1.0519, "lr": 1.691378675051747e-05, "epoch": 2.319693094629156, "percentage": 33.14, "elapsed_time": "8:59:23", "remaining_time": "18:08:18"} +{"current_steps": 908, "total_steps": 2737, "loss": 1.0902, "lr": 1.6904565629960814e-05, "epoch": 2.3222506393861893, "percentage": 33.18, "elapsed_time": "8:59:59", "remaining_time": "18:07:43"} +{"current_steps": 909, "total_steps": 2737, "loss": 1.0265, "lr": 1.6895333276107588e-05, "epoch": 2.3248081841432224, "percentage": 33.21, "elapsed_time": "9:00:35", "remaining_time": "18:07:07"} +{"current_steps": 910, "total_steps": 2737, "loss": 1.1046, "lr": 1.688608970397825e-05, "epoch": 2.3273657289002556, "percentage": 33.25, "elapsed_time": "9:01:11", "remaining_time": "18:06:33"} +{"current_steps": 911, "total_steps": 2737, "loss": 1.0784, "lr": 1.6876834928611524e-05, "epoch": 2.329923273657289, "percentage": 33.28, "elapsed_time": "9:01:47", "remaining_time": "18:05:57"} +{"current_steps": 912, "total_steps": 2737, "loss": 1.0364, "lr": 1.6867568965064336e-05, "epoch": 2.3324808184143224, "percentage": 33.32, "elapsed_time": "9:02:23", "remaining_time": "18:05:22"} +{"current_steps": 913, "total_steps": 2737, "loss": 1.0707, "lr": 1.685829182841184e-05, "epoch": 2.3350383631713556, "percentage": 33.36, "elapsed_time": "9:02:58", "remaining_time": "18:04:46"} +{"current_steps": 914, "total_steps": 2737, "loss": 1.0702, "lr": 1.684900353374735e-05, "epoch": 2.337595907928389, "percentage": 33.39, "elapsed_time": "9:03:34", "remaining_time": "18:04:11"} +{"current_steps": 915, "total_steps": 2737, "loss": 1.0689, "lr": 1.683970409618235e-05, "epoch": 2.340153452685422, "percentage": 33.43, "elapsed_time": "9:04:10", "remaining_time": "18:03:36"} +{"current_steps": 916, "total_steps": 2737, "loss": 1.0905, "lr": 1.683039353084644e-05, "epoch": 2.342710997442455, "percentage": 33.47, "elapsed_time": "9:04:46", "remaining_time": "18:03:00"} +{"current_steps": 917, "total_steps": 2737, "loss": 1.0317, "lr": 1.6821071852887322e-05, "epoch": 2.3452685421994883, "percentage": 33.5, "elapsed_time": "9:05:22", "remaining_time": "18:02:25"} +{"current_steps": 918, "total_steps": 2737, "loss": 1.0572, "lr": 1.681173907747079e-05, "epoch": 2.3478260869565215, "percentage": 33.54, "elapsed_time": "9:05:58", "remaining_time": "18:01:49"} +{"current_steps": 919, "total_steps": 2737, "loss": 1.0429, "lr": 1.680239521978068e-05, "epoch": 2.350383631713555, "percentage": 33.58, "elapsed_time": "9:06:35", "remaining_time": "18:01:16"} +{"current_steps": 920, "total_steps": 2737, "loss": 1.0452, "lr": 1.679304029501887e-05, "epoch": 2.3529411764705883, "percentage": 33.61, "elapsed_time": "9:07:11", "remaining_time": "18:00:41"} +{"current_steps": 921, "total_steps": 2737, "loss": 1.0496, "lr": 1.6783674318405233e-05, "epoch": 2.3554987212276215, "percentage": 33.65, "elapsed_time": "9:07:46", "remaining_time": "18:00:05"} +{"current_steps": 922, "total_steps": 2737, "loss": 1.0471, "lr": 1.677429730517763e-05, "epoch": 2.3580562659846547, "percentage": 33.69, "elapsed_time": "9:08:22", "remaining_time": "17:59:30"} +{"current_steps": 923, "total_steps": 2737, "loss": 1.049, "lr": 1.6764909270591875e-05, "epoch": 2.360613810741688, "percentage": 33.72, "elapsed_time": "9:08:58", "remaining_time": "17:58:54"} +{"current_steps": 924, "total_steps": 2737, "loss": 1.0568, "lr": 1.6755510229921713e-05, "epoch": 2.363171355498721, "percentage": 33.76, "elapsed_time": "9:09:33", "remaining_time": "17:58:18"} +{"current_steps": 925, "total_steps": 2737, "loss": 1.0447, "lr": 1.6746100198458795e-05, "epoch": 2.3657289002557547, "percentage": 33.8, "elapsed_time": "9:10:09", "remaining_time": "17:57:42"} +{"current_steps": 926, "total_steps": 2737, "loss": 1.0213, "lr": 1.673667919151266e-05, "epoch": 2.368286445012788, "percentage": 33.83, "elapsed_time": "9:10:44", "remaining_time": "17:57:06"} +{"current_steps": 927, "total_steps": 2737, "loss": 1.079, "lr": 1.6727247224410686e-05, "epoch": 2.370843989769821, "percentage": 33.87, "elapsed_time": "9:11:20", "remaining_time": "17:56:31"} +{"current_steps": 928, "total_steps": 2737, "loss": 1.0864, "lr": 1.67178043124981e-05, "epoch": 2.373401534526854, "percentage": 33.91, "elapsed_time": "9:11:56", "remaining_time": "17:55:55"} +{"current_steps": 929, "total_steps": 2737, "loss": 1.0564, "lr": 1.6708350471137927e-05, "epoch": 2.3759590792838874, "percentage": 33.94, "elapsed_time": "9:12:31", "remaining_time": "17:55:19"} +{"current_steps": 930, "total_steps": 2737, "loss": 1.0815, "lr": 1.669888571571098e-05, "epoch": 2.3785166240409206, "percentage": 33.98, "elapsed_time": "9:13:08", "remaining_time": "17:54:45"} +{"current_steps": 931, "total_steps": 2737, "loss": 1.0453, "lr": 1.6689410061615823e-05, "epoch": 2.381074168797954, "percentage": 34.02, "elapsed_time": "9:13:44", "remaining_time": "17:54:10"} +{"current_steps": 932, "total_steps": 2737, "loss": 1.0691, "lr": 1.6679923524268748e-05, "epoch": 2.3836317135549874, "percentage": 34.05, "elapsed_time": "9:14:20", "remaining_time": "17:53:35"} +{"current_steps": 933, "total_steps": 2737, "loss": 1.0527, "lr": 1.6670426119103762e-05, "epoch": 2.3861892583120206, "percentage": 34.09, "elapsed_time": "9:14:56", "remaining_time": "17:52:59"} +{"current_steps": 934, "total_steps": 2737, "loss": 1.039, "lr": 1.666091786157255e-05, "epoch": 2.3887468030690537, "percentage": 34.12, "elapsed_time": "9:15:32", "remaining_time": "17:52:24"} +{"current_steps": 935, "total_steps": 2737, "loss": 1.0368, "lr": 1.6651398767144454e-05, "epoch": 2.391304347826087, "percentage": 34.16, "elapsed_time": "9:16:08", "remaining_time": "17:51:49"} +{"current_steps": 936, "total_steps": 2737, "loss": 1.0612, "lr": 1.664186885130644e-05, "epoch": 2.39386189258312, "percentage": 34.2, "elapsed_time": "9:16:43", "remaining_time": "17:51:13"} +{"current_steps": 937, "total_steps": 2737, "loss": 1.0573, "lr": 1.6632328129563088e-05, "epoch": 2.3964194373401533, "percentage": 34.23, "elapsed_time": "9:17:19", "remaining_time": "17:50:38"} +{"current_steps": 938, "total_steps": 2737, "loss": 1.0689, "lr": 1.6622776617436556e-05, "epoch": 2.398976982097187, "percentage": 34.27, "elapsed_time": "9:17:55", "remaining_time": "17:50:02"} +{"current_steps": 939, "total_steps": 2737, "loss": 1.0514, "lr": 1.6613214330466557e-05, "epoch": 2.40153452685422, "percentage": 34.31, "elapsed_time": "9:18:30", "remaining_time": "17:49:26"} +{"current_steps": 940, "total_steps": 2737, "loss": 1.0607, "lr": 1.6603641284210335e-05, "epoch": 2.4040920716112533, "percentage": 34.34, "elapsed_time": "9:19:07", "remaining_time": "17:48:52"} +{"current_steps": 941, "total_steps": 2737, "loss": 1.0526, "lr": 1.6594057494242634e-05, "epoch": 2.4066496163682864, "percentage": 34.38, "elapsed_time": "9:19:42", "remaining_time": "17:48:16"} +{"current_steps": 942, "total_steps": 2737, "loss": 1.0584, "lr": 1.6584462976155683e-05, "epoch": 2.4092071611253196, "percentage": 34.42, "elapsed_time": "9:20:18", "remaining_time": "17:47:41"} +{"current_steps": 943, "total_steps": 2737, "loss": 1.0621, "lr": 1.6574857745559168e-05, "epoch": 2.411764705882353, "percentage": 34.45, "elapsed_time": "9:20:54", "remaining_time": "17:47:05"} +{"current_steps": 944, "total_steps": 2737, "loss": 1.0625, "lr": 1.656524181808019e-05, "epoch": 2.414322250639386, "percentage": 34.49, "elapsed_time": "9:21:30", "remaining_time": "17:46:29"} +{"current_steps": 945, "total_steps": 2737, "loss": 1.0165, "lr": 1.655561520936327e-05, "epoch": 2.4168797953964196, "percentage": 34.53, "elapsed_time": "9:22:06", "remaining_time": "17:45:55"} +{"current_steps": 946, "total_steps": 2737, "loss": 1.036, "lr": 1.6545977935070293e-05, "epoch": 2.419437340153453, "percentage": 34.56, "elapsed_time": "9:22:42", "remaining_time": "17:45:19"} +{"current_steps": 947, "total_steps": 2737, "loss": 1.0879, "lr": 1.6536330010880502e-05, "epoch": 2.421994884910486, "percentage": 34.6, "elapsed_time": "9:23:18", "remaining_time": "17:44:44"} +{"current_steps": 948, "total_steps": 2737, "loss": 1.0447, "lr": 1.652667145249047e-05, "epoch": 2.424552429667519, "percentage": 34.64, "elapsed_time": "9:23:53", "remaining_time": "17:44:08"} +{"current_steps": 949, "total_steps": 2737, "loss": 1.0603, "lr": 1.6517002275614062e-05, "epoch": 2.4271099744245523, "percentage": 34.67, "elapsed_time": "9:24:29", "remaining_time": "17:43:32"} +{"current_steps": 950, "total_steps": 2737, "loss": 1.0415, "lr": 1.6507322495982433e-05, "epoch": 2.4296675191815855, "percentage": 34.71, "elapsed_time": "9:25:04", "remaining_time": "17:42:56"} +{"current_steps": 951, "total_steps": 2737, "loss": 1.057, "lr": 1.6497632129343964e-05, "epoch": 2.4322250639386187, "percentage": 34.75, "elapsed_time": "9:25:40", "remaining_time": "17:42:21"} +{"current_steps": 952, "total_steps": 2737, "loss": 1.0225, "lr": 1.6487931191464293e-05, "epoch": 2.4347826086956523, "percentage": 34.78, "elapsed_time": "9:26:16", "remaining_time": "17:41:45"} +{"current_steps": 953, "total_steps": 2737, "loss": 1.0743, "lr": 1.647821969812623e-05, "epoch": 2.4373401534526855, "percentage": 34.82, "elapsed_time": "9:26:51", "remaining_time": "17:41:09"} +{"current_steps": 954, "total_steps": 2737, "loss": 1.0753, "lr": 1.6468497665129767e-05, "epoch": 2.4398976982097187, "percentage": 34.86, "elapsed_time": "9:27:27", "remaining_time": "17:40:33"} +{"current_steps": 955, "total_steps": 2737, "loss": 1.0502, "lr": 1.645876510829205e-05, "epoch": 2.442455242966752, "percentage": 34.89, "elapsed_time": "9:28:03", "remaining_time": "17:39:58"} +{"current_steps": 956, "total_steps": 2737, "loss": 1.0604, "lr": 1.6449022043447333e-05, "epoch": 2.445012787723785, "percentage": 34.93, "elapsed_time": "9:28:38", "remaining_time": "17:39:22"} +{"current_steps": 957, "total_steps": 2737, "loss": 1.0307, "lr": 1.6439268486446982e-05, "epoch": 2.4475703324808182, "percentage": 34.97, "elapsed_time": "9:29:15", "remaining_time": "17:38:48"} +{"current_steps": 958, "total_steps": 2737, "loss": 1.0244, "lr": 1.642950445315941e-05, "epoch": 2.4501278772378514, "percentage": 35.0, "elapsed_time": "9:29:50", "remaining_time": "17:38:12"} +{"current_steps": 959, "total_steps": 2737, "loss": 1.0475, "lr": 1.6419729959470107e-05, "epoch": 2.452685421994885, "percentage": 35.04, "elapsed_time": "9:30:26", "remaining_time": "17:37:36"} +{"current_steps": 960, "total_steps": 2737, "loss": 1.0205, "lr": 1.6409945021281547e-05, "epoch": 2.455242966751918, "percentage": 35.07, "elapsed_time": "9:31:02", "remaining_time": "17:37:00"} +{"current_steps": 961, "total_steps": 2737, "loss": 1.0902, "lr": 1.6400149654513224e-05, "epoch": 2.4578005115089514, "percentage": 35.11, "elapsed_time": "9:31:37", "remaining_time": "17:36:24"} +{"current_steps": 962, "total_steps": 2737, "loss": 1.0655, "lr": 1.6390343875101582e-05, "epoch": 2.4603580562659846, "percentage": 35.15, "elapsed_time": "9:32:13", "remaining_time": "17:35:49"} +{"current_steps": 963, "total_steps": 2737, "loss": 1.075, "lr": 1.6380527699000012e-05, "epoch": 2.4629156010230178, "percentage": 35.18, "elapsed_time": "9:32:49", "remaining_time": "17:35:13"} +{"current_steps": 964, "total_steps": 2737, "loss": 1.0802, "lr": 1.6370701142178815e-05, "epoch": 2.4654731457800514, "percentage": 35.22, "elapsed_time": "9:33:24", "remaining_time": "17:34:37"} +{"current_steps": 965, "total_steps": 2737, "loss": 1.0315, "lr": 1.636086422062519e-05, "epoch": 2.4680306905370846, "percentage": 35.26, "elapsed_time": "9:34:00", "remaining_time": "17:34:02"} +{"current_steps": 966, "total_steps": 2737, "loss": 1.0454, "lr": 1.635101695034319e-05, "epoch": 2.4705882352941178, "percentage": 35.29, "elapsed_time": "9:34:36", "remaining_time": "17:33:26"} +{"current_steps": 967, "total_steps": 2737, "loss": 1.0577, "lr": 1.6341159347353714e-05, "epoch": 2.473145780051151, "percentage": 35.33, "elapsed_time": "9:35:12", "remaining_time": "17:32:51"} +{"current_steps": 968, "total_steps": 2737, "loss": 1.0607, "lr": 1.633129142769446e-05, "epoch": 2.475703324808184, "percentage": 35.37, "elapsed_time": "9:35:48", "remaining_time": "17:32:15"} +{"current_steps": 969, "total_steps": 2737, "loss": 1.0624, "lr": 1.6321413207419915e-05, "epoch": 2.4782608695652173, "percentage": 35.4, "elapsed_time": "9:36:23", "remaining_time": "17:31:40"} +{"current_steps": 970, "total_steps": 2737, "loss": 1.0277, "lr": 1.6311524702601328e-05, "epoch": 2.4808184143222505, "percentage": 35.44, "elapsed_time": "9:36:59", "remaining_time": "17:31:04"} +{"current_steps": 971, "total_steps": 2737, "loss": 1.0509, "lr": 1.6301625929326682e-05, "epoch": 2.483375959079284, "percentage": 35.48, "elapsed_time": "9:37:35", "remaining_time": "17:30:28"} +{"current_steps": 972, "total_steps": 2737, "loss": 1.0743, "lr": 1.6291716903700657e-05, "epoch": 2.4859335038363173, "percentage": 35.51, "elapsed_time": "9:38:10", "remaining_time": "17:29:52"} +{"current_steps": 973, "total_steps": 2737, "loss": 1.0528, "lr": 1.6281797641844615e-05, "epoch": 2.4884910485933505, "percentage": 35.55, "elapsed_time": "9:38:46", "remaining_time": "17:29:17"} +{"current_steps": 974, "total_steps": 2737, "loss": 1.0536, "lr": 1.6271868159896583e-05, "epoch": 2.4910485933503836, "percentage": 35.59, "elapsed_time": "9:39:22", "remaining_time": "17:28:41"} +{"current_steps": 975, "total_steps": 2737, "loss": 1.0295, "lr": 1.6261928474011205e-05, "epoch": 2.493606138107417, "percentage": 35.62, "elapsed_time": "9:39:57", "remaining_time": "17:28:05"} +{"current_steps": 976, "total_steps": 2737, "loss": 1.0611, "lr": 1.6251978600359727e-05, "epoch": 2.49616368286445, "percentage": 35.66, "elapsed_time": "9:40:33", "remaining_time": "17:27:29"} +{"current_steps": 977, "total_steps": 2737, "loss": 1.0501, "lr": 1.6242018555129968e-05, "epoch": 2.498721227621483, "percentage": 35.7, "elapsed_time": "9:41:09", "remaining_time": "17:26:54"} +{"current_steps": 978, "total_steps": 2737, "loss": 1.0632, "lr": 1.6232048354526305e-05, "epoch": 2.501278772378517, "percentage": 35.73, "elapsed_time": "9:41:44", "remaining_time": "17:26:18"} +{"current_steps": 979, "total_steps": 2737, "loss": 1.0669, "lr": 1.6222068014769626e-05, "epoch": 2.50383631713555, "percentage": 35.77, "elapsed_time": "9:42:20", "remaining_time": "17:25:42"} +{"current_steps": 980, "total_steps": 2737, "loss": 1.0242, "lr": 1.6212077552097326e-05, "epoch": 2.506393861892583, "percentage": 35.81, "elapsed_time": "9:42:56", "remaining_time": "17:25:07"} +{"current_steps": 981, "total_steps": 2737, "loss": 1.038, "lr": 1.6202076982763258e-05, "epoch": 2.5089514066496164, "percentage": 35.84, "elapsed_time": "9:43:31", "remaining_time": "17:24:31"} +{"current_steps": 982, "total_steps": 2737, "loss": 1.0192, "lr": 1.6192066323037723e-05, "epoch": 2.5115089514066495, "percentage": 35.88, "elapsed_time": "9:44:07", "remaining_time": "17:23:55"} +{"current_steps": 983, "total_steps": 2737, "loss": 1.0317, "lr": 1.618204558920744e-05, "epoch": 2.5140664961636827, "percentage": 35.92, "elapsed_time": "9:44:43", "remaining_time": "17:23:20"} +{"current_steps": 984, "total_steps": 2737, "loss": 1.0604, "lr": 1.6172014797575512e-05, "epoch": 2.516624040920716, "percentage": 35.95, "elapsed_time": "9:45:18", "remaining_time": "17:22:44"} +{"current_steps": 985, "total_steps": 2737, "loss": 1.0558, "lr": 1.616197396446142e-05, "epoch": 2.5191815856777495, "percentage": 35.99, "elapsed_time": "9:45:55", "remaining_time": "17:22:10"} +{"current_steps": 986, "total_steps": 2737, "loss": 1.0282, "lr": 1.6151923106200964e-05, "epoch": 2.5217391304347827, "percentage": 36.02, "elapsed_time": "9:46:31", "remaining_time": "17:21:34"} +{"current_steps": 987, "total_steps": 2737, "loss": 1.0442, "lr": 1.6141862239146263e-05, "epoch": 2.524296675191816, "percentage": 36.06, "elapsed_time": "9:47:06", "remaining_time": "17:20:58"} +{"current_steps": 988, "total_steps": 2737, "loss": 1.0671, "lr": 1.613179137966572e-05, "epoch": 2.526854219948849, "percentage": 36.1, "elapsed_time": "9:47:42", "remaining_time": "17:20:22"} +{"current_steps": 989, "total_steps": 2737, "loss": 1.0659, "lr": 1.612171054414399e-05, "epoch": 2.5294117647058822, "percentage": 36.13, "elapsed_time": "9:48:18", "remaining_time": "17:19:47"} +{"current_steps": 990, "total_steps": 2737, "loss": 1.0757, "lr": 1.6111619748981967e-05, "epoch": 2.531969309462916, "percentage": 36.17, "elapsed_time": "9:48:53", "remaining_time": "17:19:11"} +{"current_steps": 991, "total_steps": 2737, "loss": 1.0574, "lr": 1.610151901059674e-05, "epoch": 2.5345268542199486, "percentage": 36.21, "elapsed_time": "9:49:29", "remaining_time": "17:18:35"} +{"current_steps": 992, "total_steps": 2737, "loss": 1.076, "lr": 1.6091408345421583e-05, "epoch": 2.5370843989769822, "percentage": 36.24, "elapsed_time": "9:50:05", "remaining_time": "17:18:00"} +{"current_steps": 993, "total_steps": 2737, "loss": 1.0557, "lr": 1.6081287769905914e-05, "epoch": 2.5396419437340154, "percentage": 36.28, "elapsed_time": "9:50:40", "remaining_time": "17:17:24"} +{"current_steps": 994, "total_steps": 2737, "loss": 1.0371, "lr": 1.6071157300515274e-05, "epoch": 2.5421994884910486, "percentage": 36.32, "elapsed_time": "9:51:16", "remaining_time": "17:16:48"} +{"current_steps": 995, "total_steps": 2737, "loss": 1.0293, "lr": 1.6061016953731307e-05, "epoch": 2.544757033248082, "percentage": 36.35, "elapsed_time": "9:51:52", "remaining_time": "17:16:12"} +{"current_steps": 996, "total_steps": 2737, "loss": 1.0497, "lr": 1.6050866746051722e-05, "epoch": 2.547314578005115, "percentage": 36.39, "elapsed_time": "9:52:27", "remaining_time": "17:15:37"} +{"current_steps": 997, "total_steps": 2737, "loss": 1.0507, "lr": 1.6040706693990272e-05, "epoch": 2.5498721227621486, "percentage": 36.43, "elapsed_time": "9:53:03", "remaining_time": "17:15:01"} +{"current_steps": 998, "total_steps": 2737, "loss": 1.051, "lr": 1.6030536814076722e-05, "epoch": 2.5524296675191813, "percentage": 36.46, "elapsed_time": "9:53:39", "remaining_time": "17:14:25"} +{"current_steps": 999, "total_steps": 2737, "loss": 1.044, "lr": 1.602035712285684e-05, "epoch": 2.554987212276215, "percentage": 36.5, "elapsed_time": "9:54:14", "remaining_time": "17:13:50"} +{"current_steps": 1000, "total_steps": 2737, "loss": 1.0466, "lr": 1.6010167636892338e-05, "epoch": 2.557544757033248, "percentage": 36.54, "elapsed_time": "9:54:50", "remaining_time": "17:13:14"} +{"current_steps": 1001, "total_steps": 2737, "loss": 1.0503, "lr": 1.5999968372760882e-05, "epoch": 2.5601023017902813, "percentage": 36.57, "elapsed_time": "9:55:26", "remaining_time": "17:12:38"} +{"current_steps": 1002, "total_steps": 2737, "loss": 1.0428, "lr": 1.5989759347056028e-05, "epoch": 2.5626598465473145, "percentage": 36.61, "elapsed_time": "9:56:01", "remaining_time": "17:12:03"} +{"current_steps": 1003, "total_steps": 2737, "loss": 1.067, "lr": 1.5979540576387226e-05, "epoch": 2.5652173913043477, "percentage": 36.65, "elapsed_time": "9:56:37", "remaining_time": "17:11:27"} +{"current_steps": 1004, "total_steps": 2737, "loss": 1.0735, "lr": 1.596931207737978e-05, "epoch": 2.5677749360613813, "percentage": 36.68, "elapsed_time": "9:57:13", "remaining_time": "17:10:51"} +{"current_steps": 1005, "total_steps": 2737, "loss": 1.0683, "lr": 1.5959073866674812e-05, "epoch": 2.5703324808184145, "percentage": 36.72, "elapsed_time": "9:57:49", "remaining_time": "17:10:16"} +{"current_steps": 1006, "total_steps": 2737, "loss": 1.006, "lr": 1.594882596092926e-05, "epoch": 2.5728900255754477, "percentage": 36.76, "elapsed_time": "9:58:24", "remaining_time": "17:09:40"} +{"current_steps": 1007, "total_steps": 2737, "loss": 1.0815, "lr": 1.5938568376815816e-05, "epoch": 2.575447570332481, "percentage": 36.79, "elapsed_time": "9:59:00", "remaining_time": "17:09:04"} +{"current_steps": 1008, "total_steps": 2737, "loss": 1.0712, "lr": 1.5928301131022933e-05, "epoch": 2.578005115089514, "percentage": 36.83, "elapsed_time": "9:59:36", "remaining_time": "17:08:29"} +{"current_steps": 1009, "total_steps": 2737, "loss": 1.069, "lr": 1.5918024240254778e-05, "epoch": 2.580562659846547, "percentage": 36.87, "elapsed_time": "10:00:11", "remaining_time": "17:07:53"} +{"current_steps": 1010, "total_steps": 2737, "loss": 1.0485, "lr": 1.5907737721231205e-05, "epoch": 2.5831202046035804, "percentage": 36.9, "elapsed_time": "10:00:47", "remaining_time": "17:07:17"} +{"current_steps": 1011, "total_steps": 2737, "loss": 1.0577, "lr": 1.5897441590687747e-05, "epoch": 2.585677749360614, "percentage": 36.94, "elapsed_time": "10:01:23", "remaining_time": "17:06:42"} +{"current_steps": 1012, "total_steps": 2737, "loss": 1.0603, "lr": 1.5887135865375552e-05, "epoch": 2.588235294117647, "percentage": 36.97, "elapsed_time": "10:01:59", "remaining_time": "17:06:06"} +{"current_steps": 1013, "total_steps": 2737, "loss": 1.0433, "lr": 1.5876820562061402e-05, "epoch": 2.5907928388746804, "percentage": 37.01, "elapsed_time": "10:02:34", "remaining_time": "17:05:30"} +{"current_steps": 1014, "total_steps": 2737, "loss": 1.0616, "lr": 1.586649569752765e-05, "epoch": 2.5933503836317136, "percentage": 37.05, "elapsed_time": "10:03:10", "remaining_time": "17:04:55"} +{"current_steps": 1015, "total_steps": 2737, "loss": 1.0413, "lr": 1.5856161288572195e-05, "epoch": 2.5959079283887467, "percentage": 37.08, "elapsed_time": "10:03:46", "remaining_time": "17:04:19"} +{"current_steps": 1016, "total_steps": 2737, "loss": 1.0407, "lr": 1.5845817352008485e-05, "epoch": 2.59846547314578, "percentage": 37.12, "elapsed_time": "10:04:21", "remaining_time": "17:03:43"} +{"current_steps": 1017, "total_steps": 2737, "loss": 1.0536, "lr": 1.583546390466545e-05, "epoch": 2.601023017902813, "percentage": 37.16, "elapsed_time": "10:04:57", "remaining_time": "17:03:07"} +{"current_steps": 1018, "total_steps": 2737, "loss": 1.0571, "lr": 1.58251009633875e-05, "epoch": 2.6035805626598467, "percentage": 37.19, "elapsed_time": "10:05:32", "remaining_time": "17:02:32"} +{"current_steps": 1019, "total_steps": 2737, "loss": 1.0297, "lr": 1.5814728545034503e-05, "epoch": 2.60613810741688, "percentage": 37.23, "elapsed_time": "10:06:08", "remaining_time": "17:01:56"} +{"current_steps": 1020, "total_steps": 2737, "loss": 1.037, "lr": 1.5804346666481728e-05, "epoch": 2.608695652173913, "percentage": 37.27, "elapsed_time": "10:06:44", "remaining_time": "17:01:20"} +{"current_steps": 1021, "total_steps": 2737, "loss": 1.0493, "lr": 1.5793955344619846e-05, "epoch": 2.6112531969309463, "percentage": 37.3, "elapsed_time": "10:07:19", "remaining_time": "17:00:43"} +{"current_steps": 1022, "total_steps": 2737, "loss": 1.0428, "lr": 1.5783554596354885e-05, "epoch": 2.6138107416879794, "percentage": 37.34, "elapsed_time": "10:07:54", "remaining_time": "17:00:07"} +{"current_steps": 1023, "total_steps": 2737, "loss": 1.0659, "lr": 1.577314443860821e-05, "epoch": 2.6163682864450126, "percentage": 37.38, "elapsed_time": "10:08:30", "remaining_time": "16:59:32"} +{"current_steps": 1024, "total_steps": 2737, "loss": 1.0434, "lr": 1.57627248883165e-05, "epoch": 2.618925831202046, "percentage": 37.41, "elapsed_time": "10:09:06", "remaining_time": "16:58:56"} +{"current_steps": 1025, "total_steps": 2737, "loss": 1.043, "lr": 1.575229596243171e-05, "epoch": 2.6214833759590794, "percentage": 37.45, "elapsed_time": "10:09:42", "remaining_time": "16:58:20"} +{"current_steps": 1026, "total_steps": 2737, "loss": 1.0494, "lr": 1.574185767792106e-05, "epoch": 2.6240409207161126, "percentage": 37.49, "elapsed_time": "10:10:17", "remaining_time": "16:57:45"} +{"current_steps": 1027, "total_steps": 2737, "loss": 1.0568, "lr": 1.573141005176697e-05, "epoch": 2.626598465473146, "percentage": 37.52, "elapsed_time": "10:10:53", "remaining_time": "16:57:09"} +{"current_steps": 1028, "total_steps": 2737, "loss": 1.0648, "lr": 1.5720953100967085e-05, "epoch": 2.629156010230179, "percentage": 37.56, "elapsed_time": "10:11:29", "remaining_time": "16:56:34"} +{"current_steps": 1029, "total_steps": 2737, "loss": 1.0663, "lr": 1.5710486842534206e-05, "epoch": 2.631713554987212, "percentage": 37.6, "elapsed_time": "10:12:04", "remaining_time": "16:55:58"} +{"current_steps": 1030, "total_steps": 2737, "loss": 1.0534, "lr": 1.5700011293496285e-05, "epoch": 2.634271099744246, "percentage": 37.63, "elapsed_time": "10:12:40", "remaining_time": "16:55:22"} +{"current_steps": 1031, "total_steps": 2737, "loss": 1.059, "lr": 1.568952647089638e-05, "epoch": 2.6368286445012785, "percentage": 37.67, "elapsed_time": "10:13:16", "remaining_time": "16:54:47"} +{"current_steps": 1032, "total_steps": 2737, "loss": 1.0221, "lr": 1.5679032391792648e-05, "epoch": 2.639386189258312, "percentage": 37.71, "elapsed_time": "10:13:52", "remaining_time": "16:54:11"} +{"current_steps": 1033, "total_steps": 2737, "loss": 1.0858, "lr": 1.5668529073258298e-05, "epoch": 2.6419437340153453, "percentage": 37.74, "elapsed_time": "10:14:27", "remaining_time": "16:53:35"} +{"current_steps": 1034, "total_steps": 2737, "loss": 1.06, "lr": 1.5658016532381565e-05, "epoch": 2.6445012787723785, "percentage": 37.78, "elapsed_time": "10:15:03", "remaining_time": "16:52:59"} +{"current_steps": 1035, "total_steps": 2737, "loss": 1.0651, "lr": 1.5647494786265705e-05, "epoch": 2.6470588235294117, "percentage": 37.82, "elapsed_time": "10:15:38", "remaining_time": "16:52:24"} +{"current_steps": 1036, "total_steps": 2737, "loss": 1.0373, "lr": 1.5636963852028936e-05, "epoch": 2.649616368286445, "percentage": 37.85, "elapsed_time": "10:16:14", "remaining_time": "16:51:48"} +{"current_steps": 1037, "total_steps": 2737, "loss": 1.0426, "lr": 1.5626423746804433e-05, "epoch": 2.6521739130434785, "percentage": 37.89, "elapsed_time": "10:16:50", "remaining_time": "16:51:13"} +{"current_steps": 1038, "total_steps": 2737, "loss": 1.0504, "lr": 1.5615874487740287e-05, "epoch": 2.6547314578005117, "percentage": 37.92, "elapsed_time": "10:17:26", "remaining_time": "16:50:37"} +{"current_steps": 1039, "total_steps": 2737, "loss": 1.0572, "lr": 1.560531609199948e-05, "epoch": 2.657289002557545, "percentage": 37.96, "elapsed_time": "10:18:01", "remaining_time": "16:50:01"} +{"current_steps": 1040, "total_steps": 2737, "loss": 1.068, "lr": 1.559474857675986e-05, "epoch": 2.659846547314578, "percentage": 38.0, "elapsed_time": "10:18:37", "remaining_time": "16:49:25"} +{"current_steps": 1041, "total_steps": 2737, "loss": 1.0449, "lr": 1.5584171959214126e-05, "epoch": 2.662404092071611, "percentage": 38.03, "elapsed_time": "10:19:13", "remaining_time": "16:48:50"} +{"current_steps": 1042, "total_steps": 2737, "loss": 1.0784, "lr": 1.557358625656976e-05, "epoch": 2.6649616368286444, "percentage": 38.07, "elapsed_time": "10:19:48", "remaining_time": "16:48:14"} +{"current_steps": 1043, "total_steps": 2737, "loss": 1.0118, "lr": 1.5562991486049045e-05, "epoch": 2.6675191815856776, "percentage": 38.11, "elapsed_time": "10:20:24", "remaining_time": "16:47:38"} +{"current_steps": 1044, "total_steps": 2737, "loss": 1.0555, "lr": 1.555238766488901e-05, "epoch": 2.670076726342711, "percentage": 38.14, "elapsed_time": "10:21:00", "remaining_time": "16:47:02"} +{"current_steps": 1045, "total_steps": 2737, "loss": 1.0402, "lr": 1.5541774810341404e-05, "epoch": 2.6726342710997444, "percentage": 38.18, "elapsed_time": "10:21:35", "remaining_time": "16:46:27"} +{"current_steps": 1046, "total_steps": 2737, "loss": 1.0251, "lr": 1.5531152939672683e-05, "epoch": 2.6751918158567776, "percentage": 38.22, "elapsed_time": "10:22:11", "remaining_time": "16:45:51"} +{"current_steps": 1047, "total_steps": 2737, "loss": 1.0549, "lr": 1.5520522070163962e-05, "epoch": 2.6777493606138107, "percentage": 38.25, "elapsed_time": "10:22:47", "remaining_time": "16:45:15"} +{"current_steps": 1048, "total_steps": 2737, "loss": 1.0586, "lr": 1.550988221911101e-05, "epoch": 2.680306905370844, "percentage": 38.29, "elapsed_time": "10:23:22", "remaining_time": "16:44:40"} +{"current_steps": 1049, "total_steps": 2737, "loss": 1.0315, "lr": 1.549923340382419e-05, "epoch": 2.682864450127877, "percentage": 38.33, "elapsed_time": "10:23:58", "remaining_time": "16:44:04"} +{"current_steps": 1050, "total_steps": 2737, "loss": 1.0542, "lr": 1.548857564162846e-05, "epoch": 2.6854219948849103, "percentage": 38.36, "elapsed_time": "10:24:34", "remaining_time": "16:43:28"} +{"current_steps": 1051, "total_steps": 2737, "loss": 1.0546, "lr": 1.5477908949863335e-05, "epoch": 2.687979539641944, "percentage": 38.4, "elapsed_time": "10:25:09", "remaining_time": "16:42:52"} +{"current_steps": 1052, "total_steps": 2737, "loss": 1.05, "lr": 1.5467233345882858e-05, "epoch": 2.690537084398977, "percentage": 38.44, "elapsed_time": "10:25:45", "remaining_time": "16:42:16"} +{"current_steps": 1053, "total_steps": 2737, "loss": 1.0582, "lr": 1.5456548847055565e-05, "epoch": 2.6930946291560103, "percentage": 38.47, "elapsed_time": "10:26:21", "remaining_time": "16:41:41"} +{"current_steps": 1054, "total_steps": 2737, "loss": 1.0227, "lr": 1.5445855470764467e-05, "epoch": 2.6956521739130435, "percentage": 38.51, "elapsed_time": "10:26:56", "remaining_time": "16:41:05"} +{"current_steps": 1055, "total_steps": 2737, "loss": 1.0361, "lr": 1.5435153234407023e-05, "epoch": 2.6982097186700766, "percentage": 38.55, "elapsed_time": "10:27:32", "remaining_time": "16:40:29"} +{"current_steps": 1056, "total_steps": 2737, "loss": 1.0556, "lr": 1.5424442155395095e-05, "epoch": 2.70076726342711, "percentage": 38.58, "elapsed_time": "10:28:08", "remaining_time": "16:39:54"} +{"current_steps": 1057, "total_steps": 2737, "loss": 1.0583, "lr": 1.5413722251154947e-05, "epoch": 2.703324808184143, "percentage": 38.62, "elapsed_time": "10:28:43", "remaining_time": "16:39:18"} +{"current_steps": 1058, "total_steps": 2737, "loss": 1.0461, "lr": 1.540299353912719e-05, "epoch": 2.7058823529411766, "percentage": 38.66, "elapsed_time": "10:29:19", "remaining_time": "16:38:42"} +{"current_steps": 1059, "total_steps": 2737, "loss": 1.0723, "lr": 1.5392256036766767e-05, "epoch": 2.70843989769821, "percentage": 38.69, "elapsed_time": "10:29:55", "remaining_time": "16:38:08"} +{"current_steps": 1060, "total_steps": 2737, "loss": 1.0303, "lr": 1.5381509761542925e-05, "epoch": 2.710997442455243, "percentage": 38.73, "elapsed_time": "10:30:31", "remaining_time": "16:37:32"} +{"current_steps": 1061, "total_steps": 2737, "loss": 1.072, "lr": 1.537075473093918e-05, "epoch": 2.713554987212276, "percentage": 38.77, "elapsed_time": "10:31:07", "remaining_time": "16:36:56"} +{"current_steps": 1062, "total_steps": 2737, "loss": 1.0609, "lr": 1.535999096245329e-05, "epoch": 2.7161125319693094, "percentage": 38.8, "elapsed_time": "10:31:43", "remaining_time": "16:36:21"} +{"current_steps": 1063, "total_steps": 2737, "loss": 1.0976, "lr": 1.5349218473597244e-05, "epoch": 2.718670076726343, "percentage": 38.84, "elapsed_time": "10:32:18", "remaining_time": "16:35:45"} +{"current_steps": 1064, "total_steps": 2737, "loss": 1.0561, "lr": 1.5338437281897196e-05, "epoch": 2.7212276214833757, "percentage": 38.87, "elapsed_time": "10:32:54", "remaining_time": "16:35:09"} +{"current_steps": 1065, "total_steps": 2737, "loss": 1.0249, "lr": 1.532764740489348e-05, "epoch": 2.7237851662404093, "percentage": 38.91, "elapsed_time": "10:33:30", "remaining_time": "16:34:34"} +{"current_steps": 1066, "total_steps": 2737, "loss": 1.0448, "lr": 1.5316848860140545e-05, "epoch": 2.7263427109974425, "percentage": 38.95, "elapsed_time": "10:34:05", "remaining_time": "16:33:58"} +{"current_steps": 1067, "total_steps": 2737, "loss": 1.0538, "lr": 1.530604166520695e-05, "epoch": 2.7289002557544757, "percentage": 38.98, "elapsed_time": "10:34:41", "remaining_time": "16:33:22"} +{"current_steps": 1068, "total_steps": 2737, "loss": 1.0709, "lr": 1.529522583767533e-05, "epoch": 2.731457800511509, "percentage": 39.02, "elapsed_time": "10:35:17", "remaining_time": "16:32:47"} +{"current_steps": 1069, "total_steps": 2737, "loss": 1.0476, "lr": 1.5284401395142356e-05, "epoch": 2.734015345268542, "percentage": 39.06, "elapsed_time": "10:35:53", "remaining_time": "16:32:11"} +{"current_steps": 1070, "total_steps": 2737, "loss": 1.0906, "lr": 1.5273568355218714e-05, "epoch": 2.7365728900255757, "percentage": 39.09, "elapsed_time": "10:36:29", "remaining_time": "16:31:37"} +{"current_steps": 1071, "total_steps": 2737, "loss": 1.0421, "lr": 1.5262726735529096e-05, "epoch": 2.7391304347826084, "percentage": 39.13, "elapsed_time": "10:37:05", "remaining_time": "16:31:01"} +{"current_steps": 1072, "total_steps": 2737, "loss": 1.0714, "lr": 1.5251876553712129e-05, "epoch": 2.741687979539642, "percentage": 39.17, "elapsed_time": "10:37:41", "remaining_time": "16:30:26"} +{"current_steps": 1073, "total_steps": 2737, "loss": 1.0529, "lr": 1.5241017827420379e-05, "epoch": 2.7442455242966752, "percentage": 39.2, "elapsed_time": "10:38:16", "remaining_time": "16:29:50"} +{"current_steps": 1074, "total_steps": 2737, "loss": 1.0413, "lr": 1.523015057432032e-05, "epoch": 2.7468030690537084, "percentage": 39.24, "elapsed_time": "10:38:52", "remaining_time": "16:29:14"} +{"current_steps": 1075, "total_steps": 2737, "loss": 1.0965, "lr": 1.5219274812092297e-05, "epoch": 2.7493606138107416, "percentage": 39.28, "elapsed_time": "10:39:28", "remaining_time": "16:28:39"} +{"current_steps": 1076, "total_steps": 2737, "loss": 1.0506, "lr": 1.5208390558430486e-05, "epoch": 2.7519181585677748, "percentage": 39.31, "elapsed_time": "10:40:03", "remaining_time": "16:28:03"} +{"current_steps": 1077, "total_steps": 2737, "loss": 1.0701, "lr": 1.5197497831042891e-05, "epoch": 2.7544757033248084, "percentage": 39.35, "elapsed_time": "10:40:39", "remaining_time": "16:27:27"} +{"current_steps": 1078, "total_steps": 2737, "loss": 1.0344, "lr": 1.5186596647651299e-05, "epoch": 2.7570332480818416, "percentage": 39.39, "elapsed_time": "10:41:14", "remaining_time": "16:26:50"} +{"current_steps": 1079, "total_steps": 2737, "loss": 1.0111, "lr": 1.5175687025991254e-05, "epoch": 2.7595907928388748, "percentage": 39.42, "elapsed_time": "10:41:50", "remaining_time": "16:26:15"} +{"current_steps": 1080, "total_steps": 2737, "loss": 1.0594, "lr": 1.5164768983812031e-05, "epoch": 2.762148337595908, "percentage": 39.46, "elapsed_time": "10:42:25", "remaining_time": "16:25:39"} +{"current_steps": 1081, "total_steps": 2737, "loss": 1.0195, "lr": 1.5153842538876595e-05, "epoch": 2.764705882352941, "percentage": 39.5, "elapsed_time": "10:43:01", "remaining_time": "16:25:03"} +{"current_steps": 1082, "total_steps": 2737, "loss": 1.0563, "lr": 1.5142907708961594e-05, "epoch": 2.7672634271099743, "percentage": 39.53, "elapsed_time": "10:43:37", "remaining_time": "16:24:27"} +{"current_steps": 1083, "total_steps": 2737, "loss": 1.0579, "lr": 1.5131964511857307e-05, "epoch": 2.7698209718670075, "percentage": 39.57, "elapsed_time": "10:44:12", "remaining_time": "16:23:52"} +{"current_steps": 1084, "total_steps": 2737, "loss": 1.0594, "lr": 1.512101296536764e-05, "epoch": 2.772378516624041, "percentage": 39.61, "elapsed_time": "10:44:48", "remaining_time": "16:23:16"} +{"current_steps": 1085, "total_steps": 2737, "loss": 1.0347, "lr": 1.5110053087310067e-05, "epoch": 2.7749360613810743, "percentage": 39.64, "elapsed_time": "10:45:24", "remaining_time": "16:22:40"} +{"current_steps": 1086, "total_steps": 2737, "loss": 1.0872, "lr": 1.5099084895515633e-05, "epoch": 2.7774936061381075, "percentage": 39.68, "elapsed_time": "10:45:59", "remaining_time": "16:22:04"} +{"current_steps": 1087, "total_steps": 2737, "loss": 1.0102, "lr": 1.5088108407828887e-05, "epoch": 2.7800511508951407, "percentage": 39.72, "elapsed_time": "10:46:35", "remaining_time": "16:21:29"} +{"current_steps": 1088, "total_steps": 2737, "loss": 1.0373, "lr": 1.5077123642107901e-05, "epoch": 2.782608695652174, "percentage": 39.75, "elapsed_time": "10:47:11", "remaining_time": "16:20:53"} +{"current_steps": 1089, "total_steps": 2737, "loss": 1.0601, "lr": 1.5066130616224194e-05, "epoch": 2.785166240409207, "percentage": 39.79, "elapsed_time": "10:47:46", "remaining_time": "16:20:17"} +{"current_steps": 1090, "total_steps": 2737, "loss": 1.0282, "lr": 1.5055129348062733e-05, "epoch": 2.78772378516624, "percentage": 39.82, "elapsed_time": "10:48:22", "remaining_time": "16:19:41"} +{"current_steps": 1091, "total_steps": 2737, "loss": 1.0028, "lr": 1.5044119855521899e-05, "epoch": 2.790281329923274, "percentage": 39.86, "elapsed_time": "10:48:58", "remaining_time": "16:19:06"} +{"current_steps": 1092, "total_steps": 2737, "loss": 1.0642, "lr": 1.5033102156513442e-05, "epoch": 2.792838874680307, "percentage": 39.9, "elapsed_time": "10:49:33", "remaining_time": "16:18:30"} +{"current_steps": 1093, "total_steps": 2737, "loss": 1.0651, "lr": 1.5022076268962474e-05, "epoch": 2.79539641943734, "percentage": 39.93, "elapsed_time": "10:50:09", "remaining_time": "16:17:54"} +{"current_steps": 1094, "total_steps": 2737, "loss": 1.0499, "lr": 1.5011042210807416e-05, "epoch": 2.7979539641943734, "percentage": 39.97, "elapsed_time": "10:50:45", "remaining_time": "16:17:19"} +{"current_steps": 1095, "total_steps": 2737, "loss": 1.0441, "lr": 1.5000000000000002e-05, "epoch": 2.8005115089514065, "percentage": 40.01, "elapsed_time": "10:51:21", "remaining_time": "16:16:43"} +{"current_steps": 1096, "total_steps": 2737, "loss": 1.0954, "lr": 1.4988949654505212e-05, "epoch": 2.80306905370844, "percentage": 40.04, "elapsed_time": "10:51:56", "remaining_time": "16:16:07"} +{"current_steps": 1097, "total_steps": 2737, "loss": 1.0616, "lr": 1.4977891192301266e-05, "epoch": 2.805626598465473, "percentage": 40.08, "elapsed_time": "10:52:32", "remaining_time": "16:15:32"} +{"current_steps": 1098, "total_steps": 2737, "loss": 1.0767, "lr": 1.4966824631379595e-05, "epoch": 2.8081841432225065, "percentage": 40.12, "elapsed_time": "10:53:08", "remaining_time": "16:14:56"} +{"current_steps": 1099, "total_steps": 2737, "loss": 1.0629, "lr": 1.49557499897448e-05, "epoch": 2.8107416879795397, "percentage": 40.15, "elapsed_time": "10:53:43", "remaining_time": "16:14:20"} +{"current_steps": 1100, "total_steps": 2737, "loss": 1.0401, "lr": 1.4944667285414629e-05, "epoch": 2.813299232736573, "percentage": 40.19, "elapsed_time": "10:54:19", "remaining_time": "16:13:45"} +{"current_steps": 1101, "total_steps": 2737, "loss": 1.0681, "lr": 1.4933576536419951e-05, "epoch": 2.815856777493606, "percentage": 40.23, "elapsed_time": "10:54:55", "remaining_time": "16:13:10"} +{"current_steps": 1102, "total_steps": 2737, "loss": 1.0478, "lr": 1.492247776080472e-05, "epoch": 2.8184143222506393, "percentage": 40.26, "elapsed_time": "10:55:31", "remaining_time": "16:12:34"} +{"current_steps": 1103, "total_steps": 2737, "loss": 1.0646, "lr": 1.4911370976625951e-05, "epoch": 2.820971867007673, "percentage": 40.3, "elapsed_time": "10:56:07", "remaining_time": "16:11:59"} +{"current_steps": 1104, "total_steps": 2737, "loss": 1.0395, "lr": 1.4900256201953686e-05, "epoch": 2.8235294117647056, "percentage": 40.34, "elapsed_time": "10:56:42", "remaining_time": "16:11:23"} +{"current_steps": 1105, "total_steps": 2737, "loss": 1.0299, "lr": 1.488913345487097e-05, "epoch": 2.8260869565217392, "percentage": 40.37, "elapsed_time": "10:57:18", "remaining_time": "16:10:47"} +{"current_steps": 1106, "total_steps": 2737, "loss": 1.0588, "lr": 1.4878002753473814e-05, "epoch": 2.8286445012787724, "percentage": 40.41, "elapsed_time": "10:57:54", "remaining_time": "16:10:11"} +{"current_steps": 1107, "total_steps": 2737, "loss": 1.0544, "lr": 1.486686411587118e-05, "epoch": 2.8312020460358056, "percentage": 40.45, "elapsed_time": "10:58:29", "remaining_time": "16:09:36"} +{"current_steps": 1108, "total_steps": 2737, "loss": 1.0673, "lr": 1.4855717560184925e-05, "epoch": 2.833759590792839, "percentage": 40.48, "elapsed_time": "10:59:05", "remaining_time": "16:09:00"} +{"current_steps": 1109, "total_steps": 2737, "loss": 1.0702, "lr": 1.4844563104549808e-05, "epoch": 2.836317135549872, "percentage": 40.52, "elapsed_time": "10:59:41", "remaining_time": "16:08:24"} +{"current_steps": 1110, "total_steps": 2737, "loss": 1.0518, "lr": 1.4833400767113425e-05, "epoch": 2.8388746803069056, "percentage": 40.56, "elapsed_time": "11:00:16", "remaining_time": "16:07:49"} +{"current_steps": 1111, "total_steps": 2737, "loss": 1.0519, "lr": 1.48222305660362e-05, "epoch": 2.8414322250639388, "percentage": 40.59, "elapsed_time": "11:00:52", "remaining_time": "16:07:13"} +{"current_steps": 1112, "total_steps": 2737, "loss": 1.0621, "lr": 1.4811052519491358e-05, "epoch": 2.843989769820972, "percentage": 40.63, "elapsed_time": "11:01:28", "remaining_time": "16:06:37"} +{"current_steps": 1113, "total_steps": 2737, "loss": 1.0495, "lr": 1.4799866645664875e-05, "epoch": 2.846547314578005, "percentage": 40.66, "elapsed_time": "11:02:04", "remaining_time": "16:06:02"} +{"current_steps": 1114, "total_steps": 2737, "loss": 1.0474, "lr": 1.4788672962755474e-05, "epoch": 2.8491048593350383, "percentage": 40.7, "elapsed_time": "11:02:39", "remaining_time": "16:05:26"} +{"current_steps": 1115, "total_steps": 2737, "loss": 1.056, "lr": 1.4777471488974573e-05, "epoch": 2.8516624040920715, "percentage": 40.74, "elapsed_time": "11:03:15", "remaining_time": "16:04:51"} +{"current_steps": 1116, "total_steps": 2737, "loss": 1.0473, "lr": 1.476626224254627e-05, "epoch": 2.8542199488491047, "percentage": 40.77, "elapsed_time": "11:03:51", "remaining_time": "16:04:15"} +{"current_steps": 1117, "total_steps": 2737, "loss": 1.0327, "lr": 1.475504524170731e-05, "epoch": 2.8567774936061383, "percentage": 40.81, "elapsed_time": "11:04:27", "remaining_time": "16:03:39"} +{"current_steps": 1118, "total_steps": 2737, "loss": 1.0603, "lr": 1.4743820504707054e-05, "epoch": 2.8593350383631715, "percentage": 40.85, "elapsed_time": "11:05:02", "remaining_time": "16:03:03"} +{"current_steps": 1119, "total_steps": 2737, "loss": 1.0345, "lr": 1.4732588049807442e-05, "epoch": 2.8618925831202047, "percentage": 40.88, "elapsed_time": "11:05:38", "remaining_time": "16:02:28"} +{"current_steps": 1120, "total_steps": 2737, "loss": 1.0932, "lr": 1.4721347895282977e-05, "epoch": 2.864450127877238, "percentage": 40.92, "elapsed_time": "11:06:14", "remaining_time": "16:01:52"} +{"current_steps": 1121, "total_steps": 2737, "loss": 1.0577, "lr": 1.4710100059420693e-05, "epoch": 2.867007672634271, "percentage": 40.96, "elapsed_time": "11:06:49", "remaining_time": "16:01:16"} +{"current_steps": 1122, "total_steps": 2737, "loss": 1.04, "lr": 1.4698844560520107e-05, "epoch": 2.869565217391304, "percentage": 40.99, "elapsed_time": "11:07:25", "remaining_time": "16:00:41"} +{"current_steps": 1123, "total_steps": 2737, "loss": 1.0115, "lr": 1.4687581416893218e-05, "epoch": 2.8721227621483374, "percentage": 41.03, "elapsed_time": "11:08:01", "remaining_time": "16:00:05"} +{"current_steps": 1124, "total_steps": 2737, "loss": 1.0925, "lr": 1.4676310646864455e-05, "epoch": 2.874680306905371, "percentage": 41.07, "elapsed_time": "11:08:36", "remaining_time": "15:59:29"} +{"current_steps": 1125, "total_steps": 2737, "loss": 1.0662, "lr": 1.4665032268770656e-05, "epoch": 2.877237851662404, "percentage": 41.1, "elapsed_time": "11:09:12", "remaining_time": "15:58:53"} +{"current_steps": 1126, "total_steps": 2737, "loss": 1.0615, "lr": 1.4653746300961037e-05, "epoch": 2.8797953964194374, "percentage": 41.14, "elapsed_time": "11:09:47", "remaining_time": "15:58:18"} +{"current_steps": 1127, "total_steps": 2737, "loss": 1.028, "lr": 1.4642452761797166e-05, "epoch": 2.8823529411764706, "percentage": 41.18, "elapsed_time": "11:10:23", "remaining_time": "15:57:42"} +{"current_steps": 1128, "total_steps": 2737, "loss": 1.0339, "lr": 1.4631151669652917e-05, "epoch": 2.8849104859335037, "percentage": 41.21, "elapsed_time": "11:10:59", "remaining_time": "15:57:06"} +{"current_steps": 1129, "total_steps": 2737, "loss": 1.0382, "lr": 1.4619843042914466e-05, "epoch": 2.887468030690537, "percentage": 41.25, "elapsed_time": "11:11:34", "remaining_time": "15:56:30"} +{"current_steps": 1130, "total_steps": 2737, "loss": 1.0631, "lr": 1.4608526899980238e-05, "epoch": 2.89002557544757, "percentage": 41.29, "elapsed_time": "11:12:10", "remaining_time": "15:55:55"} +{"current_steps": 1131, "total_steps": 2737, "loss": 1.0742, "lr": 1.4597203259260893e-05, "epoch": 2.8925831202046037, "percentage": 41.32, "elapsed_time": "11:12:46", "remaining_time": "15:55:19"} +{"current_steps": 1132, "total_steps": 2737, "loss": 1.0108, "lr": 1.4585872139179284e-05, "epoch": 2.895140664961637, "percentage": 41.36, "elapsed_time": "11:13:22", "remaining_time": "15:54:43"} +{"current_steps": 1133, "total_steps": 2737, "loss": 1.0343, "lr": 1.457453355817044e-05, "epoch": 2.89769820971867, "percentage": 41.4, "elapsed_time": "11:13:57", "remaining_time": "15:54:08"} +{"current_steps": 1134, "total_steps": 2737, "loss": 1.0344, "lr": 1.456318753468152e-05, "epoch": 2.9002557544757033, "percentage": 41.43, "elapsed_time": "11:14:33", "remaining_time": "15:53:32"} +{"current_steps": 1135, "total_steps": 2737, "loss": 1.0582, "lr": 1.455183408717179e-05, "epoch": 2.9028132992327365, "percentage": 41.47, "elapsed_time": "11:15:09", "remaining_time": "15:52:56"} +{"current_steps": 1136, "total_steps": 2737, "loss": 1.0319, "lr": 1.4540473234112607e-05, "epoch": 2.90537084398977, "percentage": 41.51, "elapsed_time": "11:15:44", "remaining_time": "15:52:21"} +{"current_steps": 1137, "total_steps": 2737, "loss": 1.094, "lr": 1.4529104993987364e-05, "epoch": 2.907928388746803, "percentage": 41.54, "elapsed_time": "11:16:20", "remaining_time": "15:51:45"} +{"current_steps": 1138, "total_steps": 2737, "loss": 1.0289, "lr": 1.4517729385291479e-05, "epoch": 2.9104859335038364, "percentage": 41.58, "elapsed_time": "11:16:56", "remaining_time": "15:51:09"} +{"current_steps": 1139, "total_steps": 2737, "loss": 1.0474, "lr": 1.4506346426532356e-05, "epoch": 2.9130434782608696, "percentage": 41.61, "elapsed_time": "11:17:32", "remaining_time": "15:50:34"} +{"current_steps": 1140, "total_steps": 2737, "loss": 1.0406, "lr": 1.4494956136229356e-05, "epoch": 2.915601023017903, "percentage": 41.65, "elapsed_time": "11:18:07", "remaining_time": "15:49:58"} +{"current_steps": 1141, "total_steps": 2737, "loss": 1.0545, "lr": 1.448355853291377e-05, "epoch": 2.918158567774936, "percentage": 41.69, "elapsed_time": "11:18:43", "remaining_time": "15:49:23"} +{"current_steps": 1142, "total_steps": 2737, "loss": 1.0649, "lr": 1.4472153635128787e-05, "epoch": 2.920716112531969, "percentage": 41.72, "elapsed_time": "11:19:19", "remaining_time": "15:48:47"} +{"current_steps": 1143, "total_steps": 2737, "loss": 1.0643, "lr": 1.4460741461429457e-05, "epoch": 2.923273657289003, "percentage": 41.76, "elapsed_time": "11:19:54", "remaining_time": "15:48:11"} +{"current_steps": 1144, "total_steps": 2737, "loss": 1.0375, "lr": 1.4449322030382681e-05, "epoch": 2.9258312020460355, "percentage": 41.8, "elapsed_time": "11:20:30", "remaining_time": "15:47:35"} +{"current_steps": 1145, "total_steps": 2737, "loss": 1.0459, "lr": 1.4437895360567156e-05, "epoch": 2.928388746803069, "percentage": 41.83, "elapsed_time": "11:21:06", "remaining_time": "15:47:00"} +{"current_steps": 1146, "total_steps": 2737, "loss": 1.0352, "lr": 1.4426461470573358e-05, "epoch": 2.9309462915601023, "percentage": 41.87, "elapsed_time": "11:21:42", "remaining_time": "15:46:24"} +{"current_steps": 1147, "total_steps": 2737, "loss": 1.0547, "lr": 1.4415020379003513e-05, "epoch": 2.9335038363171355, "percentage": 41.91, "elapsed_time": "11:22:17", "remaining_time": "15:45:49"} +{"current_steps": 1148, "total_steps": 2737, "loss": 1.0506, "lr": 1.4403572104471559e-05, "epoch": 2.9360613810741687, "percentage": 41.94, "elapsed_time": "11:22:54", "remaining_time": "15:45:14"} +{"current_steps": 1149, "total_steps": 2737, "loss": 1.067, "lr": 1.4392116665603123e-05, "epoch": 2.938618925831202, "percentage": 41.98, "elapsed_time": "11:23:29", "remaining_time": "15:44:38"} +{"current_steps": 1150, "total_steps": 2737, "loss": 1.0566, "lr": 1.4380654081035492e-05, "epoch": 2.9411764705882355, "percentage": 42.02, "elapsed_time": "11:24:06", "remaining_time": "15:44:04"} +{"current_steps": 1151, "total_steps": 2737, "loss": 1.069, "lr": 1.4369184369417573e-05, "epoch": 2.9437340153452687, "percentage": 42.05, "elapsed_time": "11:24:41", "remaining_time": "15:43:28"} +{"current_steps": 1152, "total_steps": 2737, "loss": 1.0393, "lr": 1.4357707549409865e-05, "epoch": 2.946291560102302, "percentage": 42.09, "elapsed_time": "11:25:17", "remaining_time": "15:42:52"} +{"current_steps": 1153, "total_steps": 2737, "loss": 1.0629, "lr": 1.4346223639684445e-05, "epoch": 2.948849104859335, "percentage": 42.13, "elapsed_time": "11:25:53", "remaining_time": "15:42:16"} +{"current_steps": 1154, "total_steps": 2737, "loss": 1.0683, "lr": 1.4334732658924906e-05, "epoch": 2.9514066496163682, "percentage": 42.16, "elapsed_time": "11:26:29", "remaining_time": "15:41:41"} +{"current_steps": 1155, "total_steps": 2737, "loss": 1.082, "lr": 1.4323234625826363e-05, "epoch": 2.9539641943734014, "percentage": 42.2, "elapsed_time": "11:27:04", "remaining_time": "15:41:05"} +{"current_steps": 1156, "total_steps": 2737, "loss": 1.0579, "lr": 1.4311729559095391e-05, "epoch": 2.9565217391304346, "percentage": 42.24, "elapsed_time": "11:27:40", "remaining_time": "15:40:29"} +{"current_steps": 1157, "total_steps": 2737, "loss": 1.0501, "lr": 1.430021747745002e-05, "epoch": 2.959079283887468, "percentage": 42.27, "elapsed_time": "11:28:16", "remaining_time": "15:39:54"} +{"current_steps": 1158, "total_steps": 2737, "loss": 1.0423, "lr": 1.4288698399619682e-05, "epoch": 2.9616368286445014, "percentage": 42.31, "elapsed_time": "11:28:52", "remaining_time": "15:39:18"} +{"current_steps": 1159, "total_steps": 2737, "loss": 1.0429, "lr": 1.4277172344345203e-05, "epoch": 2.9641943734015346, "percentage": 42.35, "elapsed_time": "11:29:27", "remaining_time": "15:38:43"} +{"current_steps": 1160, "total_steps": 2737, "loss": 1.0637, "lr": 1.4265639330378751e-05, "epoch": 2.9667519181585678, "percentage": 42.38, "elapsed_time": "11:30:03", "remaining_time": "15:38:07"} +{"current_steps": 1161, "total_steps": 2737, "loss": 1.032, "lr": 1.4254099376483814e-05, "epoch": 2.969309462915601, "percentage": 42.42, "elapsed_time": "11:30:39", "remaining_time": "15:37:31"} +{"current_steps": 1162, "total_steps": 2737, "loss": 1.0399, "lr": 1.424255250143518e-05, "epoch": 2.971867007672634, "percentage": 42.46, "elapsed_time": "11:31:14", "remaining_time": "15:36:55"} +{"current_steps": 1163, "total_steps": 2737, "loss": 1.082, "lr": 1.423099872401889e-05, "epoch": 2.9744245524296673, "percentage": 42.49, "elapsed_time": "11:31:50", "remaining_time": "15:36:20"} +{"current_steps": 1164, "total_steps": 2737, "loss": 1.0337, "lr": 1.4219438063032223e-05, "epoch": 2.976982097186701, "percentage": 42.53, "elapsed_time": "11:32:26", "remaining_time": "15:35:44"} +{"current_steps": 1165, "total_steps": 2737, "loss": 1.0464, "lr": 1.4207870537283645e-05, "epoch": 2.979539641943734, "percentage": 42.56, "elapsed_time": "11:33:01", "remaining_time": "15:35:08"} +{"current_steps": 1166, "total_steps": 2737, "loss": 1.0738, "lr": 1.4196296165592804e-05, "epoch": 2.9820971867007673, "percentage": 42.6, "elapsed_time": "11:33:37", "remaining_time": "15:34:33"} +{"current_steps": 1167, "total_steps": 2737, "loss": 1.0515, "lr": 1.4184714966790472e-05, "epoch": 2.9846547314578005, "percentage": 42.64, "elapsed_time": "11:34:13", "remaining_time": "15:33:57"} +{"current_steps": 1168, "total_steps": 2737, "loss": 1.0685, "lr": 1.4173126959718542e-05, "epoch": 2.9872122762148337, "percentage": 42.67, "elapsed_time": "11:34:48", "remaining_time": "15:33:21"} +{"current_steps": 1169, "total_steps": 2737, "loss": 1.0406, "lr": 1.416153216322997e-05, "epoch": 2.9897698209718673, "percentage": 42.71, "elapsed_time": "11:35:24", "remaining_time": "15:32:46"} +{"current_steps": 1170, "total_steps": 2737, "loss": 1.0388, "lr": 1.4149930596188768e-05, "epoch": 2.9923273657289, "percentage": 42.75, "elapsed_time": "11:36:00", "remaining_time": "15:32:10"} +{"current_steps": 1171, "total_steps": 2737, "loss": 1.035, "lr": 1.4138322277469962e-05, "epoch": 2.9948849104859336, "percentage": 42.78, "elapsed_time": "11:36:36", "remaining_time": "15:31:34"} +{"current_steps": 1172, "total_steps": 2737, "loss": 1.0798, "lr": 1.412670722595956e-05, "epoch": 2.997442455242967, "percentage": 42.82, "elapsed_time": "11:37:12", "remaining_time": "15:30:59"} +{"current_steps": 1173, "total_steps": 2737, "loss": 1.0724, "lr": 1.4115085460554524e-05, "epoch": 3.0, "percentage": 42.86, "elapsed_time": "11:37:47", "remaining_time": "15:30:23"} +{"current_steps": 1174, "total_steps": 2737, "loss": 1.0653, "lr": 1.410345700016274e-05, "epoch": 3.002557544757033, "percentage": 42.89, "elapsed_time": "11:38:32", "remaining_time": "15:29:59"} +{"current_steps": 1175, "total_steps": 2737, "loss": 1.0641, "lr": 1.4091821863702983e-05, "epoch": 3.0051150895140664, "percentage": 42.93, "elapsed_time": "11:39:07", "remaining_time": "15:29:23"} +{"current_steps": 1176, "total_steps": 2737, "loss": 1.0426, "lr": 1.4080180070104897e-05, "epoch": 3.0076726342710995, "percentage": 42.97, "elapsed_time": "11:39:43", "remaining_time": "15:28:48"} +{"current_steps": 1177, "total_steps": 2737, "loss": 1.0849, "lr": 1.406853163830895e-05, "epoch": 3.010230179028133, "percentage": 43.0, "elapsed_time": "11:40:19", "remaining_time": "15:28:12"} +{"current_steps": 1178, "total_steps": 2737, "loss": 1.0687, "lr": 1.4056876587266413e-05, "epoch": 3.0127877237851663, "percentage": 43.04, "elapsed_time": "11:40:54", "remaining_time": "15:27:36"} +{"current_steps": 1179, "total_steps": 2737, "loss": 1.0193, "lr": 1.4045214935939323e-05, "epoch": 3.0153452685421995, "percentage": 43.08, "elapsed_time": "11:41:30", "remaining_time": "15:27:00"} +{"current_steps": 1180, "total_steps": 2737, "loss": 1.027, "lr": 1.4033546703300465e-05, "epoch": 3.0179028132992327, "percentage": 43.11, "elapsed_time": "11:42:06", "remaining_time": "15:26:25"} +{"current_steps": 1181, "total_steps": 2737, "loss": 1.041, "lr": 1.402187190833331e-05, "epoch": 3.020460358056266, "percentage": 43.15, "elapsed_time": "11:42:41", "remaining_time": "15:25:49"} +{"current_steps": 1182, "total_steps": 2737, "loss": 1.0371, "lr": 1.4010190570032034e-05, "epoch": 3.023017902813299, "percentage": 43.19, "elapsed_time": "11:43:17", "remaining_time": "15:25:13"} +{"current_steps": 1183, "total_steps": 2737, "loss": 1.0578, "lr": 1.3998502707401437e-05, "epoch": 3.0255754475703327, "percentage": 43.22, "elapsed_time": "11:43:53", "remaining_time": "15:24:37"} +{"current_steps": 1184, "total_steps": 2737, "loss": 1.023, "lr": 1.398680833945694e-05, "epoch": 3.028132992327366, "percentage": 43.26, "elapsed_time": "11:44:28", "remaining_time": "15:24:02"} +{"current_steps": 1185, "total_steps": 2737, "loss": 1.0382, "lr": 1.3975107485224552e-05, "epoch": 3.030690537084399, "percentage": 43.3, "elapsed_time": "11:45:05", "remaining_time": "15:23:27"} +{"current_steps": 1186, "total_steps": 2737, "loss": 1.0186, "lr": 1.3963400163740828e-05, "epoch": 3.0332480818414322, "percentage": 43.33, "elapsed_time": "11:45:41", "remaining_time": "15:22:51"} +{"current_steps": 1187, "total_steps": 2737, "loss": 1.0455, "lr": 1.395168639405285e-05, "epoch": 3.0358056265984654, "percentage": 43.37, "elapsed_time": "11:46:16", "remaining_time": "15:22:16"} +{"current_steps": 1188, "total_steps": 2737, "loss": 1.0074, "lr": 1.3939966195218188e-05, "epoch": 3.0383631713554986, "percentage": 43.41, "elapsed_time": "11:46:52", "remaining_time": "15:21:40"} +{"current_steps": 1189, "total_steps": 2737, "loss": 1.0437, "lr": 1.3928239586304873e-05, "epoch": 3.040920716112532, "percentage": 43.44, "elapsed_time": "11:47:28", "remaining_time": "15:21:04"} +{"current_steps": 1190, "total_steps": 2737, "loss": 1.0327, "lr": 1.3916506586391364e-05, "epoch": 3.0434782608695654, "percentage": 43.48, "elapsed_time": "11:48:03", "remaining_time": "15:20:29"} +{"current_steps": 1191, "total_steps": 2737, "loss": 1.0099, "lr": 1.390476721456652e-05, "epoch": 3.0460358056265986, "percentage": 43.51, "elapsed_time": "11:48:39", "remaining_time": "15:19:53"} +{"current_steps": 1192, "total_steps": 2737, "loss": 1.051, "lr": 1.3893021489929564e-05, "epoch": 3.0485933503836318, "percentage": 43.55, "elapsed_time": "11:49:15", "remaining_time": "15:19:17"} +{"current_steps": 1193, "total_steps": 2737, "loss": 1.057, "lr": 1.3881269431590052e-05, "epoch": 3.051150895140665, "percentage": 43.59, "elapsed_time": "11:49:50", "remaining_time": "15:18:41"} +{"current_steps": 1194, "total_steps": 2737, "loss": 1.0296, "lr": 1.3869511058667855e-05, "epoch": 3.053708439897698, "percentage": 43.62, "elapsed_time": "11:50:26", "remaining_time": "15:18:05"} +{"current_steps": 1195, "total_steps": 2737, "loss": 1.0342, "lr": 1.3857746390293106e-05, "epoch": 3.0562659846547313, "percentage": 43.66, "elapsed_time": "11:51:02", "remaining_time": "15:17:30"} +{"current_steps": 1196, "total_steps": 2737, "loss": 1.0491, "lr": 1.3845975445606184e-05, "epoch": 3.0588235294117645, "percentage": 43.7, "elapsed_time": "11:51:37", "remaining_time": "15:16:54"} +{"current_steps": 1197, "total_steps": 2737, "loss": 1.0458, "lr": 1.383419824375768e-05, "epoch": 3.061381074168798, "percentage": 43.73, "elapsed_time": "11:52:13", "remaining_time": "15:16:19"} +{"current_steps": 1198, "total_steps": 2737, "loss": 1.0451, "lr": 1.382241480390837e-05, "epoch": 3.0639386189258313, "percentage": 43.77, "elapsed_time": "11:52:49", "remaining_time": "15:15:43"} +{"current_steps": 1199, "total_steps": 2737, "loss": 1.0621, "lr": 1.3810625145229174e-05, "epoch": 3.0664961636828645, "percentage": 43.81, "elapsed_time": "11:53:24", "remaining_time": "15:15:07"} +{"current_steps": 1200, "total_steps": 2737, "loss": 1.0216, "lr": 1.3798829286901122e-05, "epoch": 3.0690537084398977, "percentage": 43.84, "elapsed_time": "11:54:00", "remaining_time": "15:14:31"} +{"current_steps": 1201, "total_steps": 2737, "loss": 1.0344, "lr": 1.3787027248115341e-05, "epoch": 3.071611253196931, "percentage": 43.88, "elapsed_time": "11:54:36", "remaining_time": "15:13:55"} +{"current_steps": 1202, "total_steps": 2737, "loss": 1.0571, "lr": 1.3775219048073011e-05, "epoch": 3.074168797953964, "percentage": 43.92, "elapsed_time": "11:55:11", "remaining_time": "15:13:20"} +{"current_steps": 1203, "total_steps": 2737, "loss": 1.0621, "lr": 1.376340470598534e-05, "epoch": 3.0767263427109977, "percentage": 43.95, "elapsed_time": "11:55:47", "remaining_time": "15:12:44"} +{"current_steps": 1204, "total_steps": 2737, "loss": 1.0627, "lr": 1.3751584241073517e-05, "epoch": 3.079283887468031, "percentage": 43.99, "elapsed_time": "11:56:23", "remaining_time": "15:12:08"} +{"current_steps": 1205, "total_steps": 2737, "loss": 1.0619, "lr": 1.3739757672568703e-05, "epoch": 3.081841432225064, "percentage": 44.03, "elapsed_time": "11:56:59", "remaining_time": "15:11:33"} +{"current_steps": 1206, "total_steps": 2737, "loss": 1.0324, "lr": 1.3727925019711981e-05, "epoch": 3.084398976982097, "percentage": 44.06, "elapsed_time": "11:57:35", "remaining_time": "15:10:57"} +{"current_steps": 1207, "total_steps": 2737, "loss": 1.0538, "lr": 1.3716086301754343e-05, "epoch": 3.0869565217391304, "percentage": 44.1, "elapsed_time": "11:58:10", "remaining_time": "15:10:22"} +{"current_steps": 1208, "total_steps": 2737, "loss": 1.0806, "lr": 1.3704241537956643e-05, "epoch": 3.0895140664961636, "percentage": 44.14, "elapsed_time": "11:58:46", "remaining_time": "15:09:46"} +{"current_steps": 1209, "total_steps": 2737, "loss": 1.0272, "lr": 1.3692390747589564e-05, "epoch": 3.0920716112531967, "percentage": 44.17, "elapsed_time": "11:59:22", "remaining_time": "15:09:11"} +{"current_steps": 1210, "total_steps": 2737, "loss": 1.0499, "lr": 1.3680533949933607e-05, "epoch": 3.0946291560102304, "percentage": 44.21, "elapsed_time": "11:59:58", "remaining_time": "15:08:35"} +{"current_steps": 1211, "total_steps": 2737, "loss": 1.0514, "lr": 1.3668671164279039e-05, "epoch": 3.0971867007672635, "percentage": 44.25, "elapsed_time": "12:00:33", "remaining_time": "15:07:59"} +{"current_steps": 1212, "total_steps": 2737, "loss": 1.0134, "lr": 1.3656802409925874e-05, "epoch": 3.0997442455242967, "percentage": 44.28, "elapsed_time": "12:01:09", "remaining_time": "15:07:23"} +{"current_steps": 1213, "total_steps": 2737, "loss": 1.0851, "lr": 1.3644927706183824e-05, "epoch": 3.10230179028133, "percentage": 44.32, "elapsed_time": "12:01:45", "remaining_time": "15:06:48"} +{"current_steps": 1214, "total_steps": 2737, "loss": 1.0311, "lr": 1.3633047072372301e-05, "epoch": 3.104859335038363, "percentage": 44.36, "elapsed_time": "12:02:21", "remaining_time": "15:06:12"} +{"current_steps": 1215, "total_steps": 2737, "loss": 1.0737, "lr": 1.3621160527820343e-05, "epoch": 3.1074168797953963, "percentage": 44.39, "elapsed_time": "12:02:56", "remaining_time": "15:05:36"} +{"current_steps": 1216, "total_steps": 2737, "loss": 1.0298, "lr": 1.3609268091866621e-05, "epoch": 3.10997442455243, "percentage": 44.43, "elapsed_time": "12:03:32", "remaining_time": "15:05:01"} +{"current_steps": 1217, "total_steps": 2737, "loss": 1.0637, "lr": 1.3597369783859385e-05, "epoch": 3.112531969309463, "percentage": 44.46, "elapsed_time": "12:04:08", "remaining_time": "15:04:25"} +{"current_steps": 1218, "total_steps": 2737, "loss": 1.0358, "lr": 1.3585465623156434e-05, "epoch": 3.1150895140664963, "percentage": 44.5, "elapsed_time": "12:04:44", "remaining_time": "15:03:50"} +{"current_steps": 1219, "total_steps": 2737, "loss": 1.0531, "lr": 1.3573555629125097e-05, "epoch": 3.1176470588235294, "percentage": 44.54, "elapsed_time": "12:05:19", "remaining_time": "15:03:14"} +{"current_steps": 1220, "total_steps": 2737, "loss": 1.0422, "lr": 1.3561639821142187e-05, "epoch": 3.1202046035805626, "percentage": 44.57, "elapsed_time": "12:05:55", "remaining_time": "15:02:38"} +{"current_steps": 1221, "total_steps": 2737, "loss": 1.0373, "lr": 1.3549718218593982e-05, "epoch": 3.122762148337596, "percentage": 44.61, "elapsed_time": "12:06:31", "remaining_time": "15:02:03"} +{"current_steps": 1222, "total_steps": 2737, "loss": 0.9867, "lr": 1.3537790840876179e-05, "epoch": 3.125319693094629, "percentage": 44.65, "elapsed_time": "12:07:06", "remaining_time": "15:01:27"} +{"current_steps": 1223, "total_steps": 2737, "loss": 1.0493, "lr": 1.3525857707393878e-05, "epoch": 3.1278772378516626, "percentage": 44.68, "elapsed_time": "12:07:42", "remaining_time": "15:00:51"} +{"current_steps": 1224, "total_steps": 2737, "loss": 1.0192, "lr": 1.3513918837561544e-05, "epoch": 3.130434782608696, "percentage": 44.72, "elapsed_time": "12:08:18", "remaining_time": "15:00:15"} +{"current_steps": 1225, "total_steps": 2737, "loss": 1.0233, "lr": 1.3501974250802967e-05, "epoch": 3.132992327365729, "percentage": 44.76, "elapsed_time": "12:08:53", "remaining_time": "14:59:40"} +{"current_steps": 1226, "total_steps": 2737, "loss": 1.0415, "lr": 1.3490023966551249e-05, "epoch": 3.135549872122762, "percentage": 44.79, "elapsed_time": "12:09:29", "remaining_time": "14:59:04"} +{"current_steps": 1227, "total_steps": 2737, "loss": 1.0399, "lr": 1.3478068004248747e-05, "epoch": 3.1381074168797953, "percentage": 44.83, "elapsed_time": "12:10:05", "remaining_time": "14:58:28"} +{"current_steps": 1228, "total_steps": 2737, "loss": 1.0596, "lr": 1.346610638334707e-05, "epoch": 3.1406649616368285, "percentage": 44.87, "elapsed_time": "12:10:41", "remaining_time": "14:57:53"} +{"current_steps": 1229, "total_steps": 2737, "loss": 1.065, "lr": 1.3454139123307023e-05, "epoch": 3.1432225063938617, "percentage": 44.9, "elapsed_time": "12:11:16", "remaining_time": "14:57:17"} +{"current_steps": 1230, "total_steps": 2737, "loss": 1.0497, "lr": 1.3442166243598598e-05, "epoch": 3.1457800511508953, "percentage": 44.94, "elapsed_time": "12:11:52", "remaining_time": "14:56:41"} +{"current_steps": 1231, "total_steps": 2737, "loss": 1.0579, "lr": 1.3430187763700914e-05, "epoch": 3.1483375959079285, "percentage": 44.98, "elapsed_time": "12:12:27", "remaining_time": "14:56:05"} +{"current_steps": 1232, "total_steps": 2737, "loss": 1.037, "lr": 1.341820370310221e-05, "epoch": 3.1508951406649617, "percentage": 45.01, "elapsed_time": "12:13:03", "remaining_time": "14:55:30"} +{"current_steps": 1233, "total_steps": 2737, "loss": 1.077, "lr": 1.3406214081299807e-05, "epoch": 3.153452685421995, "percentage": 45.05, "elapsed_time": "12:13:39", "remaining_time": "14:54:54"} +{"current_steps": 1234, "total_steps": 2737, "loss": 1.0576, "lr": 1.3394218917800064e-05, "epoch": 3.156010230179028, "percentage": 45.09, "elapsed_time": "12:14:15", "remaining_time": "14:54:18"} +{"current_steps": 1235, "total_steps": 2737, "loss": 1.046, "lr": 1.3382218232118367e-05, "epoch": 3.1585677749360612, "percentage": 45.12, "elapsed_time": "12:14:50", "remaining_time": "14:53:42"} +{"current_steps": 1236, "total_steps": 2737, "loss": 1.0513, "lr": 1.3370212043779078e-05, "epoch": 3.1611253196930944, "percentage": 45.16, "elapsed_time": "12:15:26", "remaining_time": "14:53:06"} +{"current_steps": 1237, "total_steps": 2737, "loss": 1.0418, "lr": 1.335820037231552e-05, "epoch": 3.163682864450128, "percentage": 45.2, "elapsed_time": "12:16:01", "remaining_time": "14:52:31"} +{"current_steps": 1238, "total_steps": 2737, "loss": 1.044, "lr": 1.3346183237269925e-05, "epoch": 3.166240409207161, "percentage": 45.23, "elapsed_time": "12:16:37", "remaining_time": "14:51:55"} +{"current_steps": 1239, "total_steps": 2737, "loss": 1.0085, "lr": 1.3334160658193425e-05, "epoch": 3.1687979539641944, "percentage": 45.27, "elapsed_time": "12:17:13", "remaining_time": "14:51:20"} +{"current_steps": 1240, "total_steps": 2737, "loss": 1.0348, "lr": 1.3322132654646003e-05, "epoch": 3.1713554987212276, "percentage": 45.31, "elapsed_time": "12:17:49", "remaining_time": "14:50:44"} +{"current_steps": 1241, "total_steps": 2737, "loss": 1.0255, "lr": 1.3310099246196466e-05, "epoch": 3.1739130434782608, "percentage": 45.34, "elapsed_time": "12:18:24", "remaining_time": "14:50:08"} +{"current_steps": 1242, "total_steps": 2737, "loss": 1.0303, "lr": 1.3298060452422421e-05, "epoch": 3.176470588235294, "percentage": 45.38, "elapsed_time": "12:19:00", "remaining_time": "14:49:32"} +{"current_steps": 1243, "total_steps": 2737, "loss": 1.0366, "lr": 1.3286016292910229e-05, "epoch": 3.1790281329923276, "percentage": 45.41, "elapsed_time": "12:19:36", "remaining_time": "14:48:57"} +{"current_steps": 1244, "total_steps": 2737, "loss": 1.0224, "lr": 1.327396678725499e-05, "epoch": 3.1815856777493607, "percentage": 45.45, "elapsed_time": "12:20:12", "remaining_time": "14:48:22"} +{"current_steps": 1245, "total_steps": 2737, "loss": 0.9968, "lr": 1.3261911955060493e-05, "epoch": 3.184143222506394, "percentage": 45.49, "elapsed_time": "12:20:48", "remaining_time": "14:47:46"} +{"current_steps": 1246, "total_steps": 2737, "loss": 1.0502, "lr": 1.3249851815939197e-05, "epoch": 3.186700767263427, "percentage": 45.52, "elapsed_time": "12:21:24", "remaining_time": "14:47:10"} +{"current_steps": 1247, "total_steps": 2737, "loss": 1.0577, "lr": 1.3237786389512191e-05, "epoch": 3.1892583120204603, "percentage": 45.56, "elapsed_time": "12:21:59", "remaining_time": "14:46:35"} +{"current_steps": 1248, "total_steps": 2737, "loss": 1.0407, "lr": 1.3225715695409171e-05, "epoch": 3.1918158567774935, "percentage": 45.6, "elapsed_time": "12:22:35", "remaining_time": "14:45:59"} +{"current_steps": 1249, "total_steps": 2737, "loss": 1.0289, "lr": 1.3213639753268406e-05, "epoch": 3.1943734015345266, "percentage": 45.63, "elapsed_time": "12:23:11", "remaining_time": "14:45:23"} +{"current_steps": 1250, "total_steps": 2737, "loss": 1.0389, "lr": 1.3201558582736693e-05, "epoch": 3.1969309462915603, "percentage": 45.67, "elapsed_time": "12:23:46", "remaining_time": "14:44:48"} +{"current_steps": 1251, "total_steps": 2737, "loss": 1.0167, "lr": 1.3189472203469347e-05, "epoch": 3.1994884910485935, "percentage": 45.71, "elapsed_time": "12:24:22", "remaining_time": "14:44:12"} +{"current_steps": 1252, "total_steps": 2737, "loss": 1.0522, "lr": 1.3177380635130144e-05, "epoch": 3.2020460358056266, "percentage": 45.74, "elapsed_time": "12:24:58", "remaining_time": "14:43:36"} +{"current_steps": 1253, "total_steps": 2737, "loss": 1.0125, "lr": 1.3165283897391315e-05, "epoch": 3.20460358056266, "percentage": 45.78, "elapsed_time": "12:25:33", "remaining_time": "14:43:00"} +{"current_steps": 1254, "total_steps": 2737, "loss": 1.035, "lr": 1.3153182009933495e-05, "epoch": 3.207161125319693, "percentage": 45.82, "elapsed_time": "12:26:09", "remaining_time": "14:42:25"} +{"current_steps": 1255, "total_steps": 2737, "loss": 1.0354, "lr": 1.3141074992445695e-05, "epoch": 3.209718670076726, "percentage": 45.85, "elapsed_time": "12:26:45", "remaining_time": "14:41:49"} +{"current_steps": 1256, "total_steps": 2737, "loss": 1.0288, "lr": 1.3128962864625281e-05, "epoch": 3.21227621483376, "percentage": 45.89, "elapsed_time": "12:27:20", "remaining_time": "14:41:13"} +{"current_steps": 1257, "total_steps": 2737, "loss": 1.0329, "lr": 1.3116845646177923e-05, "epoch": 3.214833759590793, "percentage": 45.93, "elapsed_time": "12:27:56", "remaining_time": "14:40:37"} +{"current_steps": 1258, "total_steps": 2737, "loss": 1.0272, "lr": 1.3104723356817582e-05, "epoch": 3.217391304347826, "percentage": 45.96, "elapsed_time": "12:28:32", "remaining_time": "14:40:02"} +{"current_steps": 1259, "total_steps": 2737, "loss": 1.0757, "lr": 1.309259601626646e-05, "epoch": 3.2199488491048593, "percentage": 46.0, "elapsed_time": "12:29:07", "remaining_time": "14:39:26"} +{"current_steps": 1260, "total_steps": 2737, "loss": 1.0449, "lr": 1.3080463644254986e-05, "epoch": 3.2225063938618925, "percentage": 46.04, "elapsed_time": "12:29:43", "remaining_time": "14:38:50"} +{"current_steps": 1261, "total_steps": 2737, "loss": 1.0253, "lr": 1.3068326260521769e-05, "epoch": 3.2250639386189257, "percentage": 46.07, "elapsed_time": "12:30:18", "remaining_time": "14:38:14"} +{"current_steps": 1262, "total_steps": 2737, "loss": 1.0055, "lr": 1.3056183884813568e-05, "epoch": 3.227621483375959, "percentage": 46.11, "elapsed_time": "12:30:54", "remaining_time": "14:37:39"} +{"current_steps": 1263, "total_steps": 2737, "loss": 1.0305, "lr": 1.3044036536885284e-05, "epoch": 3.2301790281329925, "percentage": 46.15, "elapsed_time": "12:31:30", "remaining_time": "14:37:03"} +{"current_steps": 1264, "total_steps": 2737, "loss": 1.0356, "lr": 1.3031884236499877e-05, "epoch": 3.2327365728900257, "percentage": 46.18, "elapsed_time": "12:32:06", "remaining_time": "14:36:27"} +{"current_steps": 1265, "total_steps": 2737, "loss": 1.0327, "lr": 1.3019727003428387e-05, "epoch": 3.235294117647059, "percentage": 46.22, "elapsed_time": "12:32:41", "remaining_time": "14:35:51"} +{"current_steps": 1266, "total_steps": 2737, "loss": 1.0351, "lr": 1.300756485744987e-05, "epoch": 3.237851662404092, "percentage": 46.26, "elapsed_time": "12:33:17", "remaining_time": "14:35:16"} +{"current_steps": 1267, "total_steps": 2737, "loss": 1.0272, "lr": 1.2995397818351381e-05, "epoch": 3.2404092071611252, "percentage": 46.29, "elapsed_time": "12:33:53", "remaining_time": "14:34:40"} +{"current_steps": 1268, "total_steps": 2737, "loss": 0.9923, "lr": 1.2983225905927924e-05, "epoch": 3.2429667519181584, "percentage": 46.33, "elapsed_time": "12:34:28", "remaining_time": "14:34:04"} +{"current_steps": 1269, "total_steps": 2737, "loss": 1.0526, "lr": 1.2971049139982448e-05, "epoch": 3.2455242966751916, "percentage": 46.36, "elapsed_time": "12:35:04", "remaining_time": "14:33:28"} +{"current_steps": 1270, "total_steps": 2737, "loss": 1.0283, "lr": 1.2958867540325785e-05, "epoch": 3.2480818414322252, "percentage": 46.4, "elapsed_time": "12:35:40", "remaining_time": "14:32:53"} +{"current_steps": 1271, "total_steps": 2737, "loss": 1.0467, "lr": 1.294668112677664e-05, "epoch": 3.2506393861892584, "percentage": 46.44, "elapsed_time": "12:36:15", "remaining_time": "14:32:17"} +{"current_steps": 1272, "total_steps": 2737, "loss": 1.0292, "lr": 1.2934489919161541e-05, "epoch": 3.2531969309462916, "percentage": 46.47, "elapsed_time": "12:36:51", "remaining_time": "14:31:41"} +{"current_steps": 1273, "total_steps": 2737, "loss": 1.0273, "lr": 1.292229393731482e-05, "epoch": 3.2557544757033248, "percentage": 46.51, "elapsed_time": "12:37:26", "remaining_time": "14:31:05"} +{"current_steps": 1274, "total_steps": 2737, "loss": 1.041, "lr": 1.2910093201078584e-05, "epoch": 3.258312020460358, "percentage": 46.55, "elapsed_time": "12:38:02", "remaining_time": "14:30:29"} +{"current_steps": 1275, "total_steps": 2737, "loss": 1.0507, "lr": 1.289788773030266e-05, "epoch": 3.260869565217391, "percentage": 46.58, "elapsed_time": "12:38:37", "remaining_time": "14:29:53"} +{"current_steps": 1276, "total_steps": 2737, "loss": 1.0073, "lr": 1.2885677544844592e-05, "epoch": 3.2634271099744243, "percentage": 46.62, "elapsed_time": "12:39:13", "remaining_time": "14:29:18"} +{"current_steps": 1277, "total_steps": 2737, "loss": 1.063, "lr": 1.2873462664569583e-05, "epoch": 3.265984654731458, "percentage": 46.66, "elapsed_time": "12:39:49", "remaining_time": "14:28:42"} +{"current_steps": 1278, "total_steps": 2737, "loss": 1.0275, "lr": 1.2861243109350485e-05, "epoch": 3.268542199488491, "percentage": 46.69, "elapsed_time": "12:40:24", "remaining_time": "14:28:06"} +{"current_steps": 1279, "total_steps": 2737, "loss": 1.0786, "lr": 1.2849018899067746e-05, "epoch": 3.2710997442455243, "percentage": 46.73, "elapsed_time": "12:41:00", "remaining_time": "14:27:30"} +{"current_steps": 1280, "total_steps": 2737, "loss": 1.0475, "lr": 1.2836790053609396e-05, "epoch": 3.2736572890025575, "percentage": 46.77, "elapsed_time": "12:41:36", "remaining_time": "14:26:55"} +{"current_steps": 1281, "total_steps": 2737, "loss": 1.0544, "lr": 1.2824556592870993e-05, "epoch": 3.2762148337595907, "percentage": 46.8, "elapsed_time": "12:42:11", "remaining_time": "14:26:19"} +{"current_steps": 1282, "total_steps": 2737, "loss": 1.0432, "lr": 1.2812318536755624e-05, "epoch": 3.2787723785166243, "percentage": 46.84, "elapsed_time": "12:42:47", "remaining_time": "14:25:43"} +{"current_steps": 1283, "total_steps": 2737, "loss": 1.0432, "lr": 1.2800075905173834e-05, "epoch": 3.2813299232736575, "percentage": 46.88, "elapsed_time": "12:43:23", "remaining_time": "14:25:07"} +{"current_steps": 1284, "total_steps": 2737, "loss": 1.0379, "lr": 1.2787828718043622e-05, "epoch": 3.2838874680306906, "percentage": 46.91, "elapsed_time": "12:43:58", "remaining_time": "14:24:32"} +{"current_steps": 1285, "total_steps": 2737, "loss": 1.0421, "lr": 1.2775576995290397e-05, "epoch": 3.286445012787724, "percentage": 46.95, "elapsed_time": "12:44:34", "remaining_time": "14:23:56"} +{"current_steps": 1286, "total_steps": 2737, "loss": 1.0392, "lr": 1.276332075684694e-05, "epoch": 3.289002557544757, "percentage": 46.99, "elapsed_time": "12:45:10", "remaining_time": "14:23:20"} +{"current_steps": 1287, "total_steps": 2737, "loss": 1.0283, "lr": 1.2751060022653393e-05, "epoch": 3.29156010230179, "percentage": 47.02, "elapsed_time": "12:45:45", "remaining_time": "14:22:44"} +{"current_steps": 1288, "total_steps": 2737, "loss": 1.0144, "lr": 1.2738794812657194e-05, "epoch": 3.2941176470588234, "percentage": 47.06, "elapsed_time": "12:46:21", "remaining_time": "14:22:09"} +{"current_steps": 1289, "total_steps": 2737, "loss": 1.0151, "lr": 1.2726525146813078e-05, "epoch": 3.296675191815857, "percentage": 47.1, "elapsed_time": "12:46:57", "remaining_time": "14:21:33"} +{"current_steps": 1290, "total_steps": 2737, "loss": 1.0137, "lr": 1.2714251045083028e-05, "epoch": 3.29923273657289, "percentage": 47.13, "elapsed_time": "12:47:33", "remaining_time": "14:20:58"} +{"current_steps": 1291, "total_steps": 2737, "loss": 1.0233, "lr": 1.2701972527436235e-05, "epoch": 3.3017902813299234, "percentage": 47.17, "elapsed_time": "12:48:08", "remaining_time": "14:20:22"} +{"current_steps": 1292, "total_steps": 2737, "loss": 1.0586, "lr": 1.2689689613849083e-05, "epoch": 3.3043478260869565, "percentage": 47.2, "elapsed_time": "12:48:44", "remaining_time": "14:19:46"} +{"current_steps": 1293, "total_steps": 2737, "loss": 0.994, "lr": 1.2677402324305099e-05, "epoch": 3.3069053708439897, "percentage": 47.24, "elapsed_time": "12:49:20", "remaining_time": "14:19:10"} +{"current_steps": 1294, "total_steps": 2737, "loss": 1.0283, "lr": 1.266511067879494e-05, "epoch": 3.309462915601023, "percentage": 47.28, "elapsed_time": "12:49:55", "remaining_time": "14:18:35"} +{"current_steps": 1295, "total_steps": 2737, "loss": 1.0373, "lr": 1.265281469731634e-05, "epoch": 3.312020460358056, "percentage": 47.31, "elapsed_time": "12:50:31", "remaining_time": "14:17:59"} +{"current_steps": 1296, "total_steps": 2737, "loss": 1.0517, "lr": 1.2640514399874095e-05, "epoch": 3.3145780051150897, "percentage": 47.35, "elapsed_time": "12:51:07", "remaining_time": "14:17:23"} +{"current_steps": 1297, "total_steps": 2737, "loss": 1.0068, "lr": 1.2628209806480024e-05, "epoch": 3.317135549872123, "percentage": 47.39, "elapsed_time": "12:51:43", "remaining_time": "14:16:48"} +{"current_steps": 1298, "total_steps": 2737, "loss": 1.0236, "lr": 1.2615900937152923e-05, "epoch": 3.319693094629156, "percentage": 47.42, "elapsed_time": "12:52:18", "remaining_time": "14:16:12"} +{"current_steps": 1299, "total_steps": 2737, "loss": 1.0495, "lr": 1.2603587811918558e-05, "epoch": 3.3222506393861893, "percentage": 47.46, "elapsed_time": "12:52:54", "remaining_time": "14:15:36"} +{"current_steps": 1300, "total_steps": 2737, "loss": 1.0741, "lr": 1.2591270450809612e-05, "epoch": 3.3248081841432224, "percentage": 47.5, "elapsed_time": "12:53:30", "remaining_time": "14:15:01"} +{"current_steps": 1301, "total_steps": 2737, "loss": 1.0132, "lr": 1.2578948873865662e-05, "epoch": 3.3273657289002556, "percentage": 47.53, "elapsed_time": "12:54:06", "remaining_time": "14:14:25"} +{"current_steps": 1302, "total_steps": 2737, "loss": 1.0464, "lr": 1.2566623101133144e-05, "epoch": 3.329923273657289, "percentage": 47.57, "elapsed_time": "12:54:41", "remaining_time": "14:13:50"} +{"current_steps": 1303, "total_steps": 2737, "loss": 1.0247, "lr": 1.2554293152665316e-05, "epoch": 3.3324808184143224, "percentage": 47.61, "elapsed_time": "12:55:17", "remaining_time": "14:13:14"} +{"current_steps": 1304, "total_steps": 2737, "loss": 1.0399, "lr": 1.2541959048522239e-05, "epoch": 3.3350383631713556, "percentage": 47.64, "elapsed_time": "12:55:53", "remaining_time": "14:12:38"} +{"current_steps": 1305, "total_steps": 2737, "loss": 1.0157, "lr": 1.2529620808770723e-05, "epoch": 3.337595907928389, "percentage": 47.68, "elapsed_time": "12:56:28", "remaining_time": "14:12:02"} +{"current_steps": 1306, "total_steps": 2737, "loss": 1.0495, "lr": 1.251727845348432e-05, "epoch": 3.340153452685422, "percentage": 47.72, "elapsed_time": "12:57:04", "remaining_time": "14:11:27"} +{"current_steps": 1307, "total_steps": 2737, "loss": 1.001, "lr": 1.2504932002743262e-05, "epoch": 3.342710997442455, "percentage": 47.75, "elapsed_time": "12:57:40", "remaining_time": "14:10:51"} +{"current_steps": 1308, "total_steps": 2737, "loss": 1.045, "lr": 1.2492581476634458e-05, "epoch": 3.3452685421994883, "percentage": 47.79, "elapsed_time": "12:58:16", "remaining_time": "14:10:16"} +{"current_steps": 1309, "total_steps": 2737, "loss": 1.0285, "lr": 1.2480226895251439e-05, "epoch": 3.3478260869565215, "percentage": 47.83, "elapsed_time": "12:58:51", "remaining_time": "14:09:40"} +{"current_steps": 1310, "total_steps": 2737, "loss": 1.0658, "lr": 1.2467868278694342e-05, "epoch": 3.350383631713555, "percentage": 47.86, "elapsed_time": "12:59:27", "remaining_time": "14:09:04"} +{"current_steps": 1311, "total_steps": 2737, "loss": 1.0372, "lr": 1.245550564706986e-05, "epoch": 3.3529411764705883, "percentage": 47.9, "elapsed_time": "13:00:03", "remaining_time": "14:08:28"} +{"current_steps": 1312, "total_steps": 2737, "loss": 1.0295, "lr": 1.2443139020491216e-05, "epoch": 3.3554987212276215, "percentage": 47.94, "elapsed_time": "13:00:39", "remaining_time": "14:07:53"} +{"current_steps": 1313, "total_steps": 2737, "loss": 1.0312, "lr": 1.2430768419078143e-05, "epoch": 3.3580562659846547, "percentage": 47.97, "elapsed_time": "13:01:14", "remaining_time": "14:07:17"} +{"current_steps": 1314, "total_steps": 2737, "loss": 1.0419, "lr": 1.2418393862956837e-05, "epoch": 3.360613810741688, "percentage": 48.01, "elapsed_time": "13:01:50", "remaining_time": "14:06:41"} +{"current_steps": 1315, "total_steps": 2737, "loss": 1.0122, "lr": 1.2406015372259925e-05, "epoch": 3.363171355498721, "percentage": 48.05, "elapsed_time": "13:02:26", "remaining_time": "14:06:06"} +{"current_steps": 1316, "total_steps": 2737, "loss": 1.0327, "lr": 1.2393632967126441e-05, "epoch": 3.3657289002557547, "percentage": 48.08, "elapsed_time": "13:03:01", "remaining_time": "14:05:30"} +{"current_steps": 1317, "total_steps": 2737, "loss": 1.0475, "lr": 1.2381246667701781e-05, "epoch": 3.368286445012788, "percentage": 48.12, "elapsed_time": "13:03:37", "remaining_time": "14:04:54"} +{"current_steps": 1318, "total_steps": 2737, "loss": 1.0426, "lr": 1.236885649413768e-05, "epoch": 3.370843989769821, "percentage": 48.15, "elapsed_time": "13:04:13", "remaining_time": "14:04:19"} +{"current_steps": 1319, "total_steps": 2737, "loss": 1.0412, "lr": 1.2356462466592177e-05, "epoch": 3.373401534526854, "percentage": 48.19, "elapsed_time": "13:04:49", "remaining_time": "14:03:43"} +{"current_steps": 1320, "total_steps": 2737, "loss": 1.0175, "lr": 1.2344064605229577e-05, "epoch": 3.3759590792838874, "percentage": 48.23, "elapsed_time": "13:05:24", "remaining_time": "14:03:07"} +{"current_steps": 1321, "total_steps": 2737, "loss": 1.018, "lr": 1.2331662930220424e-05, "epoch": 3.3785166240409206, "percentage": 48.26, "elapsed_time": "13:06:00", "remaining_time": "14:02:31"} +{"current_steps": 1322, "total_steps": 2737, "loss": 1.029, "lr": 1.2319257461741478e-05, "epoch": 3.381074168797954, "percentage": 48.3, "elapsed_time": "13:06:36", "remaining_time": "14:01:56"} +{"current_steps": 1323, "total_steps": 2737, "loss": 1.017, "lr": 1.2306848219975649e-05, "epoch": 3.3836317135549874, "percentage": 48.34, "elapsed_time": "13:07:12", "remaining_time": "14:01:21"} +{"current_steps": 1324, "total_steps": 2737, "loss": 1.0301, "lr": 1.2294435225112005e-05, "epoch": 3.3861892583120206, "percentage": 48.37, "elapsed_time": "13:07:48", "remaining_time": "14:00:45"} +{"current_steps": 1325, "total_steps": 2737, "loss": 1.0361, "lr": 1.2282018497345705e-05, "epoch": 3.3887468030690537, "percentage": 48.41, "elapsed_time": "13:08:24", "remaining_time": "14:00:10"} +{"current_steps": 1326, "total_steps": 2737, "loss": 1.0385, "lr": 1.2269598056877996e-05, "epoch": 3.391304347826087, "percentage": 48.45, "elapsed_time": "13:08:59", "remaining_time": "13:59:34"} +{"current_steps": 1327, "total_steps": 2737, "loss": 1.0208, "lr": 1.2257173923916154e-05, "epoch": 3.39386189258312, "percentage": 48.48, "elapsed_time": "13:09:35", "remaining_time": "13:58:58"} +{"current_steps": 1328, "total_steps": 2737, "loss": 1.0116, "lr": 1.2244746118673467e-05, "epoch": 3.3964194373401533, "percentage": 48.52, "elapsed_time": "13:10:11", "remaining_time": "13:58:22"} +{"current_steps": 1329, "total_steps": 2737, "loss": 1.0742, "lr": 1.22323146613692e-05, "epoch": 3.398976982097187, "percentage": 48.56, "elapsed_time": "13:10:46", "remaining_time": "13:57:47"} +{"current_steps": 1330, "total_steps": 2737, "loss": 1.0565, "lr": 1.2219879572228555e-05, "epoch": 3.40153452685422, "percentage": 48.59, "elapsed_time": "13:11:22", "remaining_time": "13:57:11"} +{"current_steps": 1331, "total_steps": 2737, "loss": 1.0294, "lr": 1.2207440871482644e-05, "epoch": 3.4040920716112533, "percentage": 48.63, "elapsed_time": "13:11:58", "remaining_time": "13:56:36"} +{"current_steps": 1332, "total_steps": 2737, "loss": 1.0479, "lr": 1.2194998579368451e-05, "epoch": 3.4066496163682864, "percentage": 48.67, "elapsed_time": "13:12:34", "remaining_time": "13:56:00"} +{"current_steps": 1333, "total_steps": 2737, "loss": 1.0236, "lr": 1.2182552716128818e-05, "epoch": 3.4092071611253196, "percentage": 48.7, "elapsed_time": "13:13:10", "remaining_time": "13:55:25"} +{"current_steps": 1334, "total_steps": 2737, "loss": 1.0513, "lr": 1.2170103302012374e-05, "epoch": 3.411764705882353, "percentage": 48.74, "elapsed_time": "13:13:45", "remaining_time": "13:54:49"} +{"current_steps": 1335, "total_steps": 2737, "loss": 1.0389, "lr": 1.2157650357273547e-05, "epoch": 3.414322250639386, "percentage": 48.78, "elapsed_time": "13:14:21", "remaining_time": "13:54:13"} +{"current_steps": 1336, "total_steps": 2737, "loss": 1.0355, "lr": 1.2145193902172496e-05, "epoch": 3.4168797953964196, "percentage": 48.81, "elapsed_time": "13:14:57", "remaining_time": "13:53:38"} +{"current_steps": 1337, "total_steps": 2737, "loss": 1.0322, "lr": 1.2132733956975093e-05, "epoch": 3.419437340153453, "percentage": 48.85, "elapsed_time": "13:15:33", "remaining_time": "13:53:02"} +{"current_steps": 1338, "total_steps": 2737, "loss": 1.0227, "lr": 1.2120270541952892e-05, "epoch": 3.421994884910486, "percentage": 48.89, "elapsed_time": "13:16:08", "remaining_time": "13:52:26"} +{"current_steps": 1339, "total_steps": 2737, "loss": 1.0285, "lr": 1.210780367738309e-05, "epoch": 3.424552429667519, "percentage": 48.92, "elapsed_time": "13:16:44", "remaining_time": "13:51:51"} +{"current_steps": 1340, "total_steps": 2737, "loss": 1.0812, "lr": 1.2095333383548495e-05, "epoch": 3.4271099744245523, "percentage": 48.96, "elapsed_time": "13:17:20", "remaining_time": "13:51:15"} +{"current_steps": 1341, "total_steps": 2737, "loss": 1.0716, "lr": 1.2082859680737495e-05, "epoch": 3.4296675191815855, "percentage": 49.0, "elapsed_time": "13:17:56", "remaining_time": "13:50:39"} +{"current_steps": 1342, "total_steps": 2737, "loss": 1.0311, "lr": 1.2070382589244026e-05, "epoch": 3.4322250639386187, "percentage": 49.03, "elapsed_time": "13:18:31", "remaining_time": "13:50:03"} +{"current_steps": 1343, "total_steps": 2737, "loss": 1.0467, "lr": 1.2057902129367536e-05, "epoch": 3.4347826086956523, "percentage": 49.07, "elapsed_time": "13:19:07", "remaining_time": "13:49:28"} +{"current_steps": 1344, "total_steps": 2737, "loss": 1.028, "lr": 1.204541832141295e-05, "epoch": 3.4373401534526855, "percentage": 49.1, "elapsed_time": "13:19:43", "remaining_time": "13:48:52"} +{"current_steps": 1345, "total_steps": 2737, "loss": 1.0163, "lr": 1.2032931185690646e-05, "epoch": 3.4398976982097187, "percentage": 49.14, "elapsed_time": "13:20:19", "remaining_time": "13:48:17"} +{"current_steps": 1346, "total_steps": 2737, "loss": 1.063, "lr": 1.202044074251641e-05, "epoch": 3.442455242966752, "percentage": 49.18, "elapsed_time": "13:20:55", "remaining_time": "13:47:41"} +{"current_steps": 1347, "total_steps": 2737, "loss": 1.0361, "lr": 1.2007947012211419e-05, "epoch": 3.445012787723785, "percentage": 49.21, "elapsed_time": "13:21:31", "remaining_time": "13:47:06"} +{"current_steps": 1348, "total_steps": 2737, "loss": 1.054, "lr": 1.199545001510218e-05, "epoch": 3.4475703324808182, "percentage": 49.25, "elapsed_time": "13:22:06", "remaining_time": "13:46:30"} +{"current_steps": 1349, "total_steps": 2737, "loss": 1.0605, "lr": 1.1982949771520535e-05, "epoch": 3.4501278772378514, "percentage": 49.29, "elapsed_time": "13:22:42", "remaining_time": "13:45:54"} +{"current_steps": 1350, "total_steps": 2737, "loss": 1.0461, "lr": 1.1970446301803598e-05, "epoch": 3.452685421994885, "percentage": 49.32, "elapsed_time": "13:23:17", "remaining_time": "13:45:18"} +{"current_steps": 1351, "total_steps": 2737, "loss": 1.0459, "lr": 1.1957939626293726e-05, "epoch": 3.455242966751918, "percentage": 49.36, "elapsed_time": "13:23:53", "remaining_time": "13:44:43"} +{"current_steps": 1352, "total_steps": 2737, "loss": 1.0531, "lr": 1.1945429765338507e-05, "epoch": 3.4578005115089514, "percentage": 49.4, "elapsed_time": "13:24:29", "remaining_time": "13:44:07"} +{"current_steps": 1353, "total_steps": 2737, "loss": 1.0148, "lr": 1.1932916739290694e-05, "epoch": 3.4603580562659846, "percentage": 49.43, "elapsed_time": "13:25:05", "remaining_time": "13:43:32"} +{"current_steps": 1354, "total_steps": 2737, "loss": 1.0375, "lr": 1.1920400568508201e-05, "epoch": 3.4629156010230178, "percentage": 49.47, "elapsed_time": "13:25:40", "remaining_time": "13:42:56"} +{"current_steps": 1355, "total_steps": 2737, "loss": 1.0276, "lr": 1.1907881273354059e-05, "epoch": 3.4654731457800514, "percentage": 49.51, "elapsed_time": "13:26:17", "remaining_time": "13:42:21"} +{"current_steps": 1356, "total_steps": 2737, "loss": 1.0368, "lr": 1.1895358874196377e-05, "epoch": 3.4680306905370846, "percentage": 49.54, "elapsed_time": "13:26:52", "remaining_time": "13:41:45"} +{"current_steps": 1357, "total_steps": 2737, "loss": 1.038, "lr": 1.188283339140831e-05, "epoch": 3.4705882352941178, "percentage": 49.58, "elapsed_time": "13:27:28", "remaining_time": "13:41:09"} +{"current_steps": 1358, "total_steps": 2737, "loss": 1.0433, "lr": 1.1870304845368043e-05, "epoch": 3.473145780051151, "percentage": 49.62, "elapsed_time": "13:28:04", "remaining_time": "13:40:34"} +{"current_steps": 1359, "total_steps": 2737, "loss": 1.0605, "lr": 1.1857773256458732e-05, "epoch": 3.475703324808184, "percentage": 49.65, "elapsed_time": "13:28:39", "remaining_time": "13:39:58"} +{"current_steps": 1360, "total_steps": 2737, "loss": 1.0476, "lr": 1.184523864506849e-05, "epoch": 3.4782608695652173, "percentage": 49.69, "elapsed_time": "13:29:15", "remaining_time": "13:39:22"} +{"current_steps": 1361, "total_steps": 2737, "loss": 1.032, "lr": 1.1832701031590345e-05, "epoch": 3.4808184143222505, "percentage": 49.73, "elapsed_time": "13:29:52", "remaining_time": "13:38:47"} +{"current_steps": 1362, "total_steps": 2737, "loss": 1.0392, "lr": 1.1820160436422213e-05, "epoch": 3.483375959079284, "percentage": 49.76, "elapsed_time": "13:30:28", "remaining_time": "13:38:12"} +{"current_steps": 1363, "total_steps": 2737, "loss": 1.0549, "lr": 1.1807616879966856e-05, "epoch": 3.4859335038363173, "percentage": 49.8, "elapsed_time": "13:31:04", "remaining_time": "13:37:36"} +{"current_steps": 1364, "total_steps": 2737, "loss": 1.0257, "lr": 1.1795070382631856e-05, "epoch": 3.4884910485933505, "percentage": 49.84, "elapsed_time": "13:31:39", "remaining_time": "13:37:01"} +{"current_steps": 1365, "total_steps": 2737, "loss": 1.0616, "lr": 1.1782520964829583e-05, "epoch": 3.4910485933503836, "percentage": 49.87, "elapsed_time": "13:32:15", "remaining_time": "13:36:25"} +{"current_steps": 1366, "total_steps": 2737, "loss": 1.08, "lr": 1.1769968646977148e-05, "epoch": 3.493606138107417, "percentage": 49.91, "elapsed_time": "13:32:50", "remaining_time": "13:35:49"} +{"current_steps": 1367, "total_steps": 2737, "loss": 1.0582, "lr": 1.1757413449496393e-05, "epoch": 3.49616368286445, "percentage": 49.95, "elapsed_time": "13:33:26", "remaining_time": "13:35:13"} +{"current_steps": 1368, "total_steps": 2737, "loss": 0.9999, "lr": 1.174485539281384e-05, "epoch": 3.498721227621483, "percentage": 49.98, "elapsed_time": "13:34:02", "remaining_time": "13:34:38"} +{"current_steps": 1369, "total_steps": 2737, "loss": 1.0481, "lr": 1.1732294497360658e-05, "epoch": 3.501278772378517, "percentage": 50.02, "elapsed_time": "13:34:37", "remaining_time": "13:34:02"} +{"current_steps": 1370, "total_steps": 2737, "loss": 1.0526, "lr": 1.1719730783572645e-05, "epoch": 3.50383631713555, "percentage": 50.05, "elapsed_time": "13:35:13", "remaining_time": "13:33:26"} +{"current_steps": 1371, "total_steps": 2737, "loss": 1.0465, "lr": 1.1707164271890168e-05, "epoch": 3.506393861892583, "percentage": 50.09, "elapsed_time": "13:35:49", "remaining_time": "13:32:50"} +{"current_steps": 1372, "total_steps": 2737, "loss": 1.0672, "lr": 1.1694594982758164e-05, "epoch": 3.5089514066496164, "percentage": 50.13, "elapsed_time": "13:36:25", "remaining_time": "13:32:15"} +{"current_steps": 1373, "total_steps": 2737, "loss": 1.0249, "lr": 1.1682022936626076e-05, "epoch": 3.5115089514066495, "percentage": 50.16, "elapsed_time": "13:37:00", "remaining_time": "13:31:39"} +{"current_steps": 1374, "total_steps": 2737, "loss": 1.0444, "lr": 1.166944815394784e-05, "epoch": 3.5140664961636827, "percentage": 50.2, "elapsed_time": "13:37:36", "remaining_time": "13:31:03"} +{"current_steps": 1375, "total_steps": 2737, "loss": 1.0164, "lr": 1.165687065518184e-05, "epoch": 3.516624040920716, "percentage": 50.24, "elapsed_time": "13:38:12", "remaining_time": "13:30:28"} +{"current_steps": 1376, "total_steps": 2737, "loss": 1.0231, "lr": 1.1644290460790879e-05, "epoch": 3.5191815856777495, "percentage": 50.27, "elapsed_time": "13:38:47", "remaining_time": "13:29:52"} +{"current_steps": 1377, "total_steps": 2737, "loss": 1.0499, "lr": 1.163170759124215e-05, "epoch": 3.5217391304347827, "percentage": 50.31, "elapsed_time": "13:39:23", "remaining_time": "13:29:16"} +{"current_steps": 1378, "total_steps": 2737, "loss": 1.0179, "lr": 1.161912206700719e-05, "epoch": 3.524296675191816, "percentage": 50.35, "elapsed_time": "13:39:59", "remaining_time": "13:28:40"} +{"current_steps": 1379, "total_steps": 2737, "loss": 1.0825, "lr": 1.1606533908561866e-05, "epoch": 3.526854219948849, "percentage": 50.38, "elapsed_time": "13:40:34", "remaining_time": "13:28:05"} +{"current_steps": 1380, "total_steps": 2737, "loss": 1.0239, "lr": 1.1593943136386316e-05, "epoch": 3.5294117647058822, "percentage": 50.42, "elapsed_time": "13:41:10", "remaining_time": "13:27:29"} +{"current_steps": 1381, "total_steps": 2737, "loss": 1.0797, "lr": 1.1581349770964946e-05, "epoch": 3.531969309462916, "percentage": 50.46, "elapsed_time": "13:41:46", "remaining_time": "13:26:53"} +{"current_steps": 1382, "total_steps": 2737, "loss": 1.0482, "lr": 1.1568753832786376e-05, "epoch": 3.5345268542199486, "percentage": 50.49, "elapsed_time": "13:42:22", "remaining_time": "13:26:18"} +{"current_steps": 1383, "total_steps": 2737, "loss": 1.0341, "lr": 1.1556155342343405e-05, "epoch": 3.5370843989769822, "percentage": 50.53, "elapsed_time": "13:42:57", "remaining_time": "13:25:42"} +{"current_steps": 1384, "total_steps": 2737, "loss": 1.0657, "lr": 1.154355432013299e-05, "epoch": 3.5396419437340154, "percentage": 50.57, "elapsed_time": "13:43:33", "remaining_time": "13:25:06"} +{"current_steps": 1385, "total_steps": 2737, "loss": 1.0428, "lr": 1.1530950786656205e-05, "epoch": 3.5421994884910486, "percentage": 50.6, "elapsed_time": "13:44:09", "remaining_time": "13:24:30"} +{"current_steps": 1386, "total_steps": 2737, "loss": 1.0614, "lr": 1.1518344762418216e-05, "epoch": 3.544757033248082, "percentage": 50.64, "elapsed_time": "13:44:44", "remaining_time": "13:23:55"} +{"current_steps": 1387, "total_steps": 2737, "loss": 1.0119, "lr": 1.150573626792823e-05, "epoch": 3.547314578005115, "percentage": 50.68, "elapsed_time": "13:45:20", "remaining_time": "13:23:19"} +{"current_steps": 1388, "total_steps": 2737, "loss": 1.0325, "lr": 1.1493125323699486e-05, "epoch": 3.5498721227621486, "percentage": 50.71, "elapsed_time": "13:45:56", "remaining_time": "13:22:43"} +{"current_steps": 1389, "total_steps": 2737, "loss": 1.0621, "lr": 1.1480511950249195e-05, "epoch": 3.5524296675191813, "percentage": 50.75, "elapsed_time": "13:46:32", "remaining_time": "13:22:08"} +{"current_steps": 1390, "total_steps": 2737, "loss": 1.0688, "lr": 1.1467896168098533e-05, "epoch": 3.554987212276215, "percentage": 50.79, "elapsed_time": "13:47:07", "remaining_time": "13:21:32"} +{"current_steps": 1391, "total_steps": 2737, "loss": 0.9992, "lr": 1.1455277997772585e-05, "epoch": 3.557544757033248, "percentage": 50.82, "elapsed_time": "13:47:43", "remaining_time": "13:20:57"} +{"current_steps": 1392, "total_steps": 2737, "loss": 1.0298, "lr": 1.1442657459800323e-05, "epoch": 3.5601023017902813, "percentage": 50.86, "elapsed_time": "13:48:19", "remaining_time": "13:20:21"} +{"current_steps": 1393, "total_steps": 2737, "loss": 1.0481, "lr": 1.143003457471458e-05, "epoch": 3.5626598465473145, "percentage": 50.9, "elapsed_time": "13:48:55", "remaining_time": "13:19:45"} +{"current_steps": 1394, "total_steps": 2737, "loss": 1.0508, "lr": 1.1417409363051992e-05, "epoch": 3.5652173913043477, "percentage": 50.93, "elapsed_time": "13:49:30", "remaining_time": "13:19:10"} +{"current_steps": 1395, "total_steps": 2737, "loss": 1.0586, "lr": 1.1404781845352999e-05, "epoch": 3.5677749360613813, "percentage": 50.97, "elapsed_time": "13:50:06", "remaining_time": "13:18:34"} +{"current_steps": 1396, "total_steps": 2737, "loss": 1.0319, "lr": 1.1392152042161774e-05, "epoch": 3.5703324808184145, "percentage": 51.0, "elapsed_time": "13:50:42", "remaining_time": "13:17:58"} +{"current_steps": 1397, "total_steps": 2737, "loss": 1.0151, "lr": 1.1379519974026226e-05, "epoch": 3.5728900255754477, "percentage": 51.04, "elapsed_time": "13:51:17", "remaining_time": "13:17:22"} +{"current_steps": 1398, "total_steps": 2737, "loss": 1.0516, "lr": 1.136688566149793e-05, "epoch": 3.575447570332481, "percentage": 51.08, "elapsed_time": "13:51:53", "remaining_time": "13:16:46"} +{"current_steps": 1399, "total_steps": 2737, "loss": 1.0558, "lr": 1.1354249125132131e-05, "epoch": 3.578005115089514, "percentage": 51.11, "elapsed_time": "13:52:29", "remaining_time": "13:16:11"} +{"current_steps": 1400, "total_steps": 2737, "loss": 1.0159, "lr": 1.1341610385487677e-05, "epoch": 3.580562659846547, "percentage": 51.15, "elapsed_time": "13:53:04", "remaining_time": "13:15:35"} +{"current_steps": 1401, "total_steps": 2737, "loss": 1.0256, "lr": 1.1328969463127009e-05, "epoch": 3.5831202046035804, "percentage": 51.19, "elapsed_time": "13:53:40", "remaining_time": "13:14:59"} +{"current_steps": 1402, "total_steps": 2737, "loss": 1.0452, "lr": 1.1316326378616121e-05, "epoch": 3.585677749360614, "percentage": 51.22, "elapsed_time": "13:54:16", "remaining_time": "13:14:24"} +{"current_steps": 1403, "total_steps": 2737, "loss": 1.0417, "lr": 1.1303681152524514e-05, "epoch": 3.588235294117647, "percentage": 51.26, "elapsed_time": "13:54:52", "remaining_time": "13:13:48"} +{"current_steps": 1404, "total_steps": 2737, "loss": 1.0483, "lr": 1.129103380542519e-05, "epoch": 3.5907928388746804, "percentage": 51.3, "elapsed_time": "13:55:28", "remaining_time": "13:13:13"} +{"current_steps": 1405, "total_steps": 2737, "loss": 1.0407, "lr": 1.1278384357894585e-05, "epoch": 3.5933503836317136, "percentage": 51.33, "elapsed_time": "13:56:03", "remaining_time": "13:12:37"} +{"current_steps": 1406, "total_steps": 2737, "loss": 1.0391, "lr": 1.1265732830512561e-05, "epoch": 3.5959079283887467, "percentage": 51.37, "elapsed_time": "13:56:39", "remaining_time": "13:12:01"} +{"current_steps": 1407, "total_steps": 2737, "loss": 1.0456, "lr": 1.125307924386236e-05, "epoch": 3.59846547314578, "percentage": 51.41, "elapsed_time": "13:57:15", "remaining_time": "13:11:26"} +{"current_steps": 1408, "total_steps": 2737, "loss": 1.0501, "lr": 1.1240423618530578e-05, "epoch": 3.601023017902813, "percentage": 51.44, "elapsed_time": "13:57:51", "remaining_time": "13:10:50"} +{"current_steps": 1409, "total_steps": 2737, "loss": 1.0294, "lr": 1.122776597510713e-05, "epoch": 3.6035805626598467, "percentage": 51.48, "elapsed_time": "13:58:27", "remaining_time": "13:10:15"} +{"current_steps": 1410, "total_steps": 2737, "loss": 1.0024, "lr": 1.1215106334185201e-05, "epoch": 3.60613810741688, "percentage": 51.52, "elapsed_time": "13:59:02", "remaining_time": "13:09:39"} +{"current_steps": 1411, "total_steps": 2737, "loss": 1.0451, "lr": 1.1202444716361247e-05, "epoch": 3.608695652173913, "percentage": 51.55, "elapsed_time": "13:59:38", "remaining_time": "13:09:03"} +{"current_steps": 1412, "total_steps": 2737, "loss": 1.0635, "lr": 1.1189781142234917e-05, "epoch": 3.6112531969309463, "percentage": 51.59, "elapsed_time": "14:00:14", "remaining_time": "13:08:27"} +{"current_steps": 1413, "total_steps": 2737, "loss": 1.0177, "lr": 1.1177115632409064e-05, "epoch": 3.6138107416879794, "percentage": 51.63, "elapsed_time": "14:00:49", "remaining_time": "13:07:52"} +{"current_steps": 1414, "total_steps": 2737, "loss": 1.0379, "lr": 1.1164448207489673e-05, "epoch": 3.6163682864450126, "percentage": 51.66, "elapsed_time": "14:01:25", "remaining_time": "13:07:16"} +{"current_steps": 1415, "total_steps": 2737, "loss": 1.0179, "lr": 1.1151778888085856e-05, "epoch": 3.618925831202046, "percentage": 51.7, "elapsed_time": "14:02:01", "remaining_time": "13:06:40"} +{"current_steps": 1416, "total_steps": 2737, "loss": 1.0392, "lr": 1.1139107694809806e-05, "epoch": 3.6214833759590794, "percentage": 51.74, "elapsed_time": "14:02:37", "remaining_time": "13:06:05"} +{"current_steps": 1417, "total_steps": 2737, "loss": 1.0124, "lr": 1.1126434648276756e-05, "epoch": 3.6240409207161126, "percentage": 51.77, "elapsed_time": "14:03:12", "remaining_time": "13:05:29"} +{"current_steps": 1418, "total_steps": 2737, "loss": 1.0496, "lr": 1.1113759769104965e-05, "epoch": 3.626598465473146, "percentage": 51.81, "elapsed_time": "14:03:48", "remaining_time": "13:04:53"} +{"current_steps": 1419, "total_steps": 2737, "loss": 1.043, "lr": 1.1101083077915667e-05, "epoch": 3.629156010230179, "percentage": 51.85, "elapsed_time": "14:04:24", "remaining_time": "13:04:18"} +{"current_steps": 1420, "total_steps": 2737, "loss": 1.0449, "lr": 1.1088404595333046e-05, "epoch": 3.631713554987212, "percentage": 51.88, "elapsed_time": "14:05:00", "remaining_time": "13:03:42"} +{"current_steps": 1421, "total_steps": 2737, "loss": 1.0622, "lr": 1.1075724341984201e-05, "epoch": 3.634271099744246, "percentage": 51.92, "elapsed_time": "14:05:35", "remaining_time": "13:03:06"} +{"current_steps": 1422, "total_steps": 2737, "loss": 1.015, "lr": 1.1063042338499113e-05, "epoch": 3.6368286445012785, "percentage": 51.95, "elapsed_time": "14:06:11", "remaining_time": "13:02:31"} +{"current_steps": 1423, "total_steps": 2737, "loss": 1.0413, "lr": 1.1050358605510606e-05, "epoch": 3.639386189258312, "percentage": 51.99, "elapsed_time": "14:06:47", "remaining_time": "13:01:55"} +{"current_steps": 1424, "total_steps": 2737, "loss": 1.0307, "lr": 1.1037673163654321e-05, "epoch": 3.6419437340153453, "percentage": 52.03, "elapsed_time": "14:07:23", "remaining_time": "13:01:19"} +{"current_steps": 1425, "total_steps": 2737, "loss": 1.0605, "lr": 1.1024986033568683e-05, "epoch": 3.6445012787723785, "percentage": 52.06, "elapsed_time": "14:07:58", "remaining_time": "13:00:44"} +{"current_steps": 1426, "total_steps": 2737, "loss": 1.0192, "lr": 1.101229723589485e-05, "epoch": 3.6470588235294117, "percentage": 52.1, "elapsed_time": "14:08:34", "remaining_time": "13:00:08"} +{"current_steps": 1427, "total_steps": 2737, "loss": 1.0349, "lr": 1.099960679127671e-05, "epoch": 3.649616368286445, "percentage": 52.14, "elapsed_time": "14:09:10", "remaining_time": "12:59:32"} +{"current_steps": 1428, "total_steps": 2737, "loss": 1.0253, "lr": 1.0986914720360821e-05, "epoch": 3.6521739130434785, "percentage": 52.17, "elapsed_time": "14:09:45", "remaining_time": "12:58:56"} +{"current_steps": 1429, "total_steps": 2737, "loss": 1.018, "lr": 1.097422104379639e-05, "epoch": 3.6547314578005117, "percentage": 52.21, "elapsed_time": "14:10:21", "remaining_time": "12:58:21"} +{"current_steps": 1430, "total_steps": 2737, "loss": 1.0473, "lr": 1.0961525782235233e-05, "epoch": 3.657289002557545, "percentage": 52.25, "elapsed_time": "14:10:57", "remaining_time": "12:57:45"} +{"current_steps": 1431, "total_steps": 2737, "loss": 1.0424, "lr": 1.0948828956331752e-05, "epoch": 3.659846547314578, "percentage": 52.28, "elapsed_time": "14:11:32", "remaining_time": "12:57:09"} +{"current_steps": 1432, "total_steps": 2737, "loss": 1.0453, "lr": 1.0936130586742881e-05, "epoch": 3.662404092071611, "percentage": 52.32, "elapsed_time": "14:12:08", "remaining_time": "12:56:34"} +{"current_steps": 1433, "total_steps": 2737, "loss": 1.0193, "lr": 1.0923430694128074e-05, "epoch": 3.6649616368286444, "percentage": 52.36, "elapsed_time": "14:12:44", "remaining_time": "12:55:58"} +{"current_steps": 1434, "total_steps": 2737, "loss": 1.0256, "lr": 1.091072929914927e-05, "epoch": 3.6675191815856776, "percentage": 52.39, "elapsed_time": "14:13:19", "remaining_time": "12:55:22"} +{"current_steps": 1435, "total_steps": 2737, "loss": 1.0232, "lr": 1.0898026422470838e-05, "epoch": 3.670076726342711, "percentage": 52.43, "elapsed_time": "14:13:56", "remaining_time": "12:54:47"} +{"current_steps": 1436, "total_steps": 2737, "loss": 1.0536, "lr": 1.0885322084759566e-05, "epoch": 3.6726342710997444, "percentage": 52.47, "elapsed_time": "14:14:32", "remaining_time": "12:54:12"} +{"current_steps": 1437, "total_steps": 2737, "loss": 1.0287, "lr": 1.0872616306684616e-05, "epoch": 3.6751918158567776, "percentage": 52.5, "elapsed_time": "14:15:08", "remaining_time": "12:53:36"} +{"current_steps": 1438, "total_steps": 2737, "loss": 1.0909, "lr": 1.0859909108917497e-05, "epoch": 3.6777493606138107, "percentage": 52.54, "elapsed_time": "14:15:43", "remaining_time": "12:53:00"} +{"current_steps": 1439, "total_steps": 2737, "loss": 1.0193, "lr": 1.084720051213202e-05, "epoch": 3.680306905370844, "percentage": 52.58, "elapsed_time": "14:16:19", "remaining_time": "12:52:25"} +{"current_steps": 1440, "total_steps": 2737, "loss": 1.0212, "lr": 1.0834490537004286e-05, "epoch": 3.682864450127877, "percentage": 52.61, "elapsed_time": "14:16:55", "remaining_time": "12:51:50"} +{"current_steps": 1441, "total_steps": 2737, "loss": 1.0249, "lr": 1.0821779204212623e-05, "epoch": 3.6854219948849103, "percentage": 52.65, "elapsed_time": "14:17:31", "remaining_time": "12:51:14"} +{"current_steps": 1442, "total_steps": 2737, "loss": 1.0179, "lr": 1.0809066534437576e-05, "epoch": 3.687979539641944, "percentage": 52.69, "elapsed_time": "14:18:07", "remaining_time": "12:50:38"} +{"current_steps": 1443, "total_steps": 2737, "loss": 1.0026, "lr": 1.0796352548361863e-05, "epoch": 3.690537084398977, "percentage": 52.72, "elapsed_time": "14:18:42", "remaining_time": "12:50:02"} +{"current_steps": 1444, "total_steps": 2737, "loss": 1.0287, "lr": 1.0783637266670348e-05, "epoch": 3.6930946291560103, "percentage": 52.76, "elapsed_time": "14:19:18", "remaining_time": "12:49:26"} +{"current_steps": 1445, "total_steps": 2737, "loss": 1.0507, "lr": 1.0770920710049997e-05, "epoch": 3.6956521739130435, "percentage": 52.8, "elapsed_time": "14:19:55", "remaining_time": "12:48:52"} +{"current_steps": 1446, "total_steps": 2737, "loss": 1.0135, "lr": 1.0758202899189852e-05, "epoch": 3.6982097186700766, "percentage": 52.83, "elapsed_time": "14:20:30", "remaining_time": "12:48:16"} +{"current_steps": 1447, "total_steps": 2737, "loss": 1.0408, "lr": 1.0745483854780996e-05, "epoch": 3.70076726342711, "percentage": 52.87, "elapsed_time": "14:21:06", "remaining_time": "12:47:40"} +{"current_steps": 1448, "total_steps": 2737, "loss": 1.0642, "lr": 1.073276359751652e-05, "epoch": 3.703324808184143, "percentage": 52.9, "elapsed_time": "14:21:42", "remaining_time": "12:47:05"} +{"current_steps": 1449, "total_steps": 2737, "loss": 1.0136, "lr": 1.0720042148091487e-05, "epoch": 3.7058823529411766, "percentage": 52.94, "elapsed_time": "14:22:18", "remaining_time": "12:46:29"} +{"current_steps": 1450, "total_steps": 2737, "loss": 1.0297, "lr": 1.0707319527202902e-05, "epoch": 3.70843989769821, "percentage": 52.98, "elapsed_time": "14:22:53", "remaining_time": "12:45:53"} +{"current_steps": 1451, "total_steps": 2737, "loss": 1.0088, "lr": 1.0694595755549668e-05, "epoch": 3.710997442455243, "percentage": 53.01, "elapsed_time": "14:23:29", "remaining_time": "12:45:17"} +{"current_steps": 1452, "total_steps": 2737, "loss": 1.0411, "lr": 1.0681870853832572e-05, "epoch": 3.713554987212276, "percentage": 53.05, "elapsed_time": "14:24:05", "remaining_time": "12:44:42"} +{"current_steps": 1453, "total_steps": 2737, "loss": 1.0237, "lr": 1.066914484275423e-05, "epoch": 3.7161125319693094, "percentage": 53.09, "elapsed_time": "14:24:40", "remaining_time": "12:44:06"} +{"current_steps": 1454, "total_steps": 2737, "loss": 1.0661, "lr": 1.0656417743019065e-05, "epoch": 3.718670076726343, "percentage": 53.12, "elapsed_time": "14:25:16", "remaining_time": "12:43:30"} +{"current_steps": 1455, "total_steps": 2737, "loss": 1.0205, "lr": 1.0643689575333276e-05, "epoch": 3.7212276214833757, "percentage": 53.16, "elapsed_time": "14:25:52", "remaining_time": "12:42:55"} +{"current_steps": 1456, "total_steps": 2737, "loss": 1.0179, "lr": 1.0630960360404793e-05, "epoch": 3.7237851662404093, "percentage": 53.2, "elapsed_time": "14:26:27", "remaining_time": "12:42:19"} +{"current_steps": 1457, "total_steps": 2737, "loss": 1.0622, "lr": 1.061823011894326e-05, "epoch": 3.7263427109974425, "percentage": 53.23, "elapsed_time": "14:27:03", "remaining_time": "12:41:43"} +{"current_steps": 1458, "total_steps": 2737, "loss": 1.0185, "lr": 1.0605498871659974e-05, "epoch": 3.7289002557544757, "percentage": 53.27, "elapsed_time": "14:27:39", "remaining_time": "12:41:08"} +{"current_steps": 1459, "total_steps": 2737, "loss": 1.0534, "lr": 1.0592766639267885e-05, "epoch": 3.731457800511509, "percentage": 53.31, "elapsed_time": "14:28:15", "remaining_time": "12:40:32"} +{"current_steps": 1460, "total_steps": 2737, "loss": 1.0384, "lr": 1.0580033442481532e-05, "epoch": 3.734015345268542, "percentage": 53.34, "elapsed_time": "14:28:50", "remaining_time": "12:39:56"} +{"current_steps": 1461, "total_steps": 2737, "loss": 1.0143, "lr": 1.0567299302017038e-05, "epoch": 3.7365728900255757, "percentage": 53.38, "elapsed_time": "14:29:26", "remaining_time": "12:39:21"} +{"current_steps": 1462, "total_steps": 2737, "loss": 1.021, "lr": 1.0554564238592051e-05, "epoch": 3.7391304347826084, "percentage": 53.42, "elapsed_time": "14:30:02", "remaining_time": "12:38:45"} +{"current_steps": 1463, "total_steps": 2737, "loss": 1.0292, "lr": 1.0541828272925721e-05, "epoch": 3.741687979539642, "percentage": 53.45, "elapsed_time": "14:30:38", "remaining_time": "12:38:09"} +{"current_steps": 1464, "total_steps": 2737, "loss": 1.0489, "lr": 1.0529091425738669e-05, "epoch": 3.7442455242966752, "percentage": 53.49, "elapsed_time": "14:31:13", "remaining_time": "12:37:33"} +{"current_steps": 1465, "total_steps": 2737, "loss": 1.0359, "lr": 1.0516353717752947e-05, "epoch": 3.7468030690537084, "percentage": 53.53, "elapsed_time": "14:31:49", "remaining_time": "12:36:58"} +{"current_steps": 1466, "total_steps": 2737, "loss": 1.0342, "lr": 1.0503615169692012e-05, "epoch": 3.7493606138107416, "percentage": 53.56, "elapsed_time": "14:32:25", "remaining_time": "12:36:22"} +{"current_steps": 1467, "total_steps": 2737, "loss": 1.0353, "lr": 1.0490875802280685e-05, "epoch": 3.7519181585677748, "percentage": 53.6, "elapsed_time": "14:33:00", "remaining_time": "12:35:46"} +{"current_steps": 1468, "total_steps": 2737, "loss": 1.0306, "lr": 1.0478135636245122e-05, "epoch": 3.7544757033248084, "percentage": 53.64, "elapsed_time": "14:33:36", "remaining_time": "12:35:11"} +{"current_steps": 1469, "total_steps": 2737, "loss": 1.0548, "lr": 1.046539469231277e-05, "epoch": 3.7570332480818416, "percentage": 53.67, "elapsed_time": "14:34:12", "remaining_time": "12:34:35"} +{"current_steps": 1470, "total_steps": 2737, "loss": 1.0094, "lr": 1.0452652991212357e-05, "epoch": 3.7595907928388748, "percentage": 53.71, "elapsed_time": "14:34:48", "remaining_time": "12:33:59"} +{"current_steps": 1471, "total_steps": 2737, "loss": 1.0439, "lr": 1.0439910553673829e-05, "epoch": 3.762148337595908, "percentage": 53.74, "elapsed_time": "14:35:23", "remaining_time": "12:33:24"} +{"current_steps": 1472, "total_steps": 2737, "loss": 1.0373, "lr": 1.0427167400428331e-05, "epoch": 3.764705882352941, "percentage": 53.78, "elapsed_time": "14:35:59", "remaining_time": "12:32:48"} +{"current_steps": 1473, "total_steps": 2737, "loss": 1.0199, "lr": 1.0414423552208184e-05, "epoch": 3.7672634271099743, "percentage": 53.82, "elapsed_time": "14:36:35", "remaining_time": "12:32:12"} +{"current_steps": 1474, "total_steps": 2737, "loss": 1.0311, "lr": 1.0401679029746828e-05, "epoch": 3.7698209718670075, "percentage": 53.85, "elapsed_time": "14:37:10", "remaining_time": "12:31:36"} +{"current_steps": 1475, "total_steps": 2737, "loss": 1.0445, "lr": 1.038893385377881e-05, "epoch": 3.772378516624041, "percentage": 53.89, "elapsed_time": "14:37:46", "remaining_time": "12:31:01"} +{"current_steps": 1476, "total_steps": 2737, "loss": 1.035, "lr": 1.0376188045039723e-05, "epoch": 3.7749360613810743, "percentage": 53.93, "elapsed_time": "14:38:22", "remaining_time": "12:30:25"} +{"current_steps": 1477, "total_steps": 2737, "loss": 1.0054, "lr": 1.0363441624266213e-05, "epoch": 3.7774936061381075, "percentage": 53.96, "elapsed_time": "14:38:57", "remaining_time": "12:29:49"} +{"current_steps": 1478, "total_steps": 2737, "loss": 1.0299, "lr": 1.0350694612195905e-05, "epoch": 3.7800511508951407, "percentage": 54.0, "elapsed_time": "14:39:33", "remaining_time": "12:29:14"} +{"current_steps": 1479, "total_steps": 2737, "loss": 1.013, "lr": 1.0337947029567388e-05, "epoch": 3.782608695652174, "percentage": 54.04, "elapsed_time": "14:40:09", "remaining_time": "12:28:38"} +{"current_steps": 1480, "total_steps": 2737, "loss": 1.0239, "lr": 1.0325198897120183e-05, "epoch": 3.785166240409207, "percentage": 54.07, "elapsed_time": "14:40:45", "remaining_time": "12:28:02"} +{"current_steps": 1481, "total_steps": 2737, "loss": 1.0262, "lr": 1.0312450235594706e-05, "epoch": 3.78772378516624, "percentage": 54.11, "elapsed_time": "14:41:20", "remaining_time": "12:27:27"} +{"current_steps": 1482, "total_steps": 2737, "loss": 1.0444, "lr": 1.0299701065732235e-05, "epoch": 3.790281329923274, "percentage": 54.15, "elapsed_time": "14:41:56", "remaining_time": "12:26:51"} +{"current_steps": 1483, "total_steps": 2737, "loss": 1.0993, "lr": 1.0286951408274865e-05, "epoch": 3.792838874680307, "percentage": 54.18, "elapsed_time": "14:42:32", "remaining_time": "12:26:15"} +{"current_steps": 1484, "total_steps": 2737, "loss": 1.0409, "lr": 1.0274201283965497e-05, "epoch": 3.79539641943734, "percentage": 54.22, "elapsed_time": "14:43:08", "remaining_time": "12:25:39"} +{"current_steps": 1485, "total_steps": 2737, "loss": 1.075, "lr": 1.0261450713547785e-05, "epoch": 3.7979539641943734, "percentage": 54.26, "elapsed_time": "14:43:43", "remaining_time": "12:25:04"} +{"current_steps": 1486, "total_steps": 2737, "loss": 1.0679, "lr": 1.0248699717766107e-05, "epoch": 3.8005115089514065, "percentage": 54.29, "elapsed_time": "14:44:19", "remaining_time": "12:24:28"} +{"current_steps": 1487, "total_steps": 2737, "loss": 1.0484, "lr": 1.023594831736554e-05, "epoch": 3.80306905370844, "percentage": 54.33, "elapsed_time": "14:44:55", "remaining_time": "12:23:52"} +{"current_steps": 1488, "total_steps": 2737, "loss": 1.0287, "lr": 1.0223196533091813e-05, "epoch": 3.805626598465473, "percentage": 54.37, "elapsed_time": "14:45:30", "remaining_time": "12:23:16"} +{"current_steps": 1489, "total_steps": 2737, "loss": 1.0373, "lr": 1.0210444385691282e-05, "epoch": 3.8081841432225065, "percentage": 54.4, "elapsed_time": "14:46:06", "remaining_time": "12:22:41"} +{"current_steps": 1490, "total_steps": 2737, "loss": 1.0396, "lr": 1.0197691895910895e-05, "epoch": 3.8107416879795397, "percentage": 54.44, "elapsed_time": "14:46:42", "remaining_time": "12:22:05"} +{"current_steps": 1491, "total_steps": 2737, "loss": 1.0383, "lr": 1.0184939084498153e-05, "epoch": 3.813299232736573, "percentage": 54.48, "elapsed_time": "14:47:18", "remaining_time": "12:21:30"} +{"current_steps": 1492, "total_steps": 2737, "loss": 1.0341, "lr": 1.0172185972201082e-05, "epoch": 3.815856777493606, "percentage": 54.51, "elapsed_time": "14:47:53", "remaining_time": "12:20:54"} +{"current_steps": 1493, "total_steps": 2737, "loss": 1.0419, "lr": 1.01594325797682e-05, "epoch": 3.8184143222506393, "percentage": 54.55, "elapsed_time": "14:48:29", "remaining_time": "12:20:18"} +{"current_steps": 1494, "total_steps": 2737, "loss": 1.0178, "lr": 1.0146678927948484e-05, "epoch": 3.820971867007673, "percentage": 54.59, "elapsed_time": "14:49:05", "remaining_time": "12:19:42"} +{"current_steps": 1495, "total_steps": 2737, "loss": 1.0701, "lr": 1.013392503749132e-05, "epoch": 3.8235294117647056, "percentage": 54.62, "elapsed_time": "14:49:40", "remaining_time": "12:19:07"} +{"current_steps": 1496, "total_steps": 2737, "loss": 1.0359, "lr": 1.0121170929146493e-05, "epoch": 3.8260869565217392, "percentage": 54.66, "elapsed_time": "14:50:16", "remaining_time": "12:18:31"} +{"current_steps": 1497, "total_steps": 2737, "loss": 1.0483, "lr": 1.0108416623664142e-05, "epoch": 3.8286445012787724, "percentage": 54.69, "elapsed_time": "14:50:52", "remaining_time": "12:17:55"} +{"current_steps": 1498, "total_steps": 2737, "loss": 1.0167, "lr": 1.0095662141794725e-05, "epoch": 3.8312020460358056, "percentage": 54.73, "elapsed_time": "14:51:27", "remaining_time": "12:17:20"} +{"current_steps": 1499, "total_steps": 2737, "loss": 1.0271, "lr": 1.0082907504288977e-05, "epoch": 3.833759590792839, "percentage": 54.77, "elapsed_time": "14:52:03", "remaining_time": "12:16:44"} +{"current_steps": 1500, "total_steps": 2737, "loss": 1.0525, "lr": 1.0070152731897911e-05, "epoch": 3.836317135549872, "percentage": 54.8, "elapsed_time": "14:52:39", "remaining_time": "12:16:08"} +{"current_steps": 1501, "total_steps": 2737, "loss": 1.0354, "lr": 1.0057397845372734e-05, "epoch": 3.8388746803069056, "percentage": 54.84, "elapsed_time": "14:53:15", "remaining_time": "12:15:32"} +{"current_steps": 1502, "total_steps": 2737, "loss": 1.0439, "lr": 1.004464286546485e-05, "epoch": 3.8414322250639388, "percentage": 54.88, "elapsed_time": "14:53:50", "remaining_time": "12:14:57"} +{"current_steps": 1503, "total_steps": 2737, "loss": 1.0073, "lr": 1.0031887812925818e-05, "epoch": 3.843989769820972, "percentage": 54.91, "elapsed_time": "14:54:26", "remaining_time": "12:14:21"} +{"current_steps": 1504, "total_steps": 2737, "loss": 1.0549, "lr": 1.0019132708507307e-05, "epoch": 3.846547314578005, "percentage": 54.95, "elapsed_time": "14:55:02", "remaining_time": "12:13:45"} +{"current_steps": 1505, "total_steps": 2737, "loss": 1.056, "lr": 1.0006377572961075e-05, "epoch": 3.8491048593350383, "percentage": 54.99, "elapsed_time": "14:55:37", "remaining_time": "12:13:09"} +{"current_steps": 1506, "total_steps": 2737, "loss": 1.0317, "lr": 9.99362242703893e-06, "epoch": 3.8516624040920715, "percentage": 55.02, "elapsed_time": "14:56:13", "remaining_time": "12:12:34"} +{"current_steps": 1507, "total_steps": 2737, "loss": 1.0496, "lr": 9.980867291492697e-06, "epoch": 3.8542199488491047, "percentage": 55.06, "elapsed_time": "14:56:49", "remaining_time": "12:11:58"} +{"current_steps": 1508, "total_steps": 2737, "loss": 1.0321, "lr": 9.968112187074187e-06, "epoch": 3.8567774936061383, "percentage": 55.1, "elapsed_time": "14:57:25", "remaining_time": "12:11:23"} +{"current_steps": 1509, "total_steps": 2737, "loss": 1.0612, "lr": 9.955357134535153e-06, "epoch": 3.8593350383631715, "percentage": 55.13, "elapsed_time": "14:58:00", "remaining_time": "12:10:47"} +{"current_steps": 1510, "total_steps": 2737, "loss": 1.0371, "lr": 9.94260215462727e-06, "epoch": 3.8618925831202047, "percentage": 55.17, "elapsed_time": "14:58:36", "remaining_time": "12:10:11"} +{"current_steps": 1511, "total_steps": 2737, "loss": 1.0116, "lr": 9.929847268102092e-06, "epoch": 3.864450127877238, "percentage": 55.21, "elapsed_time": "14:59:12", "remaining_time": "12:09:35"} +{"current_steps": 1512, "total_steps": 2737, "loss": 0.9975, "lr": 9.917092495711023e-06, "epoch": 3.867007672634271, "percentage": 55.24, "elapsed_time": "14:59:47", "remaining_time": "12:09:00"} +{"current_steps": 1513, "total_steps": 2737, "loss": 1.0261, "lr": 9.904337858205282e-06, "epoch": 3.869565217391304, "percentage": 55.28, "elapsed_time": "15:00:23", "remaining_time": "12:08:24"} +{"current_steps": 1514, "total_steps": 2737, "loss": 1.036, "lr": 9.891583376335861e-06, "epoch": 3.8721227621483374, "percentage": 55.32, "elapsed_time": "15:00:59", "remaining_time": "12:07:48"} +{"current_steps": 1515, "total_steps": 2737, "loss": 1.0353, "lr": 9.87882907085351e-06, "epoch": 3.874680306905371, "percentage": 55.35, "elapsed_time": "15:01:35", "remaining_time": "12:07:13"} +{"current_steps": 1516, "total_steps": 2737, "loss": 1.048, "lr": 9.866074962508684e-06, "epoch": 3.877237851662404, "percentage": 55.39, "elapsed_time": "15:02:10", "remaining_time": "12:06:37"} +{"current_steps": 1517, "total_steps": 2737, "loss": 1.0719, "lr": 9.85332107205152e-06, "epoch": 3.8797953964194374, "percentage": 55.43, "elapsed_time": "15:02:46", "remaining_time": "12:06:01"} +{"current_steps": 1518, "total_steps": 2737, "loss": 1.0436, "lr": 9.840567420231802e-06, "epoch": 3.8823529411764706, "percentage": 55.46, "elapsed_time": "15:03:22", "remaining_time": "12:05:26"} +{"current_steps": 1519, "total_steps": 2737, "loss": 1.0611, "lr": 9.82781402779892e-06, "epoch": 3.8849104859335037, "percentage": 55.5, "elapsed_time": "15:03:58", "remaining_time": "12:04:50"} +{"current_steps": 1520, "total_steps": 2737, "loss": 1.0517, "lr": 9.815060915501852e-06, "epoch": 3.887468030690537, "percentage": 55.54, "elapsed_time": "15:04:33", "remaining_time": "12:04:14"} +{"current_steps": 1521, "total_steps": 2737, "loss": 1.0249, "lr": 9.802308104089109e-06, "epoch": 3.89002557544757, "percentage": 55.57, "elapsed_time": "15:05:09", "remaining_time": "12:03:39"} +{"current_steps": 1522, "total_steps": 2737, "loss": 0.9947, "lr": 9.789555614308721e-06, "epoch": 3.8925831202046037, "percentage": 55.61, "elapsed_time": "15:05:45", "remaining_time": "12:03:03"} +{"current_steps": 1523, "total_steps": 2737, "loss": 1.0352, "lr": 9.77680346690819e-06, "epoch": 3.895140664961637, "percentage": 55.64, "elapsed_time": "15:06:21", "remaining_time": "12:02:27"} +{"current_steps": 1524, "total_steps": 2737, "loss": 1.0275, "lr": 9.764051682634462e-06, "epoch": 3.89769820971867, "percentage": 55.68, "elapsed_time": "15:06:56", "remaining_time": "12:01:52"} +{"current_steps": 1525, "total_steps": 2737, "loss": 1.0534, "lr": 9.751300282233895e-06, "epoch": 3.9002557544757033, "percentage": 55.72, "elapsed_time": "15:07:32", "remaining_time": "12:01:16"} +{"current_steps": 1526, "total_steps": 2737, "loss": 1.038, "lr": 9.738549286452218e-06, "epoch": 3.9028132992327365, "percentage": 55.75, "elapsed_time": "15:08:08", "remaining_time": "12:00:40"} +{"current_steps": 1527, "total_steps": 2737, "loss": 1.0286, "lr": 9.725798716034507e-06, "epoch": 3.90537084398977, "percentage": 55.79, "elapsed_time": "15:08:44", "remaining_time": "12:00:05"} +{"current_steps": 1528, "total_steps": 2737, "loss": 1.0448, "lr": 9.713048591725138e-06, "epoch": 3.907928388746803, "percentage": 55.83, "elapsed_time": "15:09:19", "remaining_time": "11:59:29"} +{"current_steps": 1529, "total_steps": 2737, "loss": 1.0069, "lr": 9.700298934267766e-06, "epoch": 3.9104859335038364, "percentage": 55.86, "elapsed_time": "15:09:55", "remaining_time": "11:58:53"} +{"current_steps": 1530, "total_steps": 2737, "loss": 1.0376, "lr": 9.687549764405296e-06, "epoch": 3.9130434782608696, "percentage": 55.9, "elapsed_time": "15:10:31", "remaining_time": "11:58:18"} +{"current_steps": 1531, "total_steps": 2737, "loss": 1.0274, "lr": 9.674801102879817e-06, "epoch": 3.915601023017903, "percentage": 55.94, "elapsed_time": "15:11:07", "remaining_time": "11:57:42"} +{"current_steps": 1532, "total_steps": 2737, "loss": 1.0407, "lr": 9.662052970432617e-06, "epoch": 3.918158567774936, "percentage": 55.97, "elapsed_time": "15:11:42", "remaining_time": "11:57:06"} +{"current_steps": 1533, "total_steps": 2737, "loss": 1.0401, "lr": 9.6493053878041e-06, "epoch": 3.920716112531969, "percentage": 56.01, "elapsed_time": "15:12:18", "remaining_time": "11:56:31"} +{"current_steps": 1534, "total_steps": 2737, "loss": 1.0521, "lr": 9.63655837573379e-06, "epoch": 3.923273657289003, "percentage": 56.05, "elapsed_time": "15:12:54", "remaining_time": "11:55:55"} +{"current_steps": 1535, "total_steps": 2737, "loss": 1.0396, "lr": 9.623811954960279e-06, "epoch": 3.9258312020460355, "percentage": 56.08, "elapsed_time": "15:13:30", "remaining_time": "11:55:19"} +{"current_steps": 1536, "total_steps": 2737, "loss": 1.0272, "lr": 9.611066146221192e-06, "epoch": 3.928388746803069, "percentage": 56.12, "elapsed_time": "15:14:05", "remaining_time": "11:54:43"} +{"current_steps": 1537, "total_steps": 2737, "loss": 1.0263, "lr": 9.598320970253175e-06, "epoch": 3.9309462915601023, "percentage": 56.16, "elapsed_time": "15:14:41", "remaining_time": "11:54:08"} +{"current_steps": 1538, "total_steps": 2737, "loss": 1.044, "lr": 9.585576447791817e-06, "epoch": 3.9335038363171355, "percentage": 56.19, "elapsed_time": "15:15:17", "remaining_time": "11:53:32"} +{"current_steps": 1539, "total_steps": 2737, "loss": 1.0268, "lr": 9.572832599571674e-06, "epoch": 3.9360613810741687, "percentage": 56.23, "elapsed_time": "15:15:53", "remaining_time": "11:52:57"} +{"current_steps": 1540, "total_steps": 2737, "loss": 1.0313, "lr": 9.560089446326175e-06, "epoch": 3.938618925831202, "percentage": 56.27, "elapsed_time": "15:16:28", "remaining_time": "11:52:21"} +{"current_steps": 1541, "total_steps": 2737, "loss": 1.0321, "lr": 9.547347008787648e-06, "epoch": 3.9411764705882355, "percentage": 56.3, "elapsed_time": "15:17:04", "remaining_time": "11:51:45"} +{"current_steps": 1542, "total_steps": 2737, "loss": 1.0027, "lr": 9.534605307687233e-06, "epoch": 3.9437340153452687, "percentage": 56.34, "elapsed_time": "15:17:40", "remaining_time": "11:51:09"} +{"current_steps": 1543, "total_steps": 2737, "loss": 1.0272, "lr": 9.52186436375488e-06, "epoch": 3.946291560102302, "percentage": 56.38, "elapsed_time": "15:18:15", "remaining_time": "11:50:34"} +{"current_steps": 1544, "total_steps": 2737, "loss": 1.0074, "lr": 9.509124197719317e-06, "epoch": 3.948849104859335, "percentage": 56.41, "elapsed_time": "15:18:51", "remaining_time": "11:49:58"} +{"current_steps": 1545, "total_steps": 2737, "loss": 1.0481, "lr": 9.496384830307988e-06, "epoch": 3.9514066496163682, "percentage": 56.45, "elapsed_time": "15:19:27", "remaining_time": "11:49:22"} +{"current_steps": 1546, "total_steps": 2737, "loss": 1.0167, "lr": 9.483646282247056e-06, "epoch": 3.9539641943734014, "percentage": 56.49, "elapsed_time": "15:20:02", "remaining_time": "11:48:47"} +{"current_steps": 1547, "total_steps": 2737, "loss": 1.0478, "lr": 9.470908574261333e-06, "epoch": 3.9565217391304346, "percentage": 56.52, "elapsed_time": "15:20:38", "remaining_time": "11:48:11"} +{"current_steps": 1548, "total_steps": 2737, "loss": 1.0257, "lr": 9.458171727074284e-06, "epoch": 3.959079283887468, "percentage": 56.56, "elapsed_time": "15:21:14", "remaining_time": "11:47:35"} +{"current_steps": 1549, "total_steps": 2737, "loss": 1.0904, "lr": 9.44543576140795e-06, "epoch": 3.9616368286445014, "percentage": 56.59, "elapsed_time": "15:21:50", "remaining_time": "11:46:59"} +{"current_steps": 1550, "total_steps": 2737, "loss": 1.0562, "lr": 9.432700697982962e-06, "epoch": 3.9641943734015346, "percentage": 56.63, "elapsed_time": "15:22:25", "remaining_time": "11:46:24"} +{"current_steps": 1551, "total_steps": 2737, "loss": 1.048, "lr": 9.419966557518472e-06, "epoch": 3.9667519181585678, "percentage": 56.67, "elapsed_time": "15:23:01", "remaining_time": "11:45:48"} +{"current_steps": 1552, "total_steps": 2737, "loss": 1.0453, "lr": 9.407233360732119e-06, "epoch": 3.969309462915601, "percentage": 56.7, "elapsed_time": "15:23:37", "remaining_time": "11:45:12"} +{"current_steps": 1553, "total_steps": 2737, "loss": 1.0416, "lr": 9.39450112834003e-06, "epoch": 3.971867007672634, "percentage": 56.74, "elapsed_time": "15:24:13", "remaining_time": "11:44:37"} +{"current_steps": 1554, "total_steps": 2737, "loss": 1.0302, "lr": 9.381769881056744e-06, "epoch": 3.9744245524296673, "percentage": 56.78, "elapsed_time": "15:24:48", "remaining_time": "11:44:01"} +{"current_steps": 1555, "total_steps": 2737, "loss": 1.0509, "lr": 9.36903963959521e-06, "epoch": 3.976982097186701, "percentage": 56.81, "elapsed_time": "15:25:24", "remaining_time": "11:43:25"} +{"current_steps": 1556, "total_steps": 2737, "loss": 1.0674, "lr": 9.356310424666725e-06, "epoch": 3.979539641943734, "percentage": 56.85, "elapsed_time": "15:26:00", "remaining_time": "11:42:50"} +{"current_steps": 1557, "total_steps": 2737, "loss": 1.0327, "lr": 9.343582256980937e-06, "epoch": 3.9820971867007673, "percentage": 56.89, "elapsed_time": "15:26:35", "remaining_time": "11:42:14"} +{"current_steps": 1558, "total_steps": 2737, "loss": 1.049, "lr": 9.330855157245776e-06, "epoch": 3.9846547314578005, "percentage": 56.92, "elapsed_time": "15:27:11", "remaining_time": "11:41:38"} +{"current_steps": 1559, "total_steps": 2737, "loss": 1.0285, "lr": 9.318129146167432e-06, "epoch": 3.9872122762148337, "percentage": 56.96, "elapsed_time": "15:27:47", "remaining_time": "11:41:02"} +{"current_steps": 1560, "total_steps": 2737, "loss": 1.0447, "lr": 9.305404244450337e-06, "epoch": 3.9897698209718673, "percentage": 57.0, "elapsed_time": "15:28:22", "remaining_time": "11:40:27"} +{"current_steps": 1561, "total_steps": 2737, "loss": 1.0411, "lr": 9.292680472797101e-06, "epoch": 3.9923273657289, "percentage": 57.03, "elapsed_time": "15:28:58", "remaining_time": "11:39:51"} +{"current_steps": 1562, "total_steps": 2737, "loss": 1.0535, "lr": 9.279957851908513e-06, "epoch": 3.9948849104859336, "percentage": 57.07, "elapsed_time": "15:29:34", "remaining_time": "11:39:15"} +{"current_steps": 1563, "total_steps": 2737, "loss": 1.036, "lr": 9.267236402483482e-06, "epoch": 3.997442455242967, "percentage": 57.11, "elapsed_time": "15:30:10", "remaining_time": "11:38:40"} +{"current_steps": 1564, "total_steps": 2737, "loss": 1.0435, "lr": 9.254516145219006e-06, "epoch": 4.0, "percentage": 57.14, "elapsed_time": "15:30:45", "remaining_time": "11:38:04"} +{"current_steps": 1565, "total_steps": 2737, "loss": 1.0143, "lr": 9.241797100810152e-06, "epoch": 4.002557544757034, "percentage": 57.18, "elapsed_time": "15:31:29", "remaining_time": "11:37:34"} +{"current_steps": 1566, "total_steps": 2737, "loss": 1.0249, "lr": 9.229079289950005e-06, "epoch": 4.005115089514066, "percentage": 57.22, "elapsed_time": "15:32:05", "remaining_time": "11:36:59"} +{"current_steps": 1567, "total_steps": 2737, "loss": 0.9987, "lr": 9.216362733329657e-06, "epoch": 4.0076726342711, "percentage": 57.25, "elapsed_time": "15:32:41", "remaining_time": "11:36:23"} +{"current_steps": 1568, "total_steps": 2737, "loss": 1.0198, "lr": 9.203647451638138e-06, "epoch": 4.010230179028133, "percentage": 57.29, "elapsed_time": "15:33:16", "remaining_time": "11:35:47"} +{"current_steps": 1569, "total_steps": 2737, "loss": 1.0328, "lr": 9.190933465562426e-06, "epoch": 4.012787723785166, "percentage": 57.33, "elapsed_time": "15:33:52", "remaining_time": "11:35:12"} +{"current_steps": 1570, "total_steps": 2737, "loss": 1.0358, "lr": 9.17822079578738e-06, "epoch": 4.015345268542199, "percentage": 57.36, "elapsed_time": "15:34:29", "remaining_time": "11:34:36"} +{"current_steps": 1571, "total_steps": 2737, "loss": 1.0312, "lr": 9.165509462995716e-06, "epoch": 4.017902813299233, "percentage": 57.4, "elapsed_time": "15:35:04", "remaining_time": "11:34:01"} +{"current_steps": 1572, "total_steps": 2737, "loss": 1.0167, "lr": 9.152799487867981e-06, "epoch": 4.020460358056266, "percentage": 57.44, "elapsed_time": "15:35:40", "remaining_time": "11:33:25"} +{"current_steps": 1573, "total_steps": 2737, "loss": 1.0173, "lr": 9.140090891082506e-06, "epoch": 4.023017902813299, "percentage": 57.47, "elapsed_time": "15:36:16", "remaining_time": "11:32:49"} +{"current_steps": 1574, "total_steps": 2737, "loss": 1.0122, "lr": 9.127383693315387e-06, "epoch": 4.025575447570333, "percentage": 57.51, "elapsed_time": "15:36:51", "remaining_time": "11:32:13"} +{"current_steps": 1575, "total_steps": 2737, "loss": 1.0207, "lr": 9.114677915240436e-06, "epoch": 4.028132992327365, "percentage": 57.54, "elapsed_time": "15:37:27", "remaining_time": "11:31:38"} +{"current_steps": 1576, "total_steps": 2737, "loss": 1.0339, "lr": 9.101973577529164e-06, "epoch": 4.030690537084399, "percentage": 57.58, "elapsed_time": "15:38:03", "remaining_time": "11:31:02"} +{"current_steps": 1577, "total_steps": 2737, "loss": 1.0007, "lr": 9.089270700850733e-06, "epoch": 4.033248081841432, "percentage": 57.62, "elapsed_time": "15:38:39", "remaining_time": "11:30:26"} +{"current_steps": 1578, "total_steps": 2737, "loss": 1.0314, "lr": 9.076569305871926e-06, "epoch": 4.035805626598465, "percentage": 57.65, "elapsed_time": "15:39:14", "remaining_time": "11:29:51"} +{"current_steps": 1579, "total_steps": 2737, "loss": 1.0302, "lr": 9.063869413257124e-06, "epoch": 4.038363171355499, "percentage": 57.69, "elapsed_time": "15:39:50", "remaining_time": "11:29:15"} +{"current_steps": 1580, "total_steps": 2737, "loss": 1.0476, "lr": 9.051171043668251e-06, "epoch": 4.040920716112532, "percentage": 57.73, "elapsed_time": "15:40:26", "remaining_time": "11:28:39"} +{"current_steps": 1581, "total_steps": 2737, "loss": 1.025, "lr": 9.038474217764768e-06, "epoch": 4.043478260869565, "percentage": 57.76, "elapsed_time": "15:41:01", "remaining_time": "11:28:03"} +{"current_steps": 1582, "total_steps": 2737, "loss": 1.0098, "lr": 9.025778956203611e-06, "epoch": 4.046035805626598, "percentage": 57.8, "elapsed_time": "15:41:37", "remaining_time": "11:27:28"} +{"current_steps": 1583, "total_steps": 2737, "loss": 1.0017, "lr": 9.013085279639178e-06, "epoch": 4.048593350383632, "percentage": 57.84, "elapsed_time": "15:42:13", "remaining_time": "11:26:52"} +{"current_steps": 1584, "total_steps": 2737, "loss": 1.0219, "lr": 9.000393208723291e-06, "epoch": 4.051150895140665, "percentage": 57.87, "elapsed_time": "15:42:48", "remaining_time": "11:26:16"} +{"current_steps": 1585, "total_steps": 2737, "loss": 1.0177, "lr": 8.987702764105151e-06, "epoch": 4.053708439897698, "percentage": 57.91, "elapsed_time": "15:43:24", "remaining_time": "11:25:40"} +{"current_steps": 1586, "total_steps": 2737, "loss": 1.0601, "lr": 8.975013966431323e-06, "epoch": 4.056265984654732, "percentage": 57.95, "elapsed_time": "15:44:00", "remaining_time": "11:25:05"} +{"current_steps": 1587, "total_steps": 2737, "loss": 0.9847, "lr": 8.96232683634568e-06, "epoch": 4.0588235294117645, "percentage": 57.98, "elapsed_time": "15:44:36", "remaining_time": "11:24:29"} +{"current_steps": 1588, "total_steps": 2737, "loss": 1.0099, "lr": 8.949641394489399e-06, "epoch": 4.061381074168798, "percentage": 58.02, "elapsed_time": "15:45:11", "remaining_time": "11:23:53"} +{"current_steps": 1589, "total_steps": 2737, "loss": 1.0538, "lr": 8.93695766150089e-06, "epoch": 4.063938618925831, "percentage": 58.06, "elapsed_time": "15:45:47", "remaining_time": "11:23:18"} +{"current_steps": 1590, "total_steps": 2737, "loss": 1.0172, "lr": 8.9242756580158e-06, "epoch": 4.0664961636828645, "percentage": 58.09, "elapsed_time": "15:46:23", "remaining_time": "11:22:42"} +{"current_steps": 1591, "total_steps": 2737, "loss": 1.0546, "lr": 8.911595404666957e-06, "epoch": 4.069053708439898, "percentage": 58.13, "elapsed_time": "15:46:59", "remaining_time": "11:22:07"} +{"current_steps": 1592, "total_steps": 2737, "loss": 1.0464, "lr": 8.898916922084336e-06, "epoch": 4.071611253196931, "percentage": 58.17, "elapsed_time": "15:47:35", "remaining_time": "11:21:31"} +{"current_steps": 1593, "total_steps": 2737, "loss": 1.0545, "lr": 8.88624023089504e-06, "epoch": 4.0741687979539645, "percentage": 58.2, "elapsed_time": "15:48:11", "remaining_time": "11:20:56"} +{"current_steps": 1594, "total_steps": 2737, "loss": 1.0589, "lr": 8.873565351723249e-06, "epoch": 4.076726342710997, "percentage": 58.24, "elapsed_time": "15:48:47", "remaining_time": "11:20:20"} +{"current_steps": 1595, "total_steps": 2737, "loss": 1.0179, "lr": 8.8608923051902e-06, "epoch": 4.079283887468031, "percentage": 58.28, "elapsed_time": "15:49:22", "remaining_time": "11:19:44"} +{"current_steps": 1596, "total_steps": 2737, "loss": 1.0447, "lr": 8.848221111914147e-06, "epoch": 4.081841432225064, "percentage": 58.31, "elapsed_time": "15:49:58", "remaining_time": "11:19:08"} +{"current_steps": 1597, "total_steps": 2737, "loss": 1.0307, "lr": 8.835551792510329e-06, "epoch": 4.084398976982097, "percentage": 58.35, "elapsed_time": "15:50:33", "remaining_time": "11:18:32"} +{"current_steps": 1598, "total_steps": 2737, "loss": 0.9952, "lr": 8.822884367590941e-06, "epoch": 4.086956521739131, "percentage": 58.39, "elapsed_time": "15:51:09", "remaining_time": "11:17:57"} +{"current_steps": 1599, "total_steps": 2737, "loss": 1.0005, "lr": 8.810218857765085e-06, "epoch": 4.089514066496164, "percentage": 58.42, "elapsed_time": "15:51:45", "remaining_time": "11:17:21"} +{"current_steps": 1600, "total_steps": 2737, "loss": 1.0361, "lr": 8.79755528363876e-06, "epoch": 4.092071611253197, "percentage": 58.46, "elapsed_time": "15:52:21", "remaining_time": "11:16:45"} +{"current_steps": 1601, "total_steps": 2737, "loss": 1.0328, "lr": 8.7848936658148e-06, "epoch": 4.09462915601023, "percentage": 58.49, "elapsed_time": "15:52:56", "remaining_time": "11:16:10"} +{"current_steps": 1602, "total_steps": 2737, "loss": 1.0133, "lr": 8.772234024892872e-06, "epoch": 4.0971867007672635, "percentage": 58.53, "elapsed_time": "15:53:32", "remaining_time": "11:15:34"} +{"current_steps": 1603, "total_steps": 2737, "loss": 1.0027, "lr": 8.759576381469425e-06, "epoch": 4.099744245524296, "percentage": 58.57, "elapsed_time": "15:54:08", "remaining_time": "11:14:58"} +{"current_steps": 1604, "total_steps": 2737, "loss": 1.0437, "lr": 8.746920756137642e-06, "epoch": 4.10230179028133, "percentage": 58.6, "elapsed_time": "15:54:43", "remaining_time": "11:14:22"} +{"current_steps": 1605, "total_steps": 2737, "loss": 1.0265, "lr": 8.734267169487444e-06, "epoch": 4.1048593350383635, "percentage": 58.64, "elapsed_time": "15:55:19", "remaining_time": "11:13:47"} +{"current_steps": 1606, "total_steps": 2737, "loss": 1.0338, "lr": 8.721615642105417e-06, "epoch": 4.107416879795396, "percentage": 58.68, "elapsed_time": "15:55:54", "remaining_time": "11:13:11"} +{"current_steps": 1607, "total_steps": 2737, "loss": 1.0083, "lr": 8.708966194574814e-06, "epoch": 4.10997442455243, "percentage": 58.71, "elapsed_time": "15:56:30", "remaining_time": "11:12:35"} +{"current_steps": 1608, "total_steps": 2737, "loss": 1.0169, "lr": 8.696318847475487e-06, "epoch": 4.112531969309463, "percentage": 58.75, "elapsed_time": "15:57:06", "remaining_time": "11:11:59"} +{"current_steps": 1609, "total_steps": 2737, "loss": 1.0323, "lr": 8.68367362138388e-06, "epoch": 4.115089514066496, "percentage": 58.79, "elapsed_time": "15:57:41", "remaining_time": "11:11:23"} +{"current_steps": 1610, "total_steps": 2737, "loss": 1.0299, "lr": 8.671030536872995e-06, "epoch": 4.117647058823529, "percentage": 58.82, "elapsed_time": "15:58:17", "remaining_time": "11:10:48"} +{"current_steps": 1611, "total_steps": 2737, "loss": 1.0189, "lr": 8.658389614512325e-06, "epoch": 4.120204603580563, "percentage": 58.86, "elapsed_time": "15:58:53", "remaining_time": "11:10:12"} +{"current_steps": 1612, "total_steps": 2737, "loss": 1.0134, "lr": 8.645750874867876e-06, "epoch": 4.122762148337596, "percentage": 58.9, "elapsed_time": "15:59:28", "remaining_time": "11:09:36"} +{"current_steps": 1613, "total_steps": 2737, "loss": 1.0403, "lr": 8.633114338502073e-06, "epoch": 4.125319693094629, "percentage": 58.93, "elapsed_time": "16:00:04", "remaining_time": "11:09:01"} +{"current_steps": 1614, "total_steps": 2737, "loss": 1.0288, "lr": 8.62048002597378e-06, "epoch": 4.127877237851663, "percentage": 58.97, "elapsed_time": "16:00:40", "remaining_time": "11:08:25"} +{"current_steps": 1615, "total_steps": 2737, "loss": 1.0301, "lr": 8.607847957838227e-06, "epoch": 4.130434782608695, "percentage": 59.01, "elapsed_time": "16:01:16", "remaining_time": "11:07:49"} +{"current_steps": 1616, "total_steps": 2737, "loss": 1.0301, "lr": 8.595218154647001e-06, "epoch": 4.132992327365729, "percentage": 59.04, "elapsed_time": "16:01:51", "remaining_time": "11:07:13"} +{"current_steps": 1617, "total_steps": 2737, "loss": 1.0222, "lr": 8.58259063694801e-06, "epoch": 4.135549872122763, "percentage": 59.08, "elapsed_time": "16:02:27", "remaining_time": "11:06:38"} +{"current_steps": 1618, "total_steps": 2737, "loss": 1.0235, "lr": 8.56996542528542e-06, "epoch": 4.138107416879795, "percentage": 59.12, "elapsed_time": "16:03:03", "remaining_time": "11:06:02"} +{"current_steps": 1619, "total_steps": 2737, "loss": 0.9988, "lr": 8.55734254019968e-06, "epoch": 4.140664961636829, "percentage": 59.15, "elapsed_time": "16:03:39", "remaining_time": "11:05:26"} +{"current_steps": 1620, "total_steps": 2737, "loss": 1.0538, "lr": 8.544722002227417e-06, "epoch": 4.143222506393862, "percentage": 59.19, "elapsed_time": "16:04:14", "remaining_time": "11:04:51"} +{"current_steps": 1621, "total_steps": 2737, "loss": 1.035, "lr": 8.532103831901472e-06, "epoch": 4.145780051150895, "percentage": 59.23, "elapsed_time": "16:04:50", "remaining_time": "11:04:15"} +{"current_steps": 1622, "total_steps": 2737, "loss": 1.0298, "lr": 8.519488049750808e-06, "epoch": 4.148337595907928, "percentage": 59.26, "elapsed_time": "16:05:25", "remaining_time": "11:03:39"} +{"current_steps": 1623, "total_steps": 2737, "loss": 1.0485, "lr": 8.506874676300514e-06, "epoch": 4.150895140664962, "percentage": 59.3, "elapsed_time": "16:06:01", "remaining_time": "11:03:03"} +{"current_steps": 1624, "total_steps": 2737, "loss": 1.0092, "lr": 8.494263732071772e-06, "epoch": 4.153452685421995, "percentage": 59.34, "elapsed_time": "16:06:37", "remaining_time": "11:02:28"} +{"current_steps": 1625, "total_steps": 2737, "loss": 1.0209, "lr": 8.481655237581785e-06, "epoch": 4.156010230179028, "percentage": 59.37, "elapsed_time": "16:07:12", "remaining_time": "11:01:52"} +{"current_steps": 1626, "total_steps": 2737, "loss": 1.0358, "lr": 8.469049213343798e-06, "epoch": 4.158567774936062, "percentage": 59.41, "elapsed_time": "16:07:48", "remaining_time": "11:01:16"} +{"current_steps": 1627, "total_steps": 2737, "loss": 1.0235, "lr": 8.456445679867013e-06, "epoch": 4.161125319693094, "percentage": 59.44, "elapsed_time": "16:08:24", "remaining_time": "11:00:40"} +{"current_steps": 1628, "total_steps": 2737, "loss": 1.0436, "lr": 8.443844657656596e-06, "epoch": 4.163682864450128, "percentage": 59.48, "elapsed_time": "16:09:00", "remaining_time": "11:00:05"} +{"current_steps": 1629, "total_steps": 2737, "loss": 1.0444, "lr": 8.431246167213627e-06, "epoch": 4.166240409207161, "percentage": 59.52, "elapsed_time": "16:09:35", "remaining_time": "10:59:29"} +{"current_steps": 1630, "total_steps": 2737, "loss": 1.0321, "lr": 8.418650229035054e-06, "epoch": 4.168797953964194, "percentage": 59.55, "elapsed_time": "16:10:11", "remaining_time": "10:58:53"} +{"current_steps": 1631, "total_steps": 2737, "loss": 1.0539, "lr": 8.406056863613689e-06, "epoch": 4.171355498721228, "percentage": 59.59, "elapsed_time": "16:10:47", "remaining_time": "10:58:17"} +{"current_steps": 1632, "total_steps": 2737, "loss": 1.0282, "lr": 8.393466091438139e-06, "epoch": 4.173913043478261, "percentage": 59.63, "elapsed_time": "16:11:22", "remaining_time": "10:57:42"} +{"current_steps": 1633, "total_steps": 2737, "loss": 1.0239, "lr": 8.380877932992815e-06, "epoch": 4.176470588235294, "percentage": 59.66, "elapsed_time": "16:11:58", "remaining_time": "10:57:06"} +{"current_steps": 1634, "total_steps": 2737, "loss": 1.02, "lr": 8.368292408757853e-06, "epoch": 4.179028132992327, "percentage": 59.7, "elapsed_time": "16:12:34", "remaining_time": "10:56:30"} +{"current_steps": 1635, "total_steps": 2737, "loss": 1.0392, "lr": 8.355709539209121e-06, "epoch": 4.181585677749361, "percentage": 59.74, "elapsed_time": "16:13:10", "remaining_time": "10:55:55"} +{"current_steps": 1636, "total_steps": 2737, "loss": 1.0714, "lr": 8.343129344818162e-06, "epoch": 4.1841432225063935, "percentage": 59.77, "elapsed_time": "16:13:45", "remaining_time": "10:55:19"} +{"current_steps": 1637, "total_steps": 2737, "loss": 1.0217, "lr": 8.33055184605216e-06, "epoch": 4.186700767263427, "percentage": 59.81, "elapsed_time": "16:14:21", "remaining_time": "10:54:43"} +{"current_steps": 1638, "total_steps": 2737, "loss": 1.0391, "lr": 8.317977063373925e-06, "epoch": 4.189258312020461, "percentage": 59.85, "elapsed_time": "16:14:57", "remaining_time": "10:54:08"} +{"current_steps": 1639, "total_steps": 2737, "loss": 1.0215, "lr": 8.305405017241837e-06, "epoch": 4.1918158567774935, "percentage": 59.88, "elapsed_time": "16:15:32", "remaining_time": "10:53:32"} +{"current_steps": 1640, "total_steps": 2737, "loss": 1.0141, "lr": 8.292835728109835e-06, "epoch": 4.194373401534527, "percentage": 59.92, "elapsed_time": "16:16:08", "remaining_time": "10:52:56"} +{"current_steps": 1641, "total_steps": 2737, "loss": 0.995, "lr": 8.28026921642736e-06, "epoch": 4.19693094629156, "percentage": 59.96, "elapsed_time": "16:16:44", "remaining_time": "10:52:21"} +{"current_steps": 1642, "total_steps": 2737, "loss": 1.0443, "lr": 8.267705502639342e-06, "epoch": 4.1994884910485935, "percentage": 59.99, "elapsed_time": "16:17:20", "remaining_time": "10:51:45"} +{"current_steps": 1643, "total_steps": 2737, "loss": 0.9988, "lr": 8.255144607186161e-06, "epoch": 4.202046035805626, "percentage": 60.03, "elapsed_time": "16:17:55", "remaining_time": "10:51:09"} +{"current_steps": 1644, "total_steps": 2737, "loss": 1.0413, "lr": 8.242586550503607e-06, "epoch": 4.20460358056266, "percentage": 60.07, "elapsed_time": "16:18:31", "remaining_time": "10:50:33"} +{"current_steps": 1645, "total_steps": 2737, "loss": 1.0305, "lr": 8.230031353022855e-06, "epoch": 4.207161125319693, "percentage": 60.1, "elapsed_time": "16:19:06", "remaining_time": "10:49:57"} +{"current_steps": 1646, "total_steps": 2737, "loss": 1.0075, "lr": 8.217479035170422e-06, "epoch": 4.209718670076726, "percentage": 60.14, "elapsed_time": "16:19:42", "remaining_time": "10:49:22"} +{"current_steps": 1647, "total_steps": 2737, "loss": 1.0119, "lr": 8.204929617368147e-06, "epoch": 4.21227621483376, "percentage": 60.18, "elapsed_time": "16:20:18", "remaining_time": "10:48:46"} +{"current_steps": 1648, "total_steps": 2737, "loss": 1.0239, "lr": 8.192383120033147e-06, "epoch": 4.2148337595907925, "percentage": 60.21, "elapsed_time": "16:20:54", "remaining_time": "10:48:10"} +{"current_steps": 1649, "total_steps": 2737, "loss": 1.0044, "lr": 8.179839563577789e-06, "epoch": 4.217391304347826, "percentage": 60.25, "elapsed_time": "16:21:29", "remaining_time": "10:47:35"} +{"current_steps": 1650, "total_steps": 2737, "loss": 1.0114, "lr": 8.167298968409658e-06, "epoch": 4.21994884910486, "percentage": 60.28, "elapsed_time": "16:22:05", "remaining_time": "10:46:59"} +{"current_steps": 1651, "total_steps": 2737, "loss": 1.0342, "lr": 8.154761354931513e-06, "epoch": 4.2225063938618925, "percentage": 60.32, "elapsed_time": "16:22:41", "remaining_time": "10:46:23"} +{"current_steps": 1652, "total_steps": 2737, "loss": 1.0196, "lr": 8.142226743541273e-06, "epoch": 4.225063938618926, "percentage": 60.36, "elapsed_time": "16:23:16", "remaining_time": "10:45:48"} +{"current_steps": 1653, "total_steps": 2737, "loss": 1.0319, "lr": 8.12969515463196e-06, "epoch": 4.227621483375959, "percentage": 60.39, "elapsed_time": "16:23:52", "remaining_time": "10:45:12"} +{"current_steps": 1654, "total_steps": 2737, "loss": 1.027, "lr": 8.117166608591693e-06, "epoch": 4.2301790281329925, "percentage": 60.43, "elapsed_time": "16:24:28", "remaining_time": "10:44:36"} +{"current_steps": 1655, "total_steps": 2737, "loss": 1.0512, "lr": 8.104641125803628e-06, "epoch": 4.232736572890025, "percentage": 60.47, "elapsed_time": "16:25:04", "remaining_time": "10:44:01"} +{"current_steps": 1656, "total_steps": 2737, "loss": 1.0289, "lr": 8.092118726645943e-06, "epoch": 4.235294117647059, "percentage": 60.5, "elapsed_time": "16:25:40", "remaining_time": "10:43:25"} +{"current_steps": 1657, "total_steps": 2737, "loss": 1.0134, "lr": 8.0795994314918e-06, "epoch": 4.2378516624040925, "percentage": 60.54, "elapsed_time": "16:26:16", "remaining_time": "10:42:49"} +{"current_steps": 1658, "total_steps": 2737, "loss": 1.0482, "lr": 8.067083260709309e-06, "epoch": 4.240409207161125, "percentage": 60.58, "elapsed_time": "16:26:51", "remaining_time": "10:42:14"} +{"current_steps": 1659, "total_steps": 2737, "loss": 1.0317, "lr": 8.054570234661498e-06, "epoch": 4.242966751918159, "percentage": 60.61, "elapsed_time": "16:27:27", "remaining_time": "10:41:38"} +{"current_steps": 1660, "total_steps": 2737, "loss": 1.0348, "lr": 8.042060373706275e-06, "epoch": 4.245524296675192, "percentage": 60.65, "elapsed_time": "16:28:03", "remaining_time": "10:41:02"} +{"current_steps": 1661, "total_steps": 2737, "loss": 1.0401, "lr": 8.029553698196405e-06, "epoch": 4.248081841432225, "percentage": 60.69, "elapsed_time": "16:28:38", "remaining_time": "10:40:26"} +{"current_steps": 1662, "total_steps": 2737, "loss": 1.0356, "lr": 8.017050228479467e-06, "epoch": 4.250639386189258, "percentage": 60.72, "elapsed_time": "16:29:14", "remaining_time": "10:39:51"} +{"current_steps": 1663, "total_steps": 2737, "loss": 1.0191, "lr": 8.004549984897822e-06, "epoch": 4.253196930946292, "percentage": 60.76, "elapsed_time": "16:29:50", "remaining_time": "10:39:15"} +{"current_steps": 1664, "total_steps": 2737, "loss": 1.0162, "lr": 7.992052987788586e-06, "epoch": 4.255754475703325, "percentage": 60.8, "elapsed_time": "16:30:26", "remaining_time": "10:38:39"} +{"current_steps": 1665, "total_steps": 2737, "loss": 1.0229, "lr": 7.979559257483591e-06, "epoch": 4.258312020460358, "percentage": 60.83, "elapsed_time": "16:31:01", "remaining_time": "10:38:04"} +{"current_steps": 1666, "total_steps": 2737, "loss": 1.0202, "lr": 7.967068814309359e-06, "epoch": 4.260869565217392, "percentage": 60.87, "elapsed_time": "16:31:37", "remaining_time": "10:37:28"} +{"current_steps": 1667, "total_steps": 2737, "loss": 1.0324, "lr": 7.954581678587054e-06, "epoch": 4.263427109974424, "percentage": 60.91, "elapsed_time": "16:32:13", "remaining_time": "10:36:52"} +{"current_steps": 1668, "total_steps": 2737, "loss": 0.9793, "lr": 7.942097870632467e-06, "epoch": 4.265984654731458, "percentage": 60.94, "elapsed_time": "16:32:49", "remaining_time": "10:36:17"} +{"current_steps": 1669, "total_steps": 2737, "loss": 1.0249, "lr": 7.929617410755977e-06, "epoch": 4.268542199488491, "percentage": 60.98, "elapsed_time": "16:33:25", "remaining_time": "10:35:41"} +{"current_steps": 1670, "total_steps": 2737, "loss": 1.0365, "lr": 7.917140319262507e-06, "epoch": 4.271099744245524, "percentage": 61.02, "elapsed_time": "16:34:00", "remaining_time": "10:35:05"} +{"current_steps": 1671, "total_steps": 2737, "loss": 1.0118, "lr": 7.90466661645151e-06, "epoch": 4.273657289002558, "percentage": 61.05, "elapsed_time": "16:34:36", "remaining_time": "10:34:30"} +{"current_steps": 1672, "total_steps": 2737, "loss": 1.0247, "lr": 7.892196322616912e-06, "epoch": 4.276214833759591, "percentage": 61.09, "elapsed_time": "16:35:12", "remaining_time": "10:33:54"} +{"current_steps": 1673, "total_steps": 2737, "loss": 0.978, "lr": 7.879729458047111e-06, "epoch": 4.278772378516624, "percentage": 61.13, "elapsed_time": "16:35:47", "remaining_time": "10:33:18"} +{"current_steps": 1674, "total_steps": 2737, "loss": 1.0175, "lr": 7.86726604302491e-06, "epoch": 4.281329923273657, "percentage": 61.16, "elapsed_time": "16:36:23", "remaining_time": "10:32:43"} +{"current_steps": 1675, "total_steps": 2737, "loss": 1.0288, "lr": 7.854806097827507e-06, "epoch": 4.283887468030691, "percentage": 61.2, "elapsed_time": "16:36:59", "remaining_time": "10:32:07"} +{"current_steps": 1676, "total_steps": 2737, "loss": 1.0166, "lr": 7.842349642726458e-06, "epoch": 4.286445012787723, "percentage": 61.23, "elapsed_time": "16:37:35", "remaining_time": "10:31:31"} +{"current_steps": 1677, "total_steps": 2737, "loss": 1.0348, "lr": 7.829896697987627e-06, "epoch": 4.289002557544757, "percentage": 61.27, "elapsed_time": "16:38:10", "remaining_time": "10:30:55"} +{"current_steps": 1678, "total_steps": 2737, "loss": 1.0342, "lr": 7.817447283871187e-06, "epoch": 4.291560102301791, "percentage": 61.31, "elapsed_time": "16:38:46", "remaining_time": "10:30:20"} +{"current_steps": 1679, "total_steps": 2737, "loss": 1.0214, "lr": 7.80500142063155e-06, "epoch": 4.294117647058823, "percentage": 61.34, "elapsed_time": "16:39:22", "remaining_time": "10:29:44"} +{"current_steps": 1680, "total_steps": 2737, "loss": 1.0404, "lr": 7.792559128517363e-06, "epoch": 4.296675191815857, "percentage": 61.38, "elapsed_time": "16:39:57", "remaining_time": "10:29:08"} +{"current_steps": 1681, "total_steps": 2737, "loss": 1.0112, "lr": 7.780120427771449e-06, "epoch": 4.29923273657289, "percentage": 61.42, "elapsed_time": "16:40:33", "remaining_time": "10:28:32"} +{"current_steps": 1682, "total_steps": 2737, "loss": 1.0605, "lr": 7.7676853386308e-06, "epoch": 4.301790281329923, "percentage": 61.45, "elapsed_time": "16:41:09", "remaining_time": "10:27:57"} +{"current_steps": 1683, "total_steps": 2737, "loss": 1.0371, "lr": 7.755253881326535e-06, "epoch": 4.304347826086957, "percentage": 61.49, "elapsed_time": "16:41:45", "remaining_time": "10:27:21"} +{"current_steps": 1684, "total_steps": 2737, "loss": 1.06, "lr": 7.742826076083848e-06, "epoch": 4.30690537084399, "percentage": 61.53, "elapsed_time": "16:42:20", "remaining_time": "10:26:45"} +{"current_steps": 1685, "total_steps": 2737, "loss": 1.0084, "lr": 7.730401943122007e-06, "epoch": 4.309462915601023, "percentage": 61.56, "elapsed_time": "16:42:56", "remaining_time": "10:26:10"} +{"current_steps": 1686, "total_steps": 2737, "loss": 1.0418, "lr": 7.717981502654297e-06, "epoch": 4.312020460358056, "percentage": 61.6, "elapsed_time": "16:43:32", "remaining_time": "10:25:34"} +{"current_steps": 1687, "total_steps": 2737, "loss": 1.0039, "lr": 7.705564774888001e-06, "epoch": 4.31457800511509, "percentage": 61.64, "elapsed_time": "16:44:09", "remaining_time": "10:24:59"} +{"current_steps": 1688, "total_steps": 2737, "loss": 1.0041, "lr": 7.693151780024354e-06, "epoch": 4.3171355498721224, "percentage": 61.67, "elapsed_time": "16:44:45", "remaining_time": "10:24:23"} +{"current_steps": 1689, "total_steps": 2737, "loss": 1.0087, "lr": 7.680742538258524e-06, "epoch": 4.319693094629156, "percentage": 61.71, "elapsed_time": "16:45:20", "remaining_time": "10:23:48"} +{"current_steps": 1690, "total_steps": 2737, "loss": 1.0716, "lr": 7.668337069779577e-06, "epoch": 4.322250639386189, "percentage": 61.75, "elapsed_time": "16:45:56", "remaining_time": "10:23:12"} +{"current_steps": 1691, "total_steps": 2737, "loss": 1.0185, "lr": 7.655935394770425e-06, "epoch": 4.324808184143222, "percentage": 61.78, "elapsed_time": "16:46:32", "remaining_time": "10:22:36"} +{"current_steps": 1692, "total_steps": 2737, "loss": 1.0173, "lr": 7.643537533407828e-06, "epoch": 4.327365728900256, "percentage": 61.82, "elapsed_time": "16:47:07", "remaining_time": "10:22:01"} +{"current_steps": 1693, "total_steps": 2737, "loss": 1.0351, "lr": 7.631143505862325e-06, "epoch": 4.329923273657289, "percentage": 61.86, "elapsed_time": "16:47:43", "remaining_time": "10:21:25"} +{"current_steps": 1694, "total_steps": 2737, "loss": 1.0303, "lr": 7.618753332298219e-06, "epoch": 4.332480818414322, "percentage": 61.89, "elapsed_time": "16:48:19", "remaining_time": "10:20:49"} +{"current_steps": 1695, "total_steps": 2737, "loss": 1.0129, "lr": 7.606367032873562e-06, "epoch": 4.335038363171355, "percentage": 61.93, "elapsed_time": "16:48:55", "remaining_time": "10:20:14"} +{"current_steps": 1696, "total_steps": 2737, "loss": 1.0526, "lr": 7.593984627740075e-06, "epoch": 4.337595907928389, "percentage": 61.97, "elapsed_time": "16:49:31", "remaining_time": "10:19:38"} +{"current_steps": 1697, "total_steps": 2737, "loss": 1.0181, "lr": 7.5816061370431674e-06, "epoch": 4.340153452685422, "percentage": 62.0, "elapsed_time": "16:50:06", "remaining_time": "10:19:02"} +{"current_steps": 1698, "total_steps": 2737, "loss": 0.996, "lr": 7.569231580921858e-06, "epoch": 4.342710997442455, "percentage": 62.04, "elapsed_time": "16:50:42", "remaining_time": "10:18:27"} +{"current_steps": 1699, "total_steps": 2737, "loss": 1.0301, "lr": 7.556860979508791e-06, "epoch": 4.345268542199489, "percentage": 62.08, "elapsed_time": "16:51:18", "remaining_time": "10:17:51"} +{"current_steps": 1700, "total_steps": 2737, "loss": 1.03, "lr": 7.544494352930145e-06, "epoch": 4.3478260869565215, "percentage": 62.11, "elapsed_time": "16:51:54", "remaining_time": "10:17:15"} +{"current_steps": 1701, "total_steps": 2737, "loss": 0.9895, "lr": 7.532131721305659e-06, "epoch": 4.350383631713555, "percentage": 62.15, "elapsed_time": "16:52:29", "remaining_time": "10:16:39"} +{"current_steps": 1702, "total_steps": 2737, "loss": 1.0428, "lr": 7.519773104748562e-06, "epoch": 4.352941176470588, "percentage": 62.18, "elapsed_time": "16:53:05", "remaining_time": "10:16:04"} +{"current_steps": 1703, "total_steps": 2737, "loss": 1.058, "lr": 7.507418523365542e-06, "epoch": 4.3554987212276215, "percentage": 62.22, "elapsed_time": "16:53:41", "remaining_time": "10:15:28"} +{"current_steps": 1704, "total_steps": 2737, "loss": 1.0112, "lr": 7.495067997256742e-06, "epoch": 4.358056265984655, "percentage": 62.26, "elapsed_time": "16:54:16", "remaining_time": "10:14:52"} +{"current_steps": 1705, "total_steps": 2737, "loss": 1.0281, "lr": 7.482721546515683e-06, "epoch": 4.360613810741688, "percentage": 62.29, "elapsed_time": "16:54:52", "remaining_time": "10:14:17"} +{"current_steps": 1706, "total_steps": 2737, "loss": 1.0418, "lr": 7.47037919122928e-06, "epoch": 4.3631713554987215, "percentage": 62.33, "elapsed_time": "16:55:28", "remaining_time": "10:13:41"} +{"current_steps": 1707, "total_steps": 2737, "loss": 1.0279, "lr": 7.458040951477763e-06, "epoch": 4.365728900255754, "percentage": 62.37, "elapsed_time": "16:56:04", "remaining_time": "10:13:05"} +{"current_steps": 1708, "total_steps": 2737, "loss": 1.0155, "lr": 7.4457068473346836e-06, "epoch": 4.368286445012788, "percentage": 62.4, "elapsed_time": "16:56:39", "remaining_time": "10:12:29"} +{"current_steps": 1709, "total_steps": 2737, "loss": 1.0423, "lr": 7.43337689886686e-06, "epoch": 4.370843989769821, "percentage": 62.44, "elapsed_time": "16:57:15", "remaining_time": "10:11:54"} +{"current_steps": 1710, "total_steps": 2737, "loss": 1.0317, "lr": 7.42105112613434e-06, "epoch": 4.373401534526854, "percentage": 62.48, "elapsed_time": "16:57:51", "remaining_time": "10:11:18"} +{"current_steps": 1711, "total_steps": 2737, "loss": 1.0536, "lr": 7.408729549190393e-06, "epoch": 4.375959079283888, "percentage": 62.51, "elapsed_time": "16:58:26", "remaining_time": "10:10:42"} +{"current_steps": 1712, "total_steps": 2737, "loss": 1.0549, "lr": 7.3964121880814445e-06, "epoch": 4.378516624040921, "percentage": 62.55, "elapsed_time": "16:59:02", "remaining_time": "10:10:06"} +{"current_steps": 1713, "total_steps": 2737, "loss": 1.0168, "lr": 7.3840990628470824e-06, "epoch": 4.381074168797954, "percentage": 62.59, "elapsed_time": "16:59:38", "remaining_time": "10:09:31"} +{"current_steps": 1714, "total_steps": 2737, "loss": 1.0435, "lr": 7.371790193519979e-06, "epoch": 4.383631713554987, "percentage": 62.62, "elapsed_time": "17:00:13", "remaining_time": "10:08:55"} +{"current_steps": 1715, "total_steps": 2737, "loss": 1.0389, "lr": 7.359485600125904e-06, "epoch": 4.3861892583120206, "percentage": 62.66, "elapsed_time": "17:00:49", "remaining_time": "10:08:19"} +{"current_steps": 1716, "total_steps": 2737, "loss": 1.0264, "lr": 7.347185302683662e-06, "epoch": 4.388746803069053, "percentage": 62.7, "elapsed_time": "17:01:25", "remaining_time": "10:07:44"} +{"current_steps": 1717, "total_steps": 2737, "loss": 1.0622, "lr": 7.334889321205063e-06, "epoch": 4.391304347826087, "percentage": 62.73, "elapsed_time": "17:02:01", "remaining_time": "10:07:08"} +{"current_steps": 1718, "total_steps": 2737, "loss": 1.0029, "lr": 7.322597675694904e-06, "epoch": 4.3938618925831205, "percentage": 62.77, "elapsed_time": "17:02:36", "remaining_time": "10:06:32"} +{"current_steps": 1719, "total_steps": 2737, "loss": 1.0165, "lr": 7.31031038615092e-06, "epoch": 4.396419437340153, "percentage": 62.81, "elapsed_time": "17:03:12", "remaining_time": "10:05:56"} +{"current_steps": 1720, "total_steps": 2737, "loss": 1.0357, "lr": 7.298027472563768e-06, "epoch": 4.398976982097187, "percentage": 62.84, "elapsed_time": "17:03:48", "remaining_time": "10:05:21"} +{"current_steps": 1721, "total_steps": 2737, "loss": 1.0562, "lr": 7.285748954916973e-06, "epoch": 4.40153452685422, "percentage": 62.88, "elapsed_time": "17:04:23", "remaining_time": "10:04:45"} +{"current_steps": 1722, "total_steps": 2737, "loss": 1.0409, "lr": 7.273474853186922e-06, "epoch": 4.404092071611253, "percentage": 62.92, "elapsed_time": "17:04:59", "remaining_time": "10:04:09"} +{"current_steps": 1723, "total_steps": 2737, "loss": 1.0464, "lr": 7.261205187342809e-06, "epoch": 4.406649616368286, "percentage": 62.95, "elapsed_time": "17:05:35", "remaining_time": "10:03:34"} +{"current_steps": 1724, "total_steps": 2737, "loss": 1.0153, "lr": 7.248939977346612e-06, "epoch": 4.40920716112532, "percentage": 62.99, "elapsed_time": "17:06:11", "remaining_time": "10:02:58"} +{"current_steps": 1725, "total_steps": 2737, "loss": 1.0274, "lr": 7.236679243153062e-06, "epoch": 4.411764705882353, "percentage": 63.03, "elapsed_time": "17:06:46", "remaining_time": "10:02:22"} +{"current_steps": 1726, "total_steps": 2737, "loss": 1.0302, "lr": 7.224423004709607e-06, "epoch": 4.414322250639386, "percentage": 63.06, "elapsed_time": "17:07:22", "remaining_time": "10:01:46"} +{"current_steps": 1727, "total_steps": 2737, "loss": 1.0173, "lr": 7.212171281956377e-06, "epoch": 4.41687979539642, "percentage": 63.1, "elapsed_time": "17:07:58", "remaining_time": "10:01:11"} +{"current_steps": 1728, "total_steps": 2737, "loss": 1.0154, "lr": 7.199924094826167e-06, "epoch": 4.419437340153452, "percentage": 63.13, "elapsed_time": "17:08:34", "remaining_time": "10:00:35"} +{"current_steps": 1729, "total_steps": 2737, "loss": 1.0252, "lr": 7.187681463244377e-06, "epoch": 4.421994884910486, "percentage": 63.17, "elapsed_time": "17:09:10", "remaining_time": "10:00:00"} +{"current_steps": 1730, "total_steps": 2737, "loss": 1.0643, "lr": 7.175443407129008e-06, "epoch": 4.42455242966752, "percentage": 63.21, "elapsed_time": "17:09:46", "remaining_time": "9:59:24"} +{"current_steps": 1731, "total_steps": 2737, "loss": 1.0094, "lr": 7.163209946390608e-06, "epoch": 4.427109974424552, "percentage": 63.24, "elapsed_time": "17:10:21", "remaining_time": "9:58:48"} +{"current_steps": 1732, "total_steps": 2737, "loss": 1.0011, "lr": 7.1509811009322574e-06, "epoch": 4.429667519181586, "percentage": 63.28, "elapsed_time": "17:10:57", "remaining_time": "9:58:13"} +{"current_steps": 1733, "total_steps": 2737, "loss": 1.0344, "lr": 7.138756890649516e-06, "epoch": 4.432225063938619, "percentage": 63.32, "elapsed_time": "17:11:33", "remaining_time": "9:57:37"} +{"current_steps": 1734, "total_steps": 2737, "loss": 1.0187, "lr": 7.126537335430417e-06, "epoch": 4.434782608695652, "percentage": 63.35, "elapsed_time": "17:12:09", "remaining_time": "9:57:01"} +{"current_steps": 1735, "total_steps": 2737, "loss": 1.0391, "lr": 7.1143224551554115e-06, "epoch": 4.437340153452685, "percentage": 63.39, "elapsed_time": "17:12:44", "remaining_time": "9:56:25"} +{"current_steps": 1736, "total_steps": 2737, "loss": 1.0599, "lr": 7.102112269697341e-06, "epoch": 4.439897698209719, "percentage": 63.43, "elapsed_time": "17:13:20", "remaining_time": "9:55:50"} +{"current_steps": 1737, "total_steps": 2737, "loss": 1.0205, "lr": 7.08990679892142e-06, "epoch": 4.442455242966752, "percentage": 63.46, "elapsed_time": "17:13:56", "remaining_time": "9:55:14"} +{"current_steps": 1738, "total_steps": 2737, "loss": 1.0254, "lr": 7.077706062685181e-06, "epoch": 4.445012787723785, "percentage": 63.5, "elapsed_time": "17:14:31", "remaining_time": "9:54:38"} +{"current_steps": 1739, "total_steps": 2737, "loss": 1.0375, "lr": 7.065510080838465e-06, "epoch": 4.447570332480819, "percentage": 63.54, "elapsed_time": "17:15:07", "remaining_time": "9:54:02"} +{"current_steps": 1740, "total_steps": 2737, "loss": 0.9962, "lr": 7.053318873223365e-06, "epoch": 4.450127877237851, "percentage": 63.57, "elapsed_time": "17:15:42", "remaining_time": "9:53:27"} +{"current_steps": 1741, "total_steps": 2737, "loss": 1.0151, "lr": 7.041132459674216e-06, "epoch": 4.452685421994885, "percentage": 63.61, "elapsed_time": "17:16:18", "remaining_time": "9:52:51"} +{"current_steps": 1742, "total_steps": 2737, "loss": 1.059, "lr": 7.028950860017555e-06, "epoch": 4.455242966751918, "percentage": 63.65, "elapsed_time": "17:16:54", "remaining_time": "9:52:15"} +{"current_steps": 1743, "total_steps": 2737, "loss": 1.0151, "lr": 7.016774094072077e-06, "epoch": 4.457800511508951, "percentage": 63.68, "elapsed_time": "17:17:30", "remaining_time": "9:51:40"} +{"current_steps": 1744, "total_steps": 2737, "loss": 1.0226, "lr": 7.004602181648626e-06, "epoch": 4.460358056265985, "percentage": 63.72, "elapsed_time": "17:18:05", "remaining_time": "9:51:04"} +{"current_steps": 1745, "total_steps": 2737, "loss": 1.0315, "lr": 6.992435142550133e-06, "epoch": 4.462915601023018, "percentage": 63.76, "elapsed_time": "17:18:41", "remaining_time": "9:50:28"} +{"current_steps": 1746, "total_steps": 2737, "loss": 1.035, "lr": 6.980272996571617e-06, "epoch": 4.465473145780051, "percentage": 63.79, "elapsed_time": "17:19:16", "remaining_time": "9:49:52"} +{"current_steps": 1747, "total_steps": 2737, "loss": 1.0212, "lr": 6.968115763500127e-06, "epoch": 4.468030690537084, "percentage": 63.83, "elapsed_time": "17:19:52", "remaining_time": "9:49:16"} +{"current_steps": 1748, "total_steps": 2737, "loss": 1.0262, "lr": 6.95596346311472e-06, "epoch": 4.470588235294118, "percentage": 63.87, "elapsed_time": "17:20:28", "remaining_time": "9:48:41"} +{"current_steps": 1749, "total_steps": 2737, "loss": 1.0285, "lr": 6.943816115186432e-06, "epoch": 4.4731457800511505, "percentage": 63.9, "elapsed_time": "17:21:03", "remaining_time": "9:48:05"} +{"current_steps": 1750, "total_steps": 2737, "loss": 1.0526, "lr": 6.931673739478235e-06, "epoch": 4.475703324808184, "percentage": 63.94, "elapsed_time": "17:21:39", "remaining_time": "9:47:29"} +{"current_steps": 1751, "total_steps": 2737, "loss": 1.0174, "lr": 6.919536355745018e-06, "epoch": 4.478260869565218, "percentage": 63.98, "elapsed_time": "17:22:15", "remaining_time": "9:46:54"} +{"current_steps": 1752, "total_steps": 2737, "loss": 1.035, "lr": 6.907403983733543e-06, "epoch": 4.4808184143222505, "percentage": 64.01, "elapsed_time": "17:22:50", "remaining_time": "9:46:18"} +{"current_steps": 1753, "total_steps": 2737, "loss": 1.047, "lr": 6.895276643182423e-06, "epoch": 4.483375959079284, "percentage": 64.05, "elapsed_time": "17:23:26", "remaining_time": "9:45:42"} +{"current_steps": 1754, "total_steps": 2737, "loss": 1.0465, "lr": 6.883154353822079e-06, "epoch": 4.485933503836317, "percentage": 64.08, "elapsed_time": "17:24:02", "remaining_time": "9:45:06"} +{"current_steps": 1755, "total_steps": 2737, "loss": 1.0184, "lr": 6.871037135374722e-06, "epoch": 4.4884910485933505, "percentage": 64.12, "elapsed_time": "17:24:38", "remaining_time": "9:44:31"} +{"current_steps": 1756, "total_steps": 2737, "loss": 1.0307, "lr": 6.858925007554308e-06, "epoch": 4.491048593350383, "percentage": 64.16, "elapsed_time": "17:25:13", "remaining_time": "9:43:55"} +{"current_steps": 1757, "total_steps": 2737, "loss": 1.0363, "lr": 6.8468179900665095e-06, "epoch": 4.493606138107417, "percentage": 64.19, "elapsed_time": "17:25:49", "remaining_time": "9:43:19"} +{"current_steps": 1758, "total_steps": 2737, "loss": 1.0083, "lr": 6.834716102608689e-06, "epoch": 4.4961636828644505, "percentage": 64.23, "elapsed_time": "17:26:25", "remaining_time": "9:42:43"} +{"current_steps": 1759, "total_steps": 2737, "loss": 0.996, "lr": 6.8226193648698605e-06, "epoch": 4.498721227621483, "percentage": 64.27, "elapsed_time": "17:27:00", "remaining_time": "9:42:08"} +{"current_steps": 1760, "total_steps": 2737, "loss": 1.0476, "lr": 6.810527796530655e-06, "epoch": 4.501278772378517, "percentage": 64.3, "elapsed_time": "17:27:36", "remaining_time": "9:41:32"} +{"current_steps": 1761, "total_steps": 2737, "loss": 1.042, "lr": 6.798441417263311e-06, "epoch": 4.5038363171355495, "percentage": 64.34, "elapsed_time": "17:28:12", "remaining_time": "9:40:56"} +{"current_steps": 1762, "total_steps": 2737, "loss": 1.0535, "lr": 6.786360246731595e-06, "epoch": 4.506393861892583, "percentage": 64.38, "elapsed_time": "17:28:47", "remaining_time": "9:40:21"} +{"current_steps": 1763, "total_steps": 2737, "loss": 1.0384, "lr": 6.774284304590832e-06, "epoch": 4.508951406649617, "percentage": 64.41, "elapsed_time": "17:29:23", "remaining_time": "9:39:45"} +{"current_steps": 1764, "total_steps": 2737, "loss": 1.0124, "lr": 6.762213610487813e-06, "epoch": 4.5115089514066495, "percentage": 64.45, "elapsed_time": "17:29:59", "remaining_time": "9:39:09"} +{"current_steps": 1765, "total_steps": 2737, "loss": 1.0282, "lr": 6.75014818406081e-06, "epoch": 4.514066496163683, "percentage": 64.49, "elapsed_time": "17:30:35", "remaining_time": "9:38:33"} +{"current_steps": 1766, "total_steps": 2737, "loss": 1.017, "lr": 6.7380880449395105e-06, "epoch": 4.516624040920716, "percentage": 64.52, "elapsed_time": "17:31:10", "remaining_time": "9:37:58"} +{"current_steps": 1767, "total_steps": 2737, "loss": 1.0448, "lr": 6.726033212745009e-06, "epoch": 4.5191815856777495, "percentage": 64.56, "elapsed_time": "17:31:46", "remaining_time": "9:37:22"} +{"current_steps": 1768, "total_steps": 2737, "loss": 1.0431, "lr": 6.713983707089773e-06, "epoch": 4.521739130434782, "percentage": 64.6, "elapsed_time": "17:32:22", "remaining_time": "9:36:46"} +{"current_steps": 1769, "total_steps": 2737, "loss": 1.0014, "lr": 6.7019395475775805e-06, "epoch": 4.524296675191816, "percentage": 64.63, "elapsed_time": "17:32:57", "remaining_time": "9:36:11"} +{"current_steps": 1770, "total_steps": 2737, "loss": 1.0277, "lr": 6.6899007538035376e-06, "epoch": 4.526854219948849, "percentage": 64.67, "elapsed_time": "17:33:33", "remaining_time": "9:35:35"} +{"current_steps": 1771, "total_steps": 2737, "loss": 1.0214, "lr": 6.6778673453539984e-06, "epoch": 4.529411764705882, "percentage": 64.71, "elapsed_time": "17:34:09", "remaining_time": "9:34:59"} +{"current_steps": 1772, "total_steps": 2737, "loss": 1.0254, "lr": 6.66583934180658e-06, "epoch": 4.531969309462916, "percentage": 64.74, "elapsed_time": "17:34:44", "remaining_time": "9:34:23"} +{"current_steps": 1773, "total_steps": 2737, "loss": 1.0128, "lr": 6.653816762730079e-06, "epoch": 4.534526854219949, "percentage": 64.78, "elapsed_time": "17:35:20", "remaining_time": "9:33:48"} +{"current_steps": 1774, "total_steps": 2737, "loss": 1.0117, "lr": 6.641799627684481e-06, "epoch": 4.537084398976982, "percentage": 64.82, "elapsed_time": "17:35:56", "remaining_time": "9:33:12"} +{"current_steps": 1775, "total_steps": 2737, "loss": 1.0047, "lr": 6.629787956220924e-06, "epoch": 4.539641943734015, "percentage": 64.85, "elapsed_time": "17:36:31", "remaining_time": "9:32:36"} +{"current_steps": 1776, "total_steps": 2737, "loss": 1.0193, "lr": 6.617781767881635e-06, "epoch": 4.542199488491049, "percentage": 64.89, "elapsed_time": "17:37:07", "remaining_time": "9:32:00"} +{"current_steps": 1777, "total_steps": 2737, "loss": 1.0344, "lr": 6.6057810821999406e-06, "epoch": 4.544757033248082, "percentage": 64.93, "elapsed_time": "17:37:43", "remaining_time": "9:31:25"} +{"current_steps": 1778, "total_steps": 2737, "loss": 1.046, "lr": 6.593785918700197e-06, "epoch": 4.547314578005115, "percentage": 64.96, "elapsed_time": "17:38:18", "remaining_time": "9:30:49"} +{"current_steps": 1779, "total_steps": 2737, "loss": 1.0264, "lr": 6.581796296897795e-06, "epoch": 4.549872122762149, "percentage": 65.0, "elapsed_time": "17:38:55", "remaining_time": "9:30:14"} +{"current_steps": 1780, "total_steps": 2737, "loss": 1.0207, "lr": 6.569812236299089e-06, "epoch": 4.552429667519181, "percentage": 65.03, "elapsed_time": "17:39:30", "remaining_time": "9:29:38"} +{"current_steps": 1781, "total_steps": 2737, "loss": 1.049, "lr": 6.557833756401404e-06, "epoch": 4.554987212276215, "percentage": 65.07, "elapsed_time": "17:40:06", "remaining_time": "9:29:02"} +{"current_steps": 1782, "total_steps": 2737, "loss": 1.0266, "lr": 6.545860876692979e-06, "epoch": 4.557544757033249, "percentage": 65.11, "elapsed_time": "17:40:42", "remaining_time": "9:28:26"} +{"current_steps": 1783, "total_steps": 2737, "loss": 1.0791, "lr": 6.533893616652932e-06, "epoch": 4.560102301790281, "percentage": 65.14, "elapsed_time": "17:41:17", "remaining_time": "9:27:51"} +{"current_steps": 1784, "total_steps": 2737, "loss": 1.001, "lr": 6.521931995751258e-06, "epoch": 4.562659846547315, "percentage": 65.18, "elapsed_time": "17:41:53", "remaining_time": "9:27:15"} +{"current_steps": 1785, "total_steps": 2737, "loss": 1.0029, "lr": 6.509976033448755e-06, "epoch": 4.565217391304348, "percentage": 65.22, "elapsed_time": "17:42:29", "remaining_time": "9:26:39"} +{"current_steps": 1786, "total_steps": 2737, "loss": 1.0085, "lr": 6.498025749197036e-06, "epoch": 4.567774936061381, "percentage": 65.25, "elapsed_time": "17:43:05", "remaining_time": "9:26:04"} +{"current_steps": 1787, "total_steps": 2737, "loss": 1.0215, "lr": 6.486081162438458e-06, "epoch": 4.570332480818414, "percentage": 65.29, "elapsed_time": "17:43:41", "remaining_time": "9:25:28"} +{"current_steps": 1788, "total_steps": 2737, "loss": 1.0101, "lr": 6.4741422926061225e-06, "epoch": 4.572890025575448, "percentage": 65.33, "elapsed_time": "17:44:17", "remaining_time": "9:24:52"} +{"current_steps": 1789, "total_steps": 2737, "loss": 1.0594, "lr": 6.462209159123825e-06, "epoch": 4.57544757033248, "percentage": 65.36, "elapsed_time": "17:44:53", "remaining_time": "9:24:17"} +{"current_steps": 1790, "total_steps": 2737, "loss": 1.0351, "lr": 6.450281781406022e-06, "epoch": 4.578005115089514, "percentage": 65.4, "elapsed_time": "17:45:28", "remaining_time": "9:23:41"} +{"current_steps": 1791, "total_steps": 2737, "loss": 1.0237, "lr": 6.438360178857818e-06, "epoch": 4.580562659846548, "percentage": 65.44, "elapsed_time": "17:46:04", "remaining_time": "9:23:05"} +{"current_steps": 1792, "total_steps": 2737, "loss": 1.0262, "lr": 6.426444370874906e-06, "epoch": 4.58312020460358, "percentage": 65.47, "elapsed_time": "17:46:40", "remaining_time": "9:22:30"} +{"current_steps": 1793, "total_steps": 2737, "loss": 1.018, "lr": 6.414534376843566e-06, "epoch": 4.585677749360614, "percentage": 65.51, "elapsed_time": "17:47:16", "remaining_time": "9:21:54"} +{"current_steps": 1794, "total_steps": 2737, "loss": 1.0286, "lr": 6.402630216140618e-06, "epoch": 4.588235294117647, "percentage": 65.55, "elapsed_time": "17:47:51", "remaining_time": "9:21:18"} +{"current_steps": 1795, "total_steps": 2737, "loss": 1.0103, "lr": 6.39073190813338e-06, "epoch": 4.59079283887468, "percentage": 65.58, "elapsed_time": "17:48:27", "remaining_time": "9:20:42"} +{"current_steps": 1796, "total_steps": 2737, "loss": 1.0228, "lr": 6.37883947217966e-06, "epoch": 4.593350383631714, "percentage": 65.62, "elapsed_time": "17:49:02", "remaining_time": "9:20:07"} +{"current_steps": 1797, "total_steps": 2737, "loss": 1.0306, "lr": 6.366952927627703e-06, "epoch": 4.595907928388747, "percentage": 65.66, "elapsed_time": "17:49:38", "remaining_time": "9:19:31"} +{"current_steps": 1798, "total_steps": 2737, "loss": 1.0072, "lr": 6.355072293816178e-06, "epoch": 4.59846547314578, "percentage": 65.69, "elapsed_time": "17:50:14", "remaining_time": "9:18:55"} +{"current_steps": 1799, "total_steps": 2737, "loss": 1.0122, "lr": 6.34319759007413e-06, "epoch": 4.601023017902813, "percentage": 65.73, "elapsed_time": "17:50:50", "remaining_time": "9:18:20"} +{"current_steps": 1800, "total_steps": 2737, "loss": 1.0465, "lr": 6.331328835720961e-06, "epoch": 4.603580562659847, "percentage": 65.77, "elapsed_time": "17:51:25", "remaining_time": "9:17:44"} +{"current_steps": 1801, "total_steps": 2737, "loss": 1.0069, "lr": 6.319466050066395e-06, "epoch": 4.6061381074168795, "percentage": 65.8, "elapsed_time": "17:52:01", "remaining_time": "9:17:08"} +{"current_steps": 1802, "total_steps": 2737, "loss": 0.9955, "lr": 6.307609252410438e-06, "epoch": 4.608695652173913, "percentage": 65.84, "elapsed_time": "17:52:37", "remaining_time": "9:16:32"} +{"current_steps": 1803, "total_steps": 2737, "loss": 1.021, "lr": 6.295758462043362e-06, "epoch": 4.611253196930946, "percentage": 65.88, "elapsed_time": "17:53:13", "remaining_time": "9:15:57"} +{"current_steps": 1804, "total_steps": 2737, "loss": 0.9887, "lr": 6.283913698245659e-06, "epoch": 4.6138107416879794, "percentage": 65.91, "elapsed_time": "17:53:48", "remaining_time": "9:15:21"} +{"current_steps": 1805, "total_steps": 2737, "loss": 1.0315, "lr": 6.272074980288021e-06, "epoch": 4.616368286445013, "percentage": 65.95, "elapsed_time": "17:54:24", "remaining_time": "9:14:45"} +{"current_steps": 1806, "total_steps": 2737, "loss": 0.9946, "lr": 6.2602423274313e-06, "epoch": 4.618925831202046, "percentage": 65.98, "elapsed_time": "17:55:00", "remaining_time": "9:14:10"} +{"current_steps": 1807, "total_steps": 2737, "loss": 1.0247, "lr": 6.248415758926485e-06, "epoch": 4.621483375959079, "percentage": 66.02, "elapsed_time": "17:55:35", "remaining_time": "9:13:34"} +{"current_steps": 1808, "total_steps": 2737, "loss": 1.0695, "lr": 6.236595294014662e-06, "epoch": 4.624040920716112, "percentage": 66.06, "elapsed_time": "17:56:11", "remaining_time": "9:12:58"} +{"current_steps": 1809, "total_steps": 2737, "loss": 1.0361, "lr": 6.22478095192699e-06, "epoch": 4.626598465473146, "percentage": 66.09, "elapsed_time": "17:56:47", "remaining_time": "9:12:23"} +{"current_steps": 1810, "total_steps": 2737, "loss": 1.0263, "lr": 6.212972751884663e-06, "epoch": 4.629156010230179, "percentage": 66.13, "elapsed_time": "17:57:23", "remaining_time": "9:11:47"} +{"current_steps": 1811, "total_steps": 2737, "loss": 1.0248, "lr": 6.201170713098883e-06, "epoch": 4.631713554987212, "percentage": 66.17, "elapsed_time": "17:57:58", "remaining_time": "9:11:11"} +{"current_steps": 1812, "total_steps": 2737, "loss": 1.053, "lr": 6.189374854770832e-06, "epoch": 4.634271099744246, "percentage": 66.2, "elapsed_time": "17:58:34", "remaining_time": "9:10:35"} +{"current_steps": 1813, "total_steps": 2737, "loss": 0.9904, "lr": 6.177585196091631e-06, "epoch": 4.6368286445012785, "percentage": 66.24, "elapsed_time": "17:59:10", "remaining_time": "9:10:00"} +{"current_steps": 1814, "total_steps": 2737, "loss": 1.0595, "lr": 6.16580175624232e-06, "epoch": 4.639386189258312, "percentage": 66.28, "elapsed_time": "17:59:45", "remaining_time": "9:09:24"} +{"current_steps": 1815, "total_steps": 2737, "loss": 1.0517, "lr": 6.15402455439382e-06, "epoch": 4.641943734015345, "percentage": 66.31, "elapsed_time": "18:00:21", "remaining_time": "9:08:48"} +{"current_steps": 1816, "total_steps": 2737, "loss": 1.054, "lr": 6.142253609706898e-06, "epoch": 4.6445012787723785, "percentage": 66.35, "elapsed_time": "18:00:57", "remaining_time": "9:08:12"} +{"current_steps": 1817, "total_steps": 2737, "loss": 1.0512, "lr": 6.130488941332151e-06, "epoch": 4.647058823529412, "percentage": 66.39, "elapsed_time": "18:01:33", "remaining_time": "9:07:37"} +{"current_steps": 1818, "total_steps": 2737, "loss": 1.039, "lr": 6.118730568409951e-06, "epoch": 4.649616368286445, "percentage": 66.42, "elapsed_time": "18:02:08", "remaining_time": "9:07:01"} +{"current_steps": 1819, "total_steps": 2737, "loss": 1.0129, "lr": 6.106978510070443e-06, "epoch": 4.6521739130434785, "percentage": 66.46, "elapsed_time": "18:02:44", "remaining_time": "9:06:25"} +{"current_steps": 1820, "total_steps": 2737, "loss": 1.0003, "lr": 6.095232785433485e-06, "epoch": 4.654731457800511, "percentage": 66.5, "elapsed_time": "18:03:20", "remaining_time": "9:05:50"} +{"current_steps": 1821, "total_steps": 2737, "loss": 1.0032, "lr": 6.083493413608639e-06, "epoch": 4.657289002557545, "percentage": 66.53, "elapsed_time": "18:03:55", "remaining_time": "9:05:14"} +{"current_steps": 1822, "total_steps": 2737, "loss": 1.0575, "lr": 6.0717604136951315e-06, "epoch": 4.659846547314578, "percentage": 66.57, "elapsed_time": "18:04:31", "remaining_time": "9:04:38"} +{"current_steps": 1823, "total_steps": 2737, "loss": 1.0012, "lr": 6.0600338047818155e-06, "epoch": 4.662404092071611, "percentage": 66.61, "elapsed_time": "18:05:07", "remaining_time": "9:04:02"} +{"current_steps": 1824, "total_steps": 2737, "loss": 1.0152, "lr": 6.048313605947153e-06, "epoch": 4.664961636828645, "percentage": 66.64, "elapsed_time": "18:05:42", "remaining_time": "9:03:27"} +{"current_steps": 1825, "total_steps": 2737, "loss": 1.0202, "lr": 6.036599836259175e-06, "epoch": 4.667519181585678, "percentage": 66.68, "elapsed_time": "18:06:18", "remaining_time": "9:02:51"} +{"current_steps": 1826, "total_steps": 2737, "loss": 1.0152, "lr": 6.024892514775451e-06, "epoch": 4.670076726342711, "percentage": 66.72, "elapsed_time": "18:06:54", "remaining_time": "9:02:15"} +{"current_steps": 1827, "total_steps": 2737, "loss": 1.0185, "lr": 6.013191660543063e-06, "epoch": 4.672634271099744, "percentage": 66.75, "elapsed_time": "18:07:29", "remaining_time": "9:01:39"} +{"current_steps": 1828, "total_steps": 2737, "loss": 1.0091, "lr": 6.001497292598566e-06, "epoch": 4.675191815856778, "percentage": 66.79, "elapsed_time": "18:08:05", "remaining_time": "9:01:04"} +{"current_steps": 1829, "total_steps": 2737, "loss": 1.0171, "lr": 5.98980942996797e-06, "epoch": 4.677749360613811, "percentage": 66.82, "elapsed_time": "18:08:41", "remaining_time": "9:00:28"} +{"current_steps": 1830, "total_steps": 2737, "loss": 1.04, "lr": 5.97812809166669e-06, "epoch": 4.680306905370844, "percentage": 66.86, "elapsed_time": "18:09:16", "remaining_time": "8:59:52"} +{"current_steps": 1831, "total_steps": 2737, "loss": 1.0219, "lr": 5.966453296699541e-06, "epoch": 4.6828644501278776, "percentage": 66.9, "elapsed_time": "18:09:52", "remaining_time": "8:59:16"} +{"current_steps": 1832, "total_steps": 2737, "loss": 1.0466, "lr": 5.954785064060678e-06, "epoch": 4.68542199488491, "percentage": 66.93, "elapsed_time": "18:10:28", "remaining_time": "8:58:41"} +{"current_steps": 1833, "total_steps": 2737, "loss": 1.0168, "lr": 5.943123412733587e-06, "epoch": 4.687979539641944, "percentage": 66.97, "elapsed_time": "18:11:04", "remaining_time": "8:58:05"} +{"current_steps": 1834, "total_steps": 2737, "loss": 1.074, "lr": 5.931468361691053e-06, "epoch": 4.690537084398977, "percentage": 67.01, "elapsed_time": "18:11:40", "remaining_time": "8:57:30"} +{"current_steps": 1835, "total_steps": 2737, "loss": 1.0365, "lr": 5.919819929895106e-06, "epoch": 4.69309462915601, "percentage": 67.04, "elapsed_time": "18:12:16", "remaining_time": "8:56:54"} +{"current_steps": 1836, "total_steps": 2737, "loss": 1.0461, "lr": 5.9081781362970205e-06, "epoch": 4.695652173913043, "percentage": 67.08, "elapsed_time": "18:12:51", "remaining_time": "8:56:18"} +{"current_steps": 1837, "total_steps": 2737, "loss": 1.0305, "lr": 5.896542999837265e-06, "epoch": 4.698209718670077, "percentage": 67.12, "elapsed_time": "18:13:27", "remaining_time": "8:55:43"} +{"current_steps": 1838, "total_steps": 2737, "loss": 0.9987, "lr": 5.8849145394454806e-06, "epoch": 4.70076726342711, "percentage": 67.15, "elapsed_time": "18:14:03", "remaining_time": "8:55:07"} +{"current_steps": 1839, "total_steps": 2737, "loss": 0.9943, "lr": 5.873292774040442e-06, "epoch": 4.703324808184143, "percentage": 67.19, "elapsed_time": "18:14:38", "remaining_time": "8:54:31"} +{"current_steps": 1840, "total_steps": 2737, "loss": 1.0579, "lr": 5.861677722530037e-06, "epoch": 4.705882352941177, "percentage": 67.23, "elapsed_time": "18:15:14", "remaining_time": "8:53:55"} +{"current_steps": 1841, "total_steps": 2737, "loss": 1.0181, "lr": 5.850069403811235e-06, "epoch": 4.708439897698209, "percentage": 67.26, "elapsed_time": "18:15:50", "remaining_time": "8:53:20"} +{"current_steps": 1842, "total_steps": 2737, "loss": 1.0125, "lr": 5.8384678367700325e-06, "epoch": 4.710997442455243, "percentage": 67.3, "elapsed_time": "18:16:25", "remaining_time": "8:52:44"} +{"current_steps": 1843, "total_steps": 2737, "loss": 1.0157, "lr": 5.826873040281462e-06, "epoch": 4.713554987212277, "percentage": 67.34, "elapsed_time": "18:17:01", "remaining_time": "8:52:08"} +{"current_steps": 1844, "total_steps": 2737, "loss": 1.0343, "lr": 5.81528503320953e-06, "epoch": 4.716112531969309, "percentage": 67.37, "elapsed_time": "18:17:37", "remaining_time": "8:51:33"} +{"current_steps": 1845, "total_steps": 2737, "loss": 1.0318, "lr": 5.8037038344072e-06, "epoch": 4.718670076726343, "percentage": 67.41, "elapsed_time": "18:18:13", "remaining_time": "8:50:57"} +{"current_steps": 1846, "total_steps": 2737, "loss": 1.0219, "lr": 5.792129462716355e-06, "epoch": 4.721227621483376, "percentage": 67.45, "elapsed_time": "18:18:48", "remaining_time": "8:50:21"} +{"current_steps": 1847, "total_steps": 2737, "loss": 1.0272, "lr": 5.780561936967779e-06, "epoch": 4.723785166240409, "percentage": 67.48, "elapsed_time": "18:19:24", "remaining_time": "8:49:45"} +{"current_steps": 1848, "total_steps": 2737, "loss": 1.0565, "lr": 5.769001275981112e-06, "epoch": 4.726342710997442, "percentage": 67.52, "elapsed_time": "18:20:00", "remaining_time": "8:49:10"} +{"current_steps": 1849, "total_steps": 2737, "loss": 1.0535, "lr": 5.757447498564821e-06, "epoch": 4.728900255754476, "percentage": 67.56, "elapsed_time": "18:20:35", "remaining_time": "8:48:34"} +{"current_steps": 1850, "total_steps": 2737, "loss": 1.021, "lr": 5.745900623516189e-06, "epoch": 4.731457800511509, "percentage": 67.59, "elapsed_time": "18:21:11", "remaining_time": "8:47:58"} +{"current_steps": 1851, "total_steps": 2737, "loss": 1.0248, "lr": 5.734360669621255e-06, "epoch": 4.734015345268542, "percentage": 67.63, "elapsed_time": "18:21:47", "remaining_time": "8:47:22"} +{"current_steps": 1852, "total_steps": 2737, "loss": 1.0156, "lr": 5.722827655654801e-06, "epoch": 4.736572890025576, "percentage": 67.67, "elapsed_time": "18:22:22", "remaining_time": "8:46:47"} +{"current_steps": 1853, "total_steps": 2737, "loss": 1.0569, "lr": 5.711301600380317e-06, "epoch": 4.739130434782608, "percentage": 67.7, "elapsed_time": "18:22:58", "remaining_time": "8:46:11"} +{"current_steps": 1854, "total_steps": 2737, "loss": 1.0509, "lr": 5.699782522549983e-06, "epoch": 4.741687979539642, "percentage": 67.74, "elapsed_time": "18:23:34", "remaining_time": "8:45:35"} +{"current_steps": 1855, "total_steps": 2737, "loss": 1.0273, "lr": 5.688270440904613e-06, "epoch": 4.744245524296675, "percentage": 67.77, "elapsed_time": "18:24:10", "remaining_time": "8:45:00"} +{"current_steps": 1856, "total_steps": 2737, "loss": 0.9938, "lr": 5.6767653741736405e-06, "epoch": 4.746803069053708, "percentage": 67.81, "elapsed_time": "18:24:45", "remaining_time": "8:44:24"} +{"current_steps": 1857, "total_steps": 2737, "loss": 1.0144, "lr": 5.665267341075098e-06, "epoch": 4.749360613810742, "percentage": 67.85, "elapsed_time": "18:25:21", "remaining_time": "8:43:48"} +{"current_steps": 1858, "total_steps": 2737, "loss": 1.0478, "lr": 5.653776360315562e-06, "epoch": 4.751918158567775, "percentage": 67.88, "elapsed_time": "18:25:57", "remaining_time": "8:43:12"} +{"current_steps": 1859, "total_steps": 2737, "loss": 1.0122, "lr": 5.642292450590134e-06, "epoch": 4.754475703324808, "percentage": 67.92, "elapsed_time": "18:26:32", "remaining_time": "8:42:37"} +{"current_steps": 1860, "total_steps": 2737, "loss": 1.0413, "lr": 5.630815630582429e-06, "epoch": 4.757033248081841, "percentage": 67.96, "elapsed_time": "18:27:08", "remaining_time": "8:42:01"} +{"current_steps": 1861, "total_steps": 2737, "loss": 1.0337, "lr": 5.61934591896451e-06, "epoch": 4.759590792838875, "percentage": 67.99, "elapsed_time": "18:27:44", "remaining_time": "8:41:25"} +{"current_steps": 1862, "total_steps": 2737, "loss": 1.0287, "lr": 5.60788333439688e-06, "epoch": 4.762148337595908, "percentage": 68.03, "elapsed_time": "18:28:20", "remaining_time": "8:40:50"} +{"current_steps": 1863, "total_steps": 2737, "loss": 1.0443, "lr": 5.596427895528443e-06, "epoch": 4.764705882352941, "percentage": 68.07, "elapsed_time": "18:28:55", "remaining_time": "8:40:14"} +{"current_steps": 1864, "total_steps": 2737, "loss": 1.0489, "lr": 5.584979620996491e-06, "epoch": 4.767263427109975, "percentage": 68.1, "elapsed_time": "18:29:31", "remaining_time": "8:39:38"} +{"current_steps": 1865, "total_steps": 2737, "loss": 1.0144, "lr": 5.573538529426645e-06, "epoch": 4.7698209718670075, "percentage": 68.14, "elapsed_time": "18:30:07", "remaining_time": "8:39:02"} +{"current_steps": 1866, "total_steps": 2737, "loss": 1.0427, "lr": 5.562104639432845e-06, "epoch": 4.772378516624041, "percentage": 68.18, "elapsed_time": "18:30:42", "remaining_time": "8:38:27"} +{"current_steps": 1867, "total_steps": 2737, "loss": 1.0162, "lr": 5.550677969617319e-06, "epoch": 4.774936061381074, "percentage": 68.21, "elapsed_time": "18:31:18", "remaining_time": "8:37:51"} +{"current_steps": 1868, "total_steps": 2737, "loss": 1.0164, "lr": 5.539258538570544e-06, "epoch": 4.7774936061381075, "percentage": 68.25, "elapsed_time": "18:31:54", "remaining_time": "8:37:15"} +{"current_steps": 1869, "total_steps": 2737, "loss": 1.0309, "lr": 5.527846364871219e-06, "epoch": 4.78005115089514, "percentage": 68.29, "elapsed_time": "18:32:30", "remaining_time": "8:36:40"} +{"current_steps": 1870, "total_steps": 2737, "loss": 1.0228, "lr": 5.516441467086231e-06, "epoch": 4.782608695652174, "percentage": 68.32, "elapsed_time": "18:33:05", "remaining_time": "8:36:04"} +{"current_steps": 1871, "total_steps": 2737, "loss": 1.0734, "lr": 5.505043863770646e-06, "epoch": 4.7851662404092075, "percentage": 68.36, "elapsed_time": "18:33:41", "remaining_time": "8:35:28"} +{"current_steps": 1872, "total_steps": 2737, "loss": 1.0048, "lr": 5.493653573467647e-06, "epoch": 4.78772378516624, "percentage": 68.4, "elapsed_time": "18:34:16", "remaining_time": "8:34:52"} +{"current_steps": 1873, "total_steps": 2737, "loss": 1.0125, "lr": 5.4822706147085205e-06, "epoch": 4.790281329923274, "percentage": 68.43, "elapsed_time": "18:34:52", "remaining_time": "8:34:17"} +{"current_steps": 1874, "total_steps": 2737, "loss": 0.9959, "lr": 5.470895006012637e-06, "epoch": 4.792838874680307, "percentage": 68.47, "elapsed_time": "18:35:28", "remaining_time": "8:33:41"} +{"current_steps": 1875, "total_steps": 2737, "loss": 1.0297, "lr": 5.459526765887397e-06, "epoch": 4.79539641943734, "percentage": 68.51, "elapsed_time": "18:36:03", "remaining_time": "8:33:05"} +{"current_steps": 1876, "total_steps": 2737, "loss": 0.9945, "lr": 5.448165912828214e-06, "epoch": 4.797953964194374, "percentage": 68.54, "elapsed_time": "18:36:39", "remaining_time": "8:32:29"} +{"current_steps": 1877, "total_steps": 2737, "loss": 1.0363, "lr": 5.4368124653184835e-06, "epoch": 4.8005115089514065, "percentage": 68.58, "elapsed_time": "18:37:15", "remaining_time": "8:31:54"} +{"current_steps": 1878, "total_steps": 2737, "loss": 1.0273, "lr": 5.4254664418295634e-06, "epoch": 4.80306905370844, "percentage": 68.62, "elapsed_time": "18:37:50", "remaining_time": "8:31:18"} +{"current_steps": 1879, "total_steps": 2737, "loss": 1.0098, "lr": 5.414127860820719e-06, "epoch": 4.805626598465473, "percentage": 68.65, "elapsed_time": "18:38:26", "remaining_time": "8:30:42"} +{"current_steps": 1880, "total_steps": 2737, "loss": 1.0057, "lr": 5.402796740739109e-06, "epoch": 4.8081841432225065, "percentage": 68.69, "elapsed_time": "18:39:02", "remaining_time": "8:30:06"} +{"current_steps": 1881, "total_steps": 2737, "loss": 1.0378, "lr": 5.391473100019767e-06, "epoch": 4.810741687979539, "percentage": 68.72, "elapsed_time": "18:39:37", "remaining_time": "8:29:31"} +{"current_steps": 1882, "total_steps": 2737, "loss": 1.0054, "lr": 5.380156957085536e-06, "epoch": 4.813299232736573, "percentage": 68.76, "elapsed_time": "18:40:13", "remaining_time": "8:28:55"} +{"current_steps": 1883, "total_steps": 2737, "loss": 0.9945, "lr": 5.3688483303470895e-06, "epoch": 4.8158567774936065, "percentage": 68.8, "elapsed_time": "18:40:49", "remaining_time": "8:28:19"} +{"current_steps": 1884, "total_steps": 2737, "loss": 1.018, "lr": 5.3575472382028386e-06, "epoch": 4.818414322250639, "percentage": 68.83, "elapsed_time": "18:41:24", "remaining_time": "8:27:43"} +{"current_steps": 1885, "total_steps": 2737, "loss": 1.0175, "lr": 5.346253699038966e-06, "epoch": 4.820971867007673, "percentage": 68.87, "elapsed_time": "18:42:00", "remaining_time": "8:27:08"} +{"current_steps": 1886, "total_steps": 2737, "loss": 1.0343, "lr": 5.334967731229348e-06, "epoch": 4.823529411764706, "percentage": 68.91, "elapsed_time": "18:42:36", "remaining_time": "8:26:32"} +{"current_steps": 1887, "total_steps": 2737, "loss": 1.033, "lr": 5.323689353135546e-06, "epoch": 4.826086956521739, "percentage": 68.94, "elapsed_time": "18:43:11", "remaining_time": "8:25:56"} +{"current_steps": 1888, "total_steps": 2737, "loss": 1.0341, "lr": 5.312418583106784e-06, "epoch": 4.828644501278772, "percentage": 68.98, "elapsed_time": "18:43:47", "remaining_time": "8:25:20"} +{"current_steps": 1889, "total_steps": 2737, "loss": 1.0189, "lr": 5.301155439479893e-06, "epoch": 4.831202046035806, "percentage": 69.02, "elapsed_time": "18:44:23", "remaining_time": "8:24:45"} +{"current_steps": 1890, "total_steps": 2737, "loss": 0.9979, "lr": 5.289899940579315e-06, "epoch": 4.833759590792839, "percentage": 69.05, "elapsed_time": "18:44:58", "remaining_time": "8:24:09"} +{"current_steps": 1891, "total_steps": 2737, "loss": 1.033, "lr": 5.278652104717026e-06, "epoch": 4.836317135549872, "percentage": 69.09, "elapsed_time": "18:45:34", "remaining_time": "8:23:33"} +{"current_steps": 1892, "total_steps": 2737, "loss": 1.0006, "lr": 5.267411950192558e-06, "epoch": 4.838874680306906, "percentage": 69.13, "elapsed_time": "18:46:10", "remaining_time": "8:22:58"} +{"current_steps": 1893, "total_steps": 2737, "loss": 0.976, "lr": 5.256179495292953e-06, "epoch": 4.841432225063938, "percentage": 69.16, "elapsed_time": "18:46:45", "remaining_time": "8:22:22"} +{"current_steps": 1894, "total_steps": 2737, "loss": 1.03, "lr": 5.244954758292691e-06, "epoch": 4.843989769820972, "percentage": 69.2, "elapsed_time": "18:47:21", "remaining_time": "8:21:46"} +{"current_steps": 1895, "total_steps": 2737, "loss": 1.017, "lr": 5.233737757453733e-06, "epoch": 4.846547314578006, "percentage": 69.24, "elapsed_time": "18:47:57", "remaining_time": "8:21:10"} +{"current_steps": 1896, "total_steps": 2737, "loss": 1.0544, "lr": 5.222528511025429e-06, "epoch": 4.849104859335038, "percentage": 69.27, "elapsed_time": "18:48:32", "remaining_time": "8:20:35"} +{"current_steps": 1897, "total_steps": 2737, "loss": 1.0199, "lr": 5.2113270372445334e-06, "epoch": 4.851662404092072, "percentage": 69.31, "elapsed_time": "18:49:08", "remaining_time": "8:19:59"} +{"current_steps": 1898, "total_steps": 2737, "loss": 1.0297, "lr": 5.200133354335129e-06, "epoch": 4.854219948849105, "percentage": 69.35, "elapsed_time": "18:49:44", "remaining_time": "8:19:23"} +{"current_steps": 1899, "total_steps": 2737, "loss": 1.0618, "lr": 5.188947480508644e-06, "epoch": 4.856777493606138, "percentage": 69.38, "elapsed_time": "18:50:20", "remaining_time": "8:18:47"} +{"current_steps": 1900, "total_steps": 2737, "loss": 1.0095, "lr": 5.177769433963801e-06, "epoch": 4.859335038363171, "percentage": 69.42, "elapsed_time": "18:50:55", "remaining_time": "8:18:12"} +{"current_steps": 1901, "total_steps": 2737, "loss": 1.0132, "lr": 5.166599232886579e-06, "epoch": 4.861892583120205, "percentage": 69.46, "elapsed_time": "18:51:31", "remaining_time": "8:17:36"} +{"current_steps": 1902, "total_steps": 2737, "loss": 1.0231, "lr": 5.155436895450197e-06, "epoch": 4.864450127877237, "percentage": 69.49, "elapsed_time": "18:52:06", "remaining_time": "8:17:00"} +{"current_steps": 1903, "total_steps": 2737, "loss": 1.0299, "lr": 5.144282439815075e-06, "epoch": 4.867007672634271, "percentage": 69.53, "elapsed_time": "18:52:42", "remaining_time": "8:16:24"} +{"current_steps": 1904, "total_steps": 2737, "loss": 1.0426, "lr": 5.133135884128828e-06, "epoch": 4.869565217391305, "percentage": 69.57, "elapsed_time": "18:53:18", "remaining_time": "8:15:49"} +{"current_steps": 1905, "total_steps": 2737, "loss": 1.0335, "lr": 5.121997246526188e-06, "epoch": 4.872122762148337, "percentage": 69.6, "elapsed_time": "18:53:54", "remaining_time": "8:15:13"} +{"current_steps": 1906, "total_steps": 2737, "loss": 1.0226, "lr": 5.110866545129031e-06, "epoch": 4.874680306905371, "percentage": 69.64, "elapsed_time": "18:54:29", "remaining_time": "8:14:37"} +{"current_steps": 1907, "total_steps": 2737, "loss": 1.03, "lr": 5.099743798046315e-06, "epoch": 4.877237851662404, "percentage": 69.67, "elapsed_time": "18:55:05", "remaining_time": "8:14:02"} +{"current_steps": 1908, "total_steps": 2737, "loss": 1.0524, "lr": 5.088629023374052e-06, "epoch": 4.879795396419437, "percentage": 69.71, "elapsed_time": "18:55:41", "remaining_time": "8:13:26"} +{"current_steps": 1909, "total_steps": 2737, "loss": 1.0598, "lr": 5.0775222391952826e-06, "epoch": 4.882352941176471, "percentage": 69.75, "elapsed_time": "18:56:16", "remaining_time": "8:12:50"} +{"current_steps": 1910, "total_steps": 2737, "loss": 1.0197, "lr": 5.06642346358005e-06, "epoch": 4.884910485933504, "percentage": 69.78, "elapsed_time": "18:56:52", "remaining_time": "8:12:15"} +{"current_steps": 1911, "total_steps": 2737, "loss": 1.001, "lr": 5.055332714585372e-06, "epoch": 4.887468030690537, "percentage": 69.82, "elapsed_time": "18:57:28", "remaining_time": "8:11:39"} +{"current_steps": 1912, "total_steps": 2737, "loss": 1.0432, "lr": 5.044250010255202e-06, "epoch": 4.89002557544757, "percentage": 69.86, "elapsed_time": "18:58:03", "remaining_time": "8:11:03"} +{"current_steps": 1913, "total_steps": 2737, "loss": 1.0314, "lr": 5.033175368620406e-06, "epoch": 4.892583120204604, "percentage": 69.89, "elapsed_time": "18:58:39", "remaining_time": "8:10:27"} +{"current_steps": 1914, "total_steps": 2737, "loss": 1.0358, "lr": 5.022108807698735e-06, "epoch": 4.8951406649616365, "percentage": 69.93, "elapsed_time": "18:59:15", "remaining_time": "8:09:52"} +{"current_steps": 1915, "total_steps": 2737, "loss": 1.0265, "lr": 5.0110503454947926e-06, "epoch": 4.89769820971867, "percentage": 69.97, "elapsed_time": "18:59:50", "remaining_time": "8:09:16"} +{"current_steps": 1916, "total_steps": 2737, "loss": 1.0495, "lr": 5.000000000000003e-06, "epoch": 4.900255754475703, "percentage": 70.0, "elapsed_time": "19:00:26", "remaining_time": "8:08:40"} +{"current_steps": 1917, "total_steps": 2737, "loss": 1.0044, "lr": 4.988957789192583e-06, "epoch": 4.9028132992327365, "percentage": 70.04, "elapsed_time": "19:01:02", "remaining_time": "8:08:04"} +{"current_steps": 1918, "total_steps": 2737, "loss": 0.977, "lr": 4.97792373103753e-06, "epoch": 4.90537084398977, "percentage": 70.08, "elapsed_time": "19:01:38", "remaining_time": "8:07:29"} +{"current_steps": 1919, "total_steps": 2737, "loss": 1.0563, "lr": 4.966897843486561e-06, "epoch": 4.907928388746803, "percentage": 70.11, "elapsed_time": "19:02:13", "remaining_time": "8:06:53"} +{"current_steps": 1920, "total_steps": 2737, "loss": 1.0172, "lr": 4.955880144478101e-06, "epoch": 4.910485933503836, "percentage": 70.15, "elapsed_time": "19:02:49", "remaining_time": "8:06:17"} +{"current_steps": 1921, "total_steps": 2737, "loss": 1.0332, "lr": 4.944870651937267e-06, "epoch": 4.913043478260869, "percentage": 70.19, "elapsed_time": "19:03:25", "remaining_time": "8:05:41"} +{"current_steps": 1922, "total_steps": 2737, "loss": 1.0285, "lr": 4.933869383775809e-06, "epoch": 4.915601023017903, "percentage": 70.22, "elapsed_time": "19:04:00", "remaining_time": "8:05:06"} +{"current_steps": 1923, "total_steps": 2737, "loss": 1.0082, "lr": 4.922876357892103e-06, "epoch": 4.918158567774936, "percentage": 70.26, "elapsed_time": "19:04:36", "remaining_time": "8:04:30"} +{"current_steps": 1924, "total_steps": 2737, "loss": 1.0131, "lr": 4.911891592171113e-06, "epoch": 4.920716112531969, "percentage": 70.3, "elapsed_time": "19:05:12", "remaining_time": "8:03:54"} +{"current_steps": 1925, "total_steps": 2737, "loss": 1.0502, "lr": 4.900915104484372e-06, "epoch": 4.923273657289003, "percentage": 70.33, "elapsed_time": "19:05:47", "remaining_time": "8:03:19"} +{"current_steps": 1926, "total_steps": 2737, "loss": 1.0457, "lr": 4.889946912689936e-06, "epoch": 4.9258312020460355, "percentage": 70.37, "elapsed_time": "19:06:23", "remaining_time": "8:02:43"} +{"current_steps": 1927, "total_steps": 2737, "loss": 1.0491, "lr": 4.878987034632361e-06, "epoch": 4.928388746803069, "percentage": 70.41, "elapsed_time": "19:06:59", "remaining_time": "8:02:07"} +{"current_steps": 1928, "total_steps": 2737, "loss": 1.011, "lr": 4.8680354881426935e-06, "epoch": 4.930946291560103, "percentage": 70.44, "elapsed_time": "19:07:34", "remaining_time": "8:01:31"} +{"current_steps": 1929, "total_steps": 2737, "loss": 1.0356, "lr": 4.857092291038411e-06, "epoch": 4.9335038363171355, "percentage": 70.48, "elapsed_time": "19:08:10", "remaining_time": "8:00:56"} +{"current_steps": 1930, "total_steps": 2737, "loss": 1.0556, "lr": 4.846157461123411e-06, "epoch": 4.936061381074169, "percentage": 70.52, "elapsed_time": "19:08:45", "remaining_time": "8:00:20"} +{"current_steps": 1931, "total_steps": 2737, "loss": 1.0521, "lr": 4.8352310161879724e-06, "epoch": 4.938618925831202, "percentage": 70.55, "elapsed_time": "19:09:21", "remaining_time": "7:59:44"} +{"current_steps": 1932, "total_steps": 2737, "loss": 1.0348, "lr": 4.824312974008748e-06, "epoch": 4.9411764705882355, "percentage": 70.59, "elapsed_time": "19:09:57", "remaining_time": "7:59:08"} +{"current_steps": 1933, "total_steps": 2737, "loss": 1.003, "lr": 4.813403352348703e-06, "epoch": 4.943734015345268, "percentage": 70.62, "elapsed_time": "19:10:32", "remaining_time": "7:58:33"} +{"current_steps": 1934, "total_steps": 2737, "loss": 1.0261, "lr": 4.8025021689571095e-06, "epoch": 4.946291560102302, "percentage": 70.66, "elapsed_time": "19:11:08", "remaining_time": "7:57:57"} +{"current_steps": 1935, "total_steps": 2737, "loss": 1.013, "lr": 4.791609441569517e-06, "epoch": 4.948849104859335, "percentage": 70.7, "elapsed_time": "19:11:44", "remaining_time": "7:57:21"} +{"current_steps": 1936, "total_steps": 2737, "loss": 1.0211, "lr": 4.780725187907707e-06, "epoch": 4.951406649616368, "percentage": 70.73, "elapsed_time": "19:12:19", "remaining_time": "7:56:45"} +{"current_steps": 1937, "total_steps": 2737, "loss": 1.0222, "lr": 4.769849425679683e-06, "epoch": 4.953964194373402, "percentage": 70.77, "elapsed_time": "19:12:55", "remaining_time": "7:56:10"} +{"current_steps": 1938, "total_steps": 2737, "loss": 0.9967, "lr": 4.758982172579621e-06, "epoch": 4.956521739130435, "percentage": 70.81, "elapsed_time": "19:13:31", "remaining_time": "7:55:34"} +{"current_steps": 1939, "total_steps": 2737, "loss": 1.0321, "lr": 4.748123446287875e-06, "epoch": 4.959079283887468, "percentage": 70.84, "elapsed_time": "19:14:06", "remaining_time": "7:54:58"} +{"current_steps": 1940, "total_steps": 2737, "loss": 1.0923, "lr": 4.737273264470909e-06, "epoch": 4.961636828644501, "percentage": 70.88, "elapsed_time": "19:14:42", "remaining_time": "7:54:22"} +{"current_steps": 1941, "total_steps": 2737, "loss": 1.0245, "lr": 4.726431644781284e-06, "epoch": 4.964194373401535, "percentage": 70.92, "elapsed_time": "19:15:18", "remaining_time": "7:53:47"} +{"current_steps": 1942, "total_steps": 2737, "loss": 1.0378, "lr": 4.715598604857648e-06, "epoch": 4.966751918158568, "percentage": 70.95, "elapsed_time": "19:15:53", "remaining_time": "7:53:11"} +{"current_steps": 1943, "total_steps": 2737, "loss": 1.0287, "lr": 4.704774162324673e-06, "epoch": 4.969309462915601, "percentage": 70.99, "elapsed_time": "19:16:29", "remaining_time": "7:52:35"} +{"current_steps": 1944, "total_steps": 2737, "loss": 1.0024, "lr": 4.6939583347930525e-06, "epoch": 4.971867007672635, "percentage": 71.03, "elapsed_time": "19:17:05", "remaining_time": "7:52:00"} +{"current_steps": 1945, "total_steps": 2737, "loss": 1.0216, "lr": 4.6831511398594574e-06, "epoch": 4.974424552429667, "percentage": 71.06, "elapsed_time": "19:17:40", "remaining_time": "7:51:24"} +{"current_steps": 1946, "total_steps": 2737, "loss": 1.0595, "lr": 4.672352595106525e-06, "epoch": 4.976982097186701, "percentage": 71.1, "elapsed_time": "19:18:16", "remaining_time": "7:50:48"} +{"current_steps": 1947, "total_steps": 2737, "loss": 1.0056, "lr": 4.661562718102808e-06, "epoch": 4.979539641943734, "percentage": 71.14, "elapsed_time": "19:18:52", "remaining_time": "7:50:12"} +{"current_steps": 1948, "total_steps": 2737, "loss": 1.0221, "lr": 4.65078152640276e-06, "epoch": 4.982097186700767, "percentage": 71.17, "elapsed_time": "19:19:27", "remaining_time": "7:49:37"} +{"current_steps": 1949, "total_steps": 2737, "loss": 1.0534, "lr": 4.640009037546711e-06, "epoch": 4.9846547314578, "percentage": 71.21, "elapsed_time": "19:20:03", "remaining_time": "7:49:01"} +{"current_steps": 1950, "total_steps": 2737, "loss": 1.046, "lr": 4.629245269060826e-06, "epoch": 4.987212276214834, "percentage": 71.25, "elapsed_time": "19:20:39", "remaining_time": "7:48:25"} +{"current_steps": 1951, "total_steps": 2737, "loss": 1.0119, "lr": 4.61849023845708e-06, "epoch": 4.989769820971867, "percentage": 71.28, "elapsed_time": "19:21:14", "remaining_time": "7:47:49"} +{"current_steps": 1952, "total_steps": 2737, "loss": 1.0373, "lr": 4.607743963233233e-06, "epoch": 4.9923273657289, "percentage": 71.32, "elapsed_time": "19:21:50", "remaining_time": "7:47:14"} +{"current_steps": 1953, "total_steps": 2737, "loss": 0.9995, "lr": 4.5970064608728085e-06, "epoch": 4.994884910485934, "percentage": 71.36, "elapsed_time": "19:22:26", "remaining_time": "7:46:38"} +{"current_steps": 1954, "total_steps": 2737, "loss": 1.0053, "lr": 4.586277748845056e-06, "epoch": 4.997442455242966, "percentage": 71.39, "elapsed_time": "19:23:01", "remaining_time": "7:46:02"} +{"current_steps": 1955, "total_steps": 2737, "loss": 1.0268, "lr": 4.575557844604905e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "19:23:37", "remaining_time": "7:45:26"} +{"current_steps": 1956, "total_steps": 2737, "loss": 1.0199, "lr": 4.5648467655929815e-06, "epoch": 5.002557544757034, "percentage": 71.47, "elapsed_time": "19:24:21", "remaining_time": "7:44:54"} +{"current_steps": 1957, "total_steps": 2737, "loss": 1.0277, "lr": 4.554144529235537e-06, "epoch": 5.005115089514066, "percentage": 71.5, "elapsed_time": "19:24:57", "remaining_time": "7:44:18"} +{"current_steps": 1958, "total_steps": 2737, "loss": 1.0562, "lr": 4.543451152944438e-06, "epoch": 5.0076726342711, "percentage": 71.54, "elapsed_time": "19:25:32", "remaining_time": "7:43:43"} +{"current_steps": 1959, "total_steps": 2737, "loss": 1.031, "lr": 4.532766654117146e-06, "epoch": 5.010230179028133, "percentage": 71.57, "elapsed_time": "19:26:08", "remaining_time": "7:43:07"} +{"current_steps": 1960, "total_steps": 2737, "loss": 1.0368, "lr": 4.5220910501366635e-06, "epoch": 5.012787723785166, "percentage": 71.61, "elapsed_time": "19:26:44", "remaining_time": "7:42:31"} +{"current_steps": 1961, "total_steps": 2737, "loss": 1.0358, "lr": 4.511424358371544e-06, "epoch": 5.015345268542199, "percentage": 71.65, "elapsed_time": "19:27:19", "remaining_time": "7:41:55"} +{"current_steps": 1962, "total_steps": 2737, "loss": 1.0037, "lr": 4.500766596175813e-06, "epoch": 5.017902813299233, "percentage": 71.68, "elapsed_time": "19:27:55", "remaining_time": "7:41:20"} +{"current_steps": 1963, "total_steps": 2737, "loss": 1.0191, "lr": 4.490117780888994e-06, "epoch": 5.020460358056266, "percentage": 71.72, "elapsed_time": "19:28:31", "remaining_time": "7:40:44"} +{"current_steps": 1964, "total_steps": 2737, "loss": 1.0225, "lr": 4.479477929836039e-06, "epoch": 5.023017902813299, "percentage": 71.76, "elapsed_time": "19:29:07", "remaining_time": "7:40:08"} +{"current_steps": 1965, "total_steps": 2737, "loss": 0.9987, "lr": 4.4688470603273184e-06, "epoch": 5.025575447570333, "percentage": 71.79, "elapsed_time": "19:29:42", "remaining_time": "7:39:33"} +{"current_steps": 1966, "total_steps": 2737, "loss": 1.0244, "lr": 4.458225189658598e-06, "epoch": 5.028132992327365, "percentage": 71.83, "elapsed_time": "19:30:18", "remaining_time": "7:38:57"} +{"current_steps": 1967, "total_steps": 2737, "loss": 1.0147, "lr": 4.447612335110991e-06, "epoch": 5.030690537084399, "percentage": 71.87, "elapsed_time": "19:30:53", "remaining_time": "7:38:21"} +{"current_steps": 1968, "total_steps": 2737, "loss": 1.0056, "lr": 4.43700851395096e-06, "epoch": 5.033248081841432, "percentage": 71.9, "elapsed_time": "19:31:29", "remaining_time": "7:37:45"} +{"current_steps": 1969, "total_steps": 2737, "loss": 1.0486, "lr": 4.426413743430241e-06, "epoch": 5.035805626598465, "percentage": 71.94, "elapsed_time": "19:32:05", "remaining_time": "7:37:10"} +{"current_steps": 1970, "total_steps": 2737, "loss": 1.0046, "lr": 4.415828040785877e-06, "epoch": 5.038363171355499, "percentage": 71.98, "elapsed_time": "19:32:41", "remaining_time": "7:36:34"} +{"current_steps": 1971, "total_steps": 2737, "loss": 1.0158, "lr": 4.405251423240138e-06, "epoch": 5.040920716112532, "percentage": 72.01, "elapsed_time": "19:33:16", "remaining_time": "7:35:58"} +{"current_steps": 1972, "total_steps": 2737, "loss": 1.0167, "lr": 4.3946839080005236e-06, "epoch": 5.043478260869565, "percentage": 72.05, "elapsed_time": "19:33:52", "remaining_time": "7:35:22"} +{"current_steps": 1973, "total_steps": 2737, "loss": 1.0412, "lr": 4.384125512259718e-06, "epoch": 5.046035805626598, "percentage": 72.09, "elapsed_time": "19:34:27", "remaining_time": "7:34:47"} +{"current_steps": 1974, "total_steps": 2737, "loss": 1.0058, "lr": 4.373576253195568e-06, "epoch": 5.048593350383632, "percentage": 72.12, "elapsed_time": "19:35:03", "remaining_time": "7:34:11"} +{"current_steps": 1975, "total_steps": 2737, "loss": 0.9958, "lr": 4.363036147971069e-06, "epoch": 5.051150895140665, "percentage": 72.16, "elapsed_time": "19:35:39", "remaining_time": "7:33:35"} +{"current_steps": 1976, "total_steps": 2737, "loss": 1.0202, "lr": 4.352505213734298e-06, "epoch": 5.053708439897698, "percentage": 72.2, "elapsed_time": "19:36:14", "remaining_time": "7:32:59"} +{"current_steps": 1977, "total_steps": 2737, "loss": 1.0221, "lr": 4.3419834676184395e-06, "epoch": 5.056265984654732, "percentage": 72.23, "elapsed_time": "19:36:50", "remaining_time": "7:32:24"} +{"current_steps": 1978, "total_steps": 2737, "loss": 1.0264, "lr": 4.331470926741707e-06, "epoch": 5.0588235294117645, "percentage": 72.27, "elapsed_time": "19:37:26", "remaining_time": "7:31:48"} +{"current_steps": 1979, "total_steps": 2737, "loss": 1.0256, "lr": 4.320967608207354e-06, "epoch": 5.061381074168798, "percentage": 72.31, "elapsed_time": "19:38:01", "remaining_time": "7:31:12"} +{"current_steps": 1980, "total_steps": 2737, "loss": 1.0246, "lr": 4.3104735291036214e-06, "epoch": 5.063938618925831, "percentage": 72.34, "elapsed_time": "19:38:37", "remaining_time": "7:30:36"} +{"current_steps": 1981, "total_steps": 2737, "loss": 0.9895, "lr": 4.299988706503716e-06, "epoch": 5.0664961636828645, "percentage": 72.38, "elapsed_time": "19:39:13", "remaining_time": "7:30:01"} +{"current_steps": 1982, "total_steps": 2737, "loss": 1.0069, "lr": 4.289513157465796e-06, "epoch": 5.069053708439898, "percentage": 72.42, "elapsed_time": "19:39:48", "remaining_time": "7:29:25"} +{"current_steps": 1983, "total_steps": 2737, "loss": 1.028, "lr": 4.279046899032918e-06, "epoch": 5.071611253196931, "percentage": 72.45, "elapsed_time": "19:40:24", "remaining_time": "7:28:49"} +{"current_steps": 1984, "total_steps": 2737, "loss": 0.9806, "lr": 4.268589948233034e-06, "epoch": 5.0741687979539645, "percentage": 72.49, "elapsed_time": "19:41:00", "remaining_time": "7:28:14"} +{"current_steps": 1985, "total_steps": 2737, "loss": 1.0141, "lr": 4.258142322078944e-06, "epoch": 5.076726342710997, "percentage": 72.52, "elapsed_time": "19:41:36", "remaining_time": "7:27:38"} +{"current_steps": 1986, "total_steps": 2737, "loss": 1.0484, "lr": 4.247704037568289e-06, "epoch": 5.079283887468031, "percentage": 72.56, "elapsed_time": "19:42:12", "remaining_time": "7:27:02"} +{"current_steps": 1987, "total_steps": 2737, "loss": 1.0176, "lr": 4.237275111683502e-06, "epoch": 5.081841432225064, "percentage": 72.6, "elapsed_time": "19:42:48", "remaining_time": "7:26:27"} +{"current_steps": 1988, "total_steps": 2737, "loss": 1.0241, "lr": 4.226855561391792e-06, "epoch": 5.084398976982097, "percentage": 72.63, "elapsed_time": "19:43:23", "remaining_time": "7:25:51"} +{"current_steps": 1989, "total_steps": 2737, "loss": 1.0105, "lr": 4.2164454036451185e-06, "epoch": 5.086956521739131, "percentage": 72.67, "elapsed_time": "19:43:59", "remaining_time": "7:25:15"} +{"current_steps": 1990, "total_steps": 2737, "loss": 1.0571, "lr": 4.2060446553801585e-06, "epoch": 5.089514066496164, "percentage": 72.71, "elapsed_time": "19:44:35", "remaining_time": "7:24:39"} +{"current_steps": 1991, "total_steps": 2737, "loss": 1.0309, "lr": 4.195653333518271e-06, "epoch": 5.092071611253197, "percentage": 72.74, "elapsed_time": "19:45:10", "remaining_time": "7:24:04"} +{"current_steps": 1992, "total_steps": 2737, "loss": 1.0286, "lr": 4.1852714549654985e-06, "epoch": 5.09462915601023, "percentage": 72.78, "elapsed_time": "19:45:46", "remaining_time": "7:23:28"} +{"current_steps": 1993, "total_steps": 2737, "loss": 1.0092, "lr": 4.1748990366125005e-06, "epoch": 5.0971867007672635, "percentage": 72.82, "elapsed_time": "19:46:22", "remaining_time": "7:22:52"} +{"current_steps": 1994, "total_steps": 2737, "loss": 1.0055, "lr": 4.164536095334557e-06, "epoch": 5.099744245524296, "percentage": 72.85, "elapsed_time": "19:46:57", "remaining_time": "7:22:16"} +{"current_steps": 1995, "total_steps": 2737, "loss": 1.0492, "lr": 4.154182647991519e-06, "epoch": 5.10230179028133, "percentage": 72.89, "elapsed_time": "19:47:33", "remaining_time": "7:21:41"} +{"current_steps": 1996, "total_steps": 2737, "loss": 1.0103, "lr": 4.143838711427808e-06, "epoch": 5.1048593350383635, "percentage": 72.93, "elapsed_time": "19:48:09", "remaining_time": "7:21:05"} +{"current_steps": 1997, "total_steps": 2737, "loss": 1.0015, "lr": 4.133504302472356e-06, "epoch": 5.107416879795396, "percentage": 72.96, "elapsed_time": "19:48:44", "remaining_time": "7:20:29"} +{"current_steps": 1998, "total_steps": 2737, "loss": 1.0394, "lr": 4.123179437938596e-06, "epoch": 5.10997442455243, "percentage": 73.0, "elapsed_time": "19:49:20", "remaining_time": "7:19:54"} +{"current_steps": 1999, "total_steps": 2737, "loss": 1.0406, "lr": 4.112864134624447e-06, "epoch": 5.112531969309463, "percentage": 73.04, "elapsed_time": "19:49:55", "remaining_time": "7:19:18"} +{"current_steps": 2000, "total_steps": 2737, "loss": 1.022, "lr": 4.102558409312256e-06, "epoch": 5.115089514066496, "percentage": 73.07, "elapsed_time": "19:50:31", "remaining_time": "7:18:42"} +{"current_steps": 2001, "total_steps": 2737, "loss": 1.0132, "lr": 4.092262278768797e-06, "epoch": 5.117647058823529, "percentage": 73.11, "elapsed_time": "19:51:07", "remaining_time": "7:18:06"} +{"current_steps": 2002, "total_steps": 2737, "loss": 1.0328, "lr": 4.0819757597452246e-06, "epoch": 5.120204603580563, "percentage": 73.15, "elapsed_time": "19:51:42", "remaining_time": "7:17:31"} +{"current_steps": 2003, "total_steps": 2737, "loss": 1.0067, "lr": 4.0716988689770695e-06, "epoch": 5.122762148337596, "percentage": 73.18, "elapsed_time": "19:52:18", "remaining_time": "7:16:55"} +{"current_steps": 2004, "total_steps": 2737, "loss": 1.0289, "lr": 4.061431623184188e-06, "epoch": 5.125319693094629, "percentage": 73.22, "elapsed_time": "19:52:54", "remaining_time": "7:16:19"} +{"current_steps": 2005, "total_steps": 2737, "loss": 0.9812, "lr": 4.051174039070742e-06, "epoch": 5.127877237851663, "percentage": 73.26, "elapsed_time": "19:53:29", "remaining_time": "7:15:43"} +{"current_steps": 2006, "total_steps": 2737, "loss": 1.0059, "lr": 4.040926133325188e-06, "epoch": 5.130434782608695, "percentage": 73.29, "elapsed_time": "19:54:05", "remaining_time": "7:15:08"} +{"current_steps": 2007, "total_steps": 2737, "loss": 1.0183, "lr": 4.030687922620223e-06, "epoch": 5.132992327365729, "percentage": 73.33, "elapsed_time": "19:54:41", "remaining_time": "7:14:32"} +{"current_steps": 2008, "total_steps": 2737, "loss": 1.0328, "lr": 4.020459423612777e-06, "epoch": 5.135549872122763, "percentage": 73.36, "elapsed_time": "19:55:16", "remaining_time": "7:13:56"} +{"current_steps": 2009, "total_steps": 2737, "loss": 1.0247, "lr": 4.010240652943974e-06, "epoch": 5.138107416879795, "percentage": 73.4, "elapsed_time": "19:55:52", "remaining_time": "7:13:20"} +{"current_steps": 2010, "total_steps": 2737, "loss": 1.0271, "lr": 4.000031627239123e-06, "epoch": 5.140664961636829, "percentage": 73.44, "elapsed_time": "19:56:29", "remaining_time": "7:12:45"} +{"current_steps": 2011, "total_steps": 2737, "loss": 0.9729, "lr": 3.989832363107664e-06, "epoch": 5.143222506393862, "percentage": 73.47, "elapsed_time": "19:57:04", "remaining_time": "7:12:09"} +{"current_steps": 2012, "total_steps": 2737, "loss": 1.0208, "lr": 3.9796428771431625e-06, "epoch": 5.145780051150895, "percentage": 73.51, "elapsed_time": "19:57:40", "remaining_time": "7:11:33"} +{"current_steps": 2013, "total_steps": 2737, "loss": 0.9944, "lr": 3.96946318592328e-06, "epoch": 5.148337595907928, "percentage": 73.55, "elapsed_time": "19:58:15", "remaining_time": "7:10:58"} +{"current_steps": 2014, "total_steps": 2737, "loss": 1.0606, "lr": 3.959293306009734e-06, "epoch": 5.150895140664962, "percentage": 73.58, "elapsed_time": "19:58:51", "remaining_time": "7:10:22"} +{"current_steps": 2015, "total_steps": 2737, "loss": 1.0035, "lr": 3.949133253948284e-06, "epoch": 5.153452685421995, "percentage": 73.62, "elapsed_time": "19:59:27", "remaining_time": "7:09:46"} +{"current_steps": 2016, "total_steps": 2737, "loss": 0.9869, "lr": 3.938983046268695e-06, "epoch": 5.156010230179028, "percentage": 73.66, "elapsed_time": "20:00:03", "remaining_time": "7:09:11"} +{"current_steps": 2017, "total_steps": 2737, "loss": 1.0238, "lr": 3.9288426994847285e-06, "epoch": 5.158567774936062, "percentage": 73.69, "elapsed_time": "20:00:38", "remaining_time": "7:08:35"} +{"current_steps": 2018, "total_steps": 2737, "loss": 1.0521, "lr": 3.918712230094091e-06, "epoch": 5.161125319693094, "percentage": 73.73, "elapsed_time": "20:01:14", "remaining_time": "7:07:59"} +{"current_steps": 2019, "total_steps": 2737, "loss": 0.9878, "lr": 3.908591654578417e-06, "epoch": 5.163682864450128, "percentage": 73.77, "elapsed_time": "20:01:50", "remaining_time": "7:07:23"} +{"current_steps": 2020, "total_steps": 2737, "loss": 1.0203, "lr": 3.89848098940326e-06, "epoch": 5.166240409207161, "percentage": 73.8, "elapsed_time": "20:02:25", "remaining_time": "7:06:48"} +{"current_steps": 2021, "total_steps": 2737, "loss": 1.0112, "lr": 3.888380251018035e-06, "epoch": 5.168797953964194, "percentage": 73.84, "elapsed_time": "20:03:01", "remaining_time": "7:06:12"} +{"current_steps": 2022, "total_steps": 2737, "loss": 1.0589, "lr": 3.878289455856013e-06, "epoch": 5.171355498721228, "percentage": 73.88, "elapsed_time": "20:03:37", "remaining_time": "7:05:36"} +{"current_steps": 2023, "total_steps": 2737, "loss": 1.0065, "lr": 3.868208620334282e-06, "epoch": 5.173913043478261, "percentage": 73.91, "elapsed_time": "20:04:12", "remaining_time": "7:05:00"} +{"current_steps": 2024, "total_steps": 2737, "loss": 1.0189, "lr": 3.858137760853737e-06, "epoch": 5.176470588235294, "percentage": 73.95, "elapsed_time": "20:04:48", "remaining_time": "7:04:25"} +{"current_steps": 2025, "total_steps": 2737, "loss": 1.0052, "lr": 3.84807689379904e-06, "epoch": 5.179028132992327, "percentage": 73.99, "elapsed_time": "20:05:24", "remaining_time": "7:03:49"} +{"current_steps": 2026, "total_steps": 2737, "loss": 0.9946, "lr": 3.838026035538581e-06, "epoch": 5.181585677749361, "percentage": 74.02, "elapsed_time": "20:05:59", "remaining_time": "7:03:13"} +{"current_steps": 2027, "total_steps": 2737, "loss": 1.0234, "lr": 3.827985202424488e-06, "epoch": 5.1841432225063935, "percentage": 74.06, "elapsed_time": "20:06:35", "remaining_time": "7:02:38"} +{"current_steps": 2028, "total_steps": 2737, "loss": 1.0137, "lr": 3.817954410792565e-06, "epoch": 5.186700767263427, "percentage": 74.1, "elapsed_time": "20:07:11", "remaining_time": "7:02:02"} +{"current_steps": 2029, "total_steps": 2737, "loss": 1.0289, "lr": 3.8079336769622834e-06, "epoch": 5.189258312020461, "percentage": 74.13, "elapsed_time": "20:07:47", "remaining_time": "7:01:26"} +{"current_steps": 2030, "total_steps": 2737, "loss": 1.0148, "lr": 3.7979230172367453e-06, "epoch": 5.1918158567774935, "percentage": 74.17, "elapsed_time": "20:08:22", "remaining_time": "7:00:50"} +{"current_steps": 2031, "total_steps": 2737, "loss": 1.0068, "lr": 3.7879224479026745e-06, "epoch": 5.194373401534527, "percentage": 74.21, "elapsed_time": "20:08:58", "remaining_time": "7:00:15"} +{"current_steps": 2032, "total_steps": 2737, "loss": 1.0572, "lr": 3.7779319852303766e-06, "epoch": 5.19693094629156, "percentage": 74.24, "elapsed_time": "20:09:34", "remaining_time": "6:59:39"} +{"current_steps": 2033, "total_steps": 2737, "loss": 1.0446, "lr": 3.7679516454736977e-06, "epoch": 5.1994884910485935, "percentage": 74.28, "elapsed_time": "20:10:09", "remaining_time": "6:59:03"} +{"current_steps": 2034, "total_steps": 2737, "loss": 0.9957, "lr": 3.757981444870035e-06, "epoch": 5.202046035805626, "percentage": 74.31, "elapsed_time": "20:10:45", "remaining_time": "6:58:27"} +{"current_steps": 2035, "total_steps": 2737, "loss": 1.0276, "lr": 3.748021399640279e-06, "epoch": 5.20460358056266, "percentage": 74.35, "elapsed_time": "20:11:20", "remaining_time": "6:57:52"} +{"current_steps": 2036, "total_steps": 2737, "loss": 1.0344, "lr": 3.7380715259888e-06, "epoch": 5.207161125319693, "percentage": 74.39, "elapsed_time": "20:11:56", "remaining_time": "6:57:16"} +{"current_steps": 2037, "total_steps": 2737, "loss": 0.9949, "lr": 3.7281318401034183e-06, "epoch": 5.209718670076726, "percentage": 74.42, "elapsed_time": "20:12:32", "remaining_time": "6:56:40"} +{"current_steps": 2038, "total_steps": 2737, "loss": 1.0545, "lr": 3.718202358155384e-06, "epoch": 5.21227621483376, "percentage": 74.46, "elapsed_time": "20:13:07", "remaining_time": "6:56:05"} +{"current_steps": 2039, "total_steps": 2737, "loss": 1.0388, "lr": 3.7082830962993497e-06, "epoch": 5.2148337595907925, "percentage": 74.5, "elapsed_time": "20:13:43", "remaining_time": "6:55:29"} +{"current_steps": 2040, "total_steps": 2737, "loss": 0.9945, "lr": 3.6983740706733207e-06, "epoch": 5.217391304347826, "percentage": 74.53, "elapsed_time": "20:14:19", "remaining_time": "6:54:53"} +{"current_steps": 2041, "total_steps": 2737, "loss": 1.037, "lr": 3.688475297398674e-06, "epoch": 5.21994884910486, "percentage": 74.57, "elapsed_time": "20:14:55", "remaining_time": "6:54:17"} +{"current_steps": 2042, "total_steps": 2737, "loss": 1.0019, "lr": 3.6785867925800856e-06, "epoch": 5.2225063938618925, "percentage": 74.61, "elapsed_time": "20:15:30", "remaining_time": "6:53:42"} +{"current_steps": 2043, "total_steps": 2737, "loss": 1.0384, "lr": 3.668708572305546e-06, "epoch": 5.225063938618926, "percentage": 74.64, "elapsed_time": "20:16:06", "remaining_time": "6:53:06"} +{"current_steps": 2044, "total_steps": 2737, "loss": 1.0018, "lr": 3.658840652646287e-06, "epoch": 5.227621483375959, "percentage": 74.68, "elapsed_time": "20:16:42", "remaining_time": "6:52:30"} +{"current_steps": 2045, "total_steps": 2737, "loss": 1.0221, "lr": 3.6489830496568067e-06, "epoch": 5.2301790281329925, "percentage": 74.72, "elapsed_time": "20:17:17", "remaining_time": "6:51:54"} +{"current_steps": 2046, "total_steps": 2737, "loss": 1.0462, "lr": 3.639135779374813e-06, "epoch": 5.232736572890025, "percentage": 74.75, "elapsed_time": "20:17:53", "remaining_time": "6:51:19"} +{"current_steps": 2047, "total_steps": 2737, "loss": 1.0242, "lr": 3.6292988578211863e-06, "epoch": 5.235294117647059, "percentage": 74.79, "elapsed_time": "20:18:29", "remaining_time": "6:50:43"} +{"current_steps": 2048, "total_steps": 2737, "loss": 1.002, "lr": 3.619472300999992e-06, "epoch": 5.2378516624040925, "percentage": 74.83, "elapsed_time": "20:19:04", "remaining_time": "6:50:07"} +{"current_steps": 2049, "total_steps": 2737, "loss": 1.0365, "lr": 3.6096561248984186e-06, "epoch": 5.240409207161125, "percentage": 74.86, "elapsed_time": "20:19:40", "remaining_time": "6:49:32"} +{"current_steps": 2050, "total_steps": 2737, "loss": 1.0206, "lr": 3.5998503454867807e-06, "epoch": 5.242966751918159, "percentage": 74.9, "elapsed_time": "20:20:16", "remaining_time": "6:48:56"} +{"current_steps": 2051, "total_steps": 2737, "loss": 1.0086, "lr": 3.5900549787184534e-06, "epoch": 5.245524296675192, "percentage": 74.94, "elapsed_time": "20:20:51", "remaining_time": "6:48:20"} +{"current_steps": 2052, "total_steps": 2737, "loss": 1.0457, "lr": 3.580270040529894e-06, "epoch": 5.248081841432225, "percentage": 74.97, "elapsed_time": "20:21:27", "remaining_time": "6:47:44"} +{"current_steps": 2053, "total_steps": 2737, "loss": 1.0316, "lr": 3.570495546840591e-06, "epoch": 5.250639386189258, "percentage": 75.01, "elapsed_time": "20:22:03", "remaining_time": "6:47:09"} +{"current_steps": 2054, "total_steps": 2737, "loss": 1.033, "lr": 3.560731513553022e-06, "epoch": 5.253196930946292, "percentage": 75.05, "elapsed_time": "20:22:38", "remaining_time": "6:46:33"} +{"current_steps": 2055, "total_steps": 2737, "loss": 1.0341, "lr": 3.5509779565526683e-06, "epoch": 5.255754475703325, "percentage": 75.08, "elapsed_time": "20:23:14", "remaining_time": "6:45:57"} +{"current_steps": 2056, "total_steps": 2737, "loss": 1.0621, "lr": 3.5412348917079507e-06, "epoch": 5.258312020460358, "percentage": 75.12, "elapsed_time": "20:23:49", "remaining_time": "6:45:21"} +{"current_steps": 2057, "total_steps": 2737, "loss": 1.0366, "lr": 3.5315023348702325e-06, "epoch": 5.260869565217392, "percentage": 75.16, "elapsed_time": "20:24:25", "remaining_time": "6:44:46"} +{"current_steps": 2058, "total_steps": 2737, "loss": 1.0008, "lr": 3.521780301873773e-06, "epoch": 5.263427109974424, "percentage": 75.19, "elapsed_time": "20:25:01", "remaining_time": "6:44:10"} +{"current_steps": 2059, "total_steps": 2737, "loss": 1.0147, "lr": 3.512068808535707e-06, "epoch": 5.265984654731458, "percentage": 75.23, "elapsed_time": "20:25:37", "remaining_time": "6:43:34"} +{"current_steps": 2060, "total_steps": 2737, "loss": 1.028, "lr": 3.502367870656035e-06, "epoch": 5.268542199488491, "percentage": 75.26, "elapsed_time": "20:26:13", "remaining_time": "6:42:59"} +{"current_steps": 2061, "total_steps": 2737, "loss": 1.0026, "lr": 3.492677504017573e-06, "epoch": 5.271099744245524, "percentage": 75.3, "elapsed_time": "20:26:48", "remaining_time": "6:42:23"} +{"current_steps": 2062, "total_steps": 2737, "loss": 1.0093, "lr": 3.4829977243859414e-06, "epoch": 5.273657289002558, "percentage": 75.34, "elapsed_time": "20:27:24", "remaining_time": "6:41:47"} +{"current_steps": 2063, "total_steps": 2737, "loss": 1.0255, "lr": 3.4733285475095324e-06, "epoch": 5.276214833759591, "percentage": 75.37, "elapsed_time": "20:28:00", "remaining_time": "6:41:11"} +{"current_steps": 2064, "total_steps": 2737, "loss": 1.0176, "lr": 3.4636699891195e-06, "epoch": 5.278772378516624, "percentage": 75.41, "elapsed_time": "20:28:35", "remaining_time": "6:40:36"} +{"current_steps": 2065, "total_steps": 2737, "loss": 1.0355, "lr": 3.454022064929711e-06, "epoch": 5.281329923273657, "percentage": 75.45, "elapsed_time": "20:29:11", "remaining_time": "6:40:00"} +{"current_steps": 2066, "total_steps": 2737, "loss": 0.9999, "lr": 3.4443847906367313e-06, "epoch": 5.283887468030691, "percentage": 75.48, "elapsed_time": "20:29:48", "remaining_time": "6:39:25"} +{"current_steps": 2067, "total_steps": 2737, "loss": 1.0069, "lr": 3.4347581819198095e-06, "epoch": 5.286445012787723, "percentage": 75.52, "elapsed_time": "20:30:24", "remaining_time": "6:38:49"} +{"current_steps": 2068, "total_steps": 2737, "loss": 1.0316, "lr": 3.425142254440835e-06, "epoch": 5.289002557544757, "percentage": 75.56, "elapsed_time": "20:30:59", "remaining_time": "6:38:13"} +{"current_steps": 2069, "total_steps": 2737, "loss": 0.9929, "lr": 3.4155370238443185e-06, "epoch": 5.291560102301791, "percentage": 75.59, "elapsed_time": "20:31:35", "remaining_time": "6:37:37"} +{"current_steps": 2070, "total_steps": 2737, "loss": 1.0235, "lr": 3.405942505757367e-06, "epoch": 5.294117647058823, "percentage": 75.63, "elapsed_time": "20:32:11", "remaining_time": "6:37:02"} +{"current_steps": 2071, "total_steps": 2737, "loss": 0.9883, "lr": 3.3963587157896694e-06, "epoch": 5.296675191815857, "percentage": 75.67, "elapsed_time": "20:32:46", "remaining_time": "6:36:26"} +{"current_steps": 2072, "total_steps": 2737, "loss": 1.0614, "lr": 3.386785669533447e-06, "epoch": 5.29923273657289, "percentage": 75.7, "elapsed_time": "20:33:22", "remaining_time": "6:35:50"} +{"current_steps": 2073, "total_steps": 2737, "loss": 1.019, "lr": 3.377223382563446e-06, "epoch": 5.301790281329923, "percentage": 75.74, "elapsed_time": "20:33:58", "remaining_time": "6:35:15"} +{"current_steps": 2074, "total_steps": 2737, "loss": 1.0744, "lr": 3.367671870436915e-06, "epoch": 5.304347826086957, "percentage": 75.78, "elapsed_time": "20:34:34", "remaining_time": "6:34:39"} +{"current_steps": 2075, "total_steps": 2737, "loss": 1.0204, "lr": 3.358131148693564e-06, "epoch": 5.30690537084399, "percentage": 75.81, "elapsed_time": "20:35:10", "remaining_time": "6:34:03"} +{"current_steps": 2076, "total_steps": 2737, "loss": 1.0361, "lr": 3.3486012328555505e-06, "epoch": 5.309462915601023, "percentage": 75.85, "elapsed_time": "20:35:46", "remaining_time": "6:33:28"} +{"current_steps": 2077, "total_steps": 2737, "loss": 1.0416, "lr": 3.33908213842745e-06, "epoch": 5.312020460358056, "percentage": 75.89, "elapsed_time": "20:36:21", "remaining_time": "6:32:52"} +{"current_steps": 2078, "total_steps": 2737, "loss": 1.0398, "lr": 3.3295738808962388e-06, "epoch": 5.31457800511509, "percentage": 75.92, "elapsed_time": "20:36:57", "remaining_time": "6:32:16"} +{"current_steps": 2079, "total_steps": 2737, "loss": 1.0211, "lr": 3.3200764757312555e-06, "epoch": 5.3171355498721224, "percentage": 75.96, "elapsed_time": "20:37:33", "remaining_time": "6:31:41"} +{"current_steps": 2080, "total_steps": 2737, "loss": 1.0246, "lr": 3.310589938384179e-06, "epoch": 5.319693094629156, "percentage": 76.0, "elapsed_time": "20:38:08", "remaining_time": "6:31:05"} +{"current_steps": 2081, "total_steps": 2737, "loss": 1.0228, "lr": 3.301114284289021e-06, "epoch": 5.322250639386189, "percentage": 76.03, "elapsed_time": "20:38:44", "remaining_time": "6:30:29"} +{"current_steps": 2082, "total_steps": 2737, "loss": 1.0366, "lr": 3.291649528862074e-06, "epoch": 5.324808184143222, "percentage": 76.07, "elapsed_time": "20:39:20", "remaining_time": "6:29:53"} +{"current_steps": 2083, "total_steps": 2737, "loss": 0.9983, "lr": 3.2821956875019045e-06, "epoch": 5.327365728900256, "percentage": 76.11, "elapsed_time": "20:39:55", "remaining_time": "6:29:18"} +{"current_steps": 2084, "total_steps": 2737, "loss": 1.0262, "lr": 3.272752775589316e-06, "epoch": 5.329923273657289, "percentage": 76.14, "elapsed_time": "20:40:31", "remaining_time": "6:28:42"} +{"current_steps": 2085, "total_steps": 2737, "loss": 1.0214, "lr": 3.2633208084873445e-06, "epoch": 5.332480818414322, "percentage": 76.18, "elapsed_time": "20:41:07", "remaining_time": "6:28:06"} +{"current_steps": 2086, "total_steps": 2737, "loss": 1.0458, "lr": 3.253899801541206e-06, "epoch": 5.335038363171355, "percentage": 76.21, "elapsed_time": "20:41:43", "remaining_time": "6:27:31"} +{"current_steps": 2087, "total_steps": 2737, "loss": 1.0699, "lr": 3.244489770078286e-06, "epoch": 5.337595907928389, "percentage": 76.25, "elapsed_time": "20:42:19", "remaining_time": "6:26:55"} +{"current_steps": 2088, "total_steps": 2737, "loss": 0.9936, "lr": 3.2350907294081258e-06, "epoch": 5.340153452685422, "percentage": 76.29, "elapsed_time": "20:42:54", "remaining_time": "6:26:19"} +{"current_steps": 2089, "total_steps": 2737, "loss": 1.0565, "lr": 3.2257026948223726e-06, "epoch": 5.342710997442455, "percentage": 76.32, "elapsed_time": "20:43:30", "remaining_time": "6:25:43"} +{"current_steps": 2090, "total_steps": 2737, "loss": 0.9993, "lr": 3.2163256815947674e-06, "epoch": 5.345268542199489, "percentage": 76.36, "elapsed_time": "20:44:05", "remaining_time": "6:25:08"} +{"current_steps": 2091, "total_steps": 2737, "loss": 1.0555, "lr": 3.206959704981133e-06, "epoch": 5.3478260869565215, "percentage": 76.4, "elapsed_time": "20:44:41", "remaining_time": "6:24:32"} +{"current_steps": 2092, "total_steps": 2737, "loss": 0.9652, "lr": 3.197604780219323e-06, "epoch": 5.350383631713555, "percentage": 76.43, "elapsed_time": "20:45:17", "remaining_time": "6:23:56"} +{"current_steps": 2093, "total_steps": 2737, "loss": 1.0432, "lr": 3.188260922529215e-06, "epoch": 5.352941176470588, "percentage": 76.47, "elapsed_time": "20:45:53", "remaining_time": "6:23:20"} +{"current_steps": 2094, "total_steps": 2737, "loss": 1.0175, "lr": 3.1789281471126786e-06, "epoch": 5.3554987212276215, "percentage": 76.51, "elapsed_time": "20:46:28", "remaining_time": "6:22:45"} +{"current_steps": 2095, "total_steps": 2737, "loss": 1.0024, "lr": 3.1696064691535634e-06, "epoch": 5.358056265984655, "percentage": 76.54, "elapsed_time": "20:47:04", "remaining_time": "6:22:09"} +{"current_steps": 2096, "total_steps": 2737, "loss": 1.016, "lr": 3.1602959038176516e-06, "epoch": 5.360613810741688, "percentage": 76.58, "elapsed_time": "20:47:40", "remaining_time": "6:21:33"} +{"current_steps": 2097, "total_steps": 2737, "loss": 1.0072, "lr": 3.1509964662526484e-06, "epoch": 5.3631713554987215, "percentage": 76.62, "elapsed_time": "20:48:15", "remaining_time": "6:20:58"} +{"current_steps": 2098, "total_steps": 2737, "loss": 0.997, "lr": 3.1417081715881623e-06, "epoch": 5.365728900255754, "percentage": 76.65, "elapsed_time": "20:48:51", "remaining_time": "6:20:22"} +{"current_steps": 2099, "total_steps": 2737, "loss": 1.0286, "lr": 3.132431034935667e-06, "epoch": 5.368286445012788, "percentage": 76.69, "elapsed_time": "20:49:27", "remaining_time": "6:19:46"} +{"current_steps": 2100, "total_steps": 2737, "loss": 1.0331, "lr": 3.1231650713884832e-06, "epoch": 5.370843989769821, "percentage": 76.73, "elapsed_time": "20:50:02", "remaining_time": "6:19:10"} +{"current_steps": 2101, "total_steps": 2737, "loss": 1.0041, "lr": 3.1139102960217493e-06, "epoch": 5.373401534526854, "percentage": 76.76, "elapsed_time": "20:50:38", "remaining_time": "6:18:35"} +{"current_steps": 2102, "total_steps": 2737, "loss": 1.0423, "lr": 3.1046667238924155e-06, "epoch": 5.375959079283888, "percentage": 76.8, "elapsed_time": "20:51:14", "remaining_time": "6:17:59"} +{"current_steps": 2103, "total_steps": 2737, "loss": 1.0349, "lr": 3.0954343700391897e-06, "epoch": 5.378516624040921, "percentage": 76.84, "elapsed_time": "20:51:49", "remaining_time": "6:17:23"} +{"current_steps": 2104, "total_steps": 2737, "loss": 1.026, "lr": 3.0862132494825325e-06, "epoch": 5.381074168797954, "percentage": 76.87, "elapsed_time": "20:52:25", "remaining_time": "6:16:47"} +{"current_steps": 2105, "total_steps": 2737, "loss": 0.9938, "lr": 3.0770033772246376e-06, "epoch": 5.383631713554987, "percentage": 76.91, "elapsed_time": "20:53:01", "remaining_time": "6:16:12"} +{"current_steps": 2106, "total_steps": 2737, "loss": 0.9968, "lr": 3.067804768249386e-06, "epoch": 5.3861892583120206, "percentage": 76.95, "elapsed_time": "20:53:36", "remaining_time": "6:15:36"} +{"current_steps": 2107, "total_steps": 2737, "loss": 1.0166, "lr": 3.058617437522342e-06, "epoch": 5.388746803069053, "percentage": 76.98, "elapsed_time": "20:54:12", "remaining_time": "6:15:00"} +{"current_steps": 2108, "total_steps": 2737, "loss": 1.0066, "lr": 3.0494413999907125e-06, "epoch": 5.391304347826087, "percentage": 77.02, "elapsed_time": "20:54:48", "remaining_time": "6:14:25"} +{"current_steps": 2109, "total_steps": 2737, "loss": 1.0052, "lr": 3.0402766705833455e-06, "epoch": 5.3938618925831205, "percentage": 77.06, "elapsed_time": "20:55:23", "remaining_time": "6:13:49"} +{"current_steps": 2110, "total_steps": 2737, "loss": 0.9969, "lr": 3.0311232642106768e-06, "epoch": 5.396419437340153, "percentage": 77.09, "elapsed_time": "20:55:59", "remaining_time": "6:13:13"} +{"current_steps": 2111, "total_steps": 2737, "loss": 1.0283, "lr": 3.021981195764726e-06, "epoch": 5.398976982097187, "percentage": 77.13, "elapsed_time": "20:56:35", "remaining_time": "6:12:37"} +{"current_steps": 2112, "total_steps": 2737, "loss": 1.0179, "lr": 3.0128504801190716e-06, "epoch": 5.40153452685422, "percentage": 77.16, "elapsed_time": "20:57:10", "remaining_time": "6:12:02"} +{"current_steps": 2113, "total_steps": 2737, "loss": 1.0099, "lr": 3.003731132128811e-06, "epoch": 5.404092071611253, "percentage": 77.2, "elapsed_time": "20:57:46", "remaining_time": "6:11:26"} +{"current_steps": 2114, "total_steps": 2737, "loss": 0.998, "lr": 2.9946231666305627e-06, "epoch": 5.406649616368286, "percentage": 77.24, "elapsed_time": "20:58:22", "remaining_time": "6:10:50"} +{"current_steps": 2115, "total_steps": 2737, "loss": 1.0069, "lr": 2.9855265984424042e-06, "epoch": 5.40920716112532, "percentage": 77.27, "elapsed_time": "20:58:57", "remaining_time": "6:10:14"} +{"current_steps": 2116, "total_steps": 2737, "loss": 0.9928, "lr": 2.976441442363893e-06, "epoch": 5.411764705882353, "percentage": 77.31, "elapsed_time": "20:59:33", "remaining_time": "6:09:39"} +{"current_steps": 2117, "total_steps": 2737, "loss": 1.0082, "lr": 2.967367713176007e-06, "epoch": 5.414322250639386, "percentage": 77.35, "elapsed_time": "21:00:09", "remaining_time": "6:09:03"} +{"current_steps": 2118, "total_steps": 2737, "loss": 0.9779, "lr": 2.9583054256411326e-06, "epoch": 5.41687979539642, "percentage": 77.38, "elapsed_time": "21:00:44", "remaining_time": "6:08:27"} +{"current_steps": 2119, "total_steps": 2737, "loss": 0.9947, "lr": 2.9492545945030517e-06, "epoch": 5.419437340153452, "percentage": 77.42, "elapsed_time": "21:01:20", "remaining_time": "6:07:51"} +{"current_steps": 2120, "total_steps": 2737, "loss": 1.0304, "lr": 2.940215234486894e-06, "epoch": 5.421994884910486, "percentage": 77.46, "elapsed_time": "21:01:56", "remaining_time": "6:07:16"} +{"current_steps": 2121, "total_steps": 2737, "loss": 1.0265, "lr": 2.9311873602991435e-06, "epoch": 5.42455242966752, "percentage": 77.49, "elapsed_time": "21:02:32", "remaining_time": "6:06:40"} +{"current_steps": 2122, "total_steps": 2737, "loss": 0.9907, "lr": 2.922170986627573e-06, "epoch": 5.427109974424552, "percentage": 77.53, "elapsed_time": "21:03:07", "remaining_time": "6:06:04"} +{"current_steps": 2123, "total_steps": 2737, "loss": 1.0362, "lr": 2.913166128141265e-06, "epoch": 5.429667519181586, "percentage": 77.57, "elapsed_time": "21:03:43", "remaining_time": "6:05:29"} +{"current_steps": 2124, "total_steps": 2737, "loss": 1.0335, "lr": 2.9041727994905686e-06, "epoch": 5.432225063938619, "percentage": 77.6, "elapsed_time": "21:04:18", "remaining_time": "6:04:53"} +{"current_steps": 2125, "total_steps": 2737, "loss": 0.9863, "lr": 2.895191015307055e-06, "epoch": 5.434782608695652, "percentage": 77.64, "elapsed_time": "21:04:54", "remaining_time": "6:04:17"} +{"current_steps": 2126, "total_steps": 2737, "loss": 1.0279, "lr": 2.8862207902035334e-06, "epoch": 5.437340153452685, "percentage": 77.68, "elapsed_time": "21:05:30", "remaining_time": "6:03:41"} +{"current_steps": 2127, "total_steps": 2737, "loss": 1.0074, "lr": 2.877262138773994e-06, "epoch": 5.439897698209719, "percentage": 77.71, "elapsed_time": "21:06:06", "remaining_time": "6:03:06"} +{"current_steps": 2128, "total_steps": 2737, "loss": 1.0007, "lr": 2.8683150755936107e-06, "epoch": 5.442455242966752, "percentage": 77.75, "elapsed_time": "21:06:41", "remaining_time": "6:02:30"} +{"current_steps": 2129, "total_steps": 2737, "loss": 1.0183, "lr": 2.859379615218685e-06, "epoch": 5.445012787723785, "percentage": 77.79, "elapsed_time": "21:07:17", "remaining_time": "6:01:54"} +{"current_steps": 2130, "total_steps": 2737, "loss": 1.0553, "lr": 2.850455772186658e-06, "epoch": 5.447570332480819, "percentage": 77.82, "elapsed_time": "21:07:53", "remaining_time": "6:01:19"} +{"current_steps": 2131, "total_steps": 2737, "loss": 1.0029, "lr": 2.8415435610160667e-06, "epoch": 5.450127877237851, "percentage": 77.86, "elapsed_time": "21:08:28", "remaining_time": "6:00:43"} +{"current_steps": 2132, "total_steps": 2737, "loss": 1.0591, "lr": 2.8326429962065184e-06, "epoch": 5.452685421994885, "percentage": 77.9, "elapsed_time": "21:09:04", "remaining_time": "6:00:07"} +{"current_steps": 2133, "total_steps": 2737, "loss": 1.0234, "lr": 2.8237540922386764e-06, "epoch": 5.455242966751918, "percentage": 77.93, "elapsed_time": "21:09:39", "remaining_time": "5:59:31"} +{"current_steps": 2134, "total_steps": 2737, "loss": 1.0408, "lr": 2.8148768635742286e-06, "epoch": 5.457800511508951, "percentage": 77.97, "elapsed_time": "21:10:15", "remaining_time": "5:58:56"} +{"current_steps": 2135, "total_steps": 2737, "loss": 1.0582, "lr": 2.8060113246558783e-06, "epoch": 5.460358056265985, "percentage": 78.01, "elapsed_time": "21:10:51", "remaining_time": "5:58:20"} +{"current_steps": 2136, "total_steps": 2737, "loss": 1.0557, "lr": 2.7971574899072938e-06, "epoch": 5.462915601023018, "percentage": 78.04, "elapsed_time": "21:11:27", "remaining_time": "5:57:44"} +{"current_steps": 2137, "total_steps": 2737, "loss": 1.0213, "lr": 2.7883153737331136e-06, "epoch": 5.465473145780051, "percentage": 78.08, "elapsed_time": "21:12:02", "remaining_time": "5:57:08"} +{"current_steps": 2138, "total_steps": 2737, "loss": 1.0258, "lr": 2.7794849905189138e-06, "epoch": 5.468030690537084, "percentage": 78.11, "elapsed_time": "21:12:38", "remaining_time": "5:56:33"} +{"current_steps": 2139, "total_steps": 2737, "loss": 0.9791, "lr": 2.7706663546311705e-06, "epoch": 5.470588235294118, "percentage": 78.15, "elapsed_time": "21:13:14", "remaining_time": "5:55:57"} +{"current_steps": 2140, "total_steps": 2737, "loss": 1.0364, "lr": 2.761859480417255e-06, "epoch": 5.4731457800511505, "percentage": 78.19, "elapsed_time": "21:13:49", "remaining_time": "5:55:21"} +{"current_steps": 2141, "total_steps": 2737, "loss": 1.046, "lr": 2.753064382205396e-06, "epoch": 5.475703324808184, "percentage": 78.22, "elapsed_time": "21:14:25", "remaining_time": "5:54:46"} +{"current_steps": 2142, "total_steps": 2737, "loss": 1.0377, "lr": 2.7442810743046742e-06, "epoch": 5.478260869565218, "percentage": 78.26, "elapsed_time": "21:15:01", "remaining_time": "5:54:10"} +{"current_steps": 2143, "total_steps": 2737, "loss": 1.0095, "lr": 2.735509571004982e-06, "epoch": 5.4808184143222505, "percentage": 78.3, "elapsed_time": "21:15:37", "remaining_time": "5:53:34"} +{"current_steps": 2144, "total_steps": 2737, "loss": 0.9769, "lr": 2.7267498865770005e-06, "epoch": 5.483375959079284, "percentage": 78.33, "elapsed_time": "21:16:12", "remaining_time": "5:52:58"} +{"current_steps": 2145, "total_steps": 2737, "loss": 1.0057, "lr": 2.718002035272197e-06, "epoch": 5.485933503836317, "percentage": 78.37, "elapsed_time": "21:16:48", "remaining_time": "5:52:23"} +{"current_steps": 2146, "total_steps": 2737, "loss": 1.0064, "lr": 2.7092660313227748e-06, "epoch": 5.4884910485933505, "percentage": 78.41, "elapsed_time": "21:17:24", "remaining_time": "5:51:47"} +{"current_steps": 2147, "total_steps": 2737, "loss": 1.0025, "lr": 2.700541888941667e-06, "epoch": 5.491048593350383, "percentage": 78.44, "elapsed_time": "21:17:59", "remaining_time": "5:51:11"} +{"current_steps": 2148, "total_steps": 2737, "loss": 1.0227, "lr": 2.6918296223225026e-06, "epoch": 5.493606138107417, "percentage": 78.48, "elapsed_time": "21:18:35", "remaining_time": "5:50:36"} +{"current_steps": 2149, "total_steps": 2737, "loss": 1.0393, "lr": 2.683129245639603e-06, "epoch": 5.4961636828644505, "percentage": 78.52, "elapsed_time": "21:19:11", "remaining_time": "5:50:00"} +{"current_steps": 2150, "total_steps": 2737, "loss": 1.0279, "lr": 2.6744407730479325e-06, "epoch": 5.498721227621483, "percentage": 78.55, "elapsed_time": "21:19:47", "remaining_time": "5:49:24"} +{"current_steps": 2151, "total_steps": 2737, "loss": 1.0295, "lr": 2.66576421868309e-06, "epoch": 5.501278772378517, "percentage": 78.59, "elapsed_time": "21:20:22", "remaining_time": "5:48:48"} +{"current_steps": 2152, "total_steps": 2737, "loss": 1.0299, "lr": 2.6570995966612945e-06, "epoch": 5.5038363171355495, "percentage": 78.63, "elapsed_time": "21:20:58", "remaining_time": "5:48:13"} +{"current_steps": 2153, "total_steps": 2737, "loss": 1.037, "lr": 2.6484469210793384e-06, "epoch": 5.506393861892583, "percentage": 78.66, "elapsed_time": "21:21:34", "remaining_time": "5:47:37"} +{"current_steps": 2154, "total_steps": 2737, "loss": 1.017, "lr": 2.6398062060145867e-06, "epoch": 5.508951406649617, "percentage": 78.7, "elapsed_time": "21:22:09", "remaining_time": "5:47:01"} +{"current_steps": 2155, "total_steps": 2737, "loss": 1.0217, "lr": 2.631177465524938e-06, "epoch": 5.5115089514066495, "percentage": 78.74, "elapsed_time": "21:22:45", "remaining_time": "5:46:26"} +{"current_steps": 2156, "total_steps": 2737, "loss": 1.0021, "lr": 2.6225607136488194e-06, "epoch": 5.514066496163683, "percentage": 78.77, "elapsed_time": "21:23:21", "remaining_time": "5:45:50"} +{"current_steps": 2157, "total_steps": 2737, "loss": 1.052, "lr": 2.613955964405146e-06, "epoch": 5.516624040920716, "percentage": 78.81, "elapsed_time": "21:23:56", "remaining_time": "5:45:14"} +{"current_steps": 2158, "total_steps": 2737, "loss": 1.0499, "lr": 2.605363231793302e-06, "epoch": 5.5191815856777495, "percentage": 78.85, "elapsed_time": "21:24:32", "remaining_time": "5:44:38"} +{"current_steps": 2159, "total_steps": 2737, "loss": 1.0172, "lr": 2.5967825297931328e-06, "epoch": 5.521739130434782, "percentage": 78.88, "elapsed_time": "21:25:08", "remaining_time": "5:44:03"} +{"current_steps": 2160, "total_steps": 2737, "loss": 1.0334, "lr": 2.5882138723649018e-06, "epoch": 5.524296675191816, "percentage": 78.92, "elapsed_time": "21:25:43", "remaining_time": "5:43:27"} +{"current_steps": 2161, "total_steps": 2737, "loss": 1.0103, "lr": 2.5796572734492777e-06, "epoch": 5.526854219948849, "percentage": 78.96, "elapsed_time": "21:26:19", "remaining_time": "5:42:51"} +{"current_steps": 2162, "total_steps": 2737, "loss": 1.0218, "lr": 2.571112746967309e-06, "epoch": 5.529411764705882, "percentage": 78.99, "elapsed_time": "21:26:55", "remaining_time": "5:42:16"} +{"current_steps": 2163, "total_steps": 2737, "loss": 1.0759, "lr": 2.5625803068204126e-06, "epoch": 5.531969309462916, "percentage": 79.03, "elapsed_time": "21:27:30", "remaining_time": "5:41:40"} +{"current_steps": 2164, "total_steps": 2737, "loss": 1.0042, "lr": 2.554059966890332e-06, "epoch": 5.534526854219949, "percentage": 79.06, "elapsed_time": "21:28:06", "remaining_time": "5:41:04"} +{"current_steps": 2165, "total_steps": 2737, "loss": 1.0084, "lr": 2.545551741039125e-06, "epoch": 5.537084398976982, "percentage": 79.1, "elapsed_time": "21:28:42", "remaining_time": "5:40:28"} +{"current_steps": 2166, "total_steps": 2737, "loss": 1.0447, "lr": 2.5370556431091486e-06, "epoch": 5.539641943734015, "percentage": 79.14, "elapsed_time": "21:29:17", "remaining_time": "5:39:53"} +{"current_steps": 2167, "total_steps": 2737, "loss": 1.0352, "lr": 2.5285716869230192e-06, "epoch": 5.542199488491049, "percentage": 79.17, "elapsed_time": "21:29:53", "remaining_time": "5:39:17"} +{"current_steps": 2168, "total_steps": 2737, "loss": 1.0456, "lr": 2.5200998862836044e-06, "epoch": 5.544757033248082, "percentage": 79.21, "elapsed_time": "21:30:29", "remaining_time": "5:38:41"} +{"current_steps": 2169, "total_steps": 2737, "loss": 1.0111, "lr": 2.5116402549739904e-06, "epoch": 5.547314578005115, "percentage": 79.25, "elapsed_time": "21:31:04", "remaining_time": "5:38:05"} +{"current_steps": 2170, "total_steps": 2737, "loss": 1.0555, "lr": 2.503192806757474e-06, "epoch": 5.549872122762149, "percentage": 79.28, "elapsed_time": "21:31:40", "remaining_time": "5:37:30"} +{"current_steps": 2171, "total_steps": 2737, "loss": 1.0217, "lr": 2.494757555377524e-06, "epoch": 5.552429667519181, "percentage": 79.32, "elapsed_time": "21:32:16", "remaining_time": "5:36:54"} +{"current_steps": 2172, "total_steps": 2737, "loss": 1.0175, "lr": 2.486334514557761e-06, "epoch": 5.554987212276215, "percentage": 79.36, "elapsed_time": "21:32:51", "remaining_time": "5:36:18"} +{"current_steps": 2173, "total_steps": 2737, "loss": 1.03, "lr": 2.477923698001955e-06, "epoch": 5.557544757033249, "percentage": 79.39, "elapsed_time": "21:33:27", "remaining_time": "5:35:42"} +{"current_steps": 2174, "total_steps": 2737, "loss": 1.0316, "lr": 2.469525119393974e-06, "epoch": 5.560102301790281, "percentage": 79.43, "elapsed_time": "21:34:03", "remaining_time": "5:35:07"} +{"current_steps": 2175, "total_steps": 2737, "loss": 1.0429, "lr": 2.461138792397779e-06, "epoch": 5.562659846547315, "percentage": 79.47, "elapsed_time": "21:34:38", "remaining_time": "5:34:31"} +{"current_steps": 2176, "total_steps": 2737, "loss": 1.0005, "lr": 2.4527647306574e-06, "epoch": 5.565217391304348, "percentage": 79.5, "elapsed_time": "21:35:14", "remaining_time": "5:33:55"} +{"current_steps": 2177, "total_steps": 2737, "loss": 1.0083, "lr": 2.4444029477969157e-06, "epoch": 5.567774936061381, "percentage": 79.54, "elapsed_time": "21:35:50", "remaining_time": "5:33:20"} +{"current_steps": 2178, "total_steps": 2737, "loss": 1.0064, "lr": 2.4360534574204196e-06, "epoch": 5.570332480818414, "percentage": 79.58, "elapsed_time": "21:36:25", "remaining_time": "5:32:44"} +{"current_steps": 2179, "total_steps": 2737, "loss": 1.026, "lr": 2.427716273112011e-06, "epoch": 5.572890025575448, "percentage": 79.61, "elapsed_time": "21:37:01", "remaining_time": "5:32:08"} +{"current_steps": 2180, "total_steps": 2737, "loss": 1.0311, "lr": 2.4193914084357708e-06, "epoch": 5.57544757033248, "percentage": 79.65, "elapsed_time": "21:37:36", "remaining_time": "5:31:32"} +{"current_steps": 2181, "total_steps": 2737, "loss": 1.0245, "lr": 2.4110788769357305e-06, "epoch": 5.578005115089514, "percentage": 79.69, "elapsed_time": "21:38:12", "remaining_time": "5:30:57"} +{"current_steps": 2182, "total_steps": 2737, "loss": 1.0218, "lr": 2.402778692135861e-06, "epoch": 5.580562659846548, "percentage": 79.72, "elapsed_time": "21:38:48", "remaining_time": "5:30:21"} +{"current_steps": 2183, "total_steps": 2737, "loss": 1.0275, "lr": 2.394490867540039e-06, "epoch": 5.58312020460358, "percentage": 79.76, "elapsed_time": "21:39:24", "remaining_time": "5:29:45"} +{"current_steps": 2184, "total_steps": 2737, "loss": 1.0055, "lr": 2.3862154166320417e-06, "epoch": 5.585677749360614, "percentage": 79.8, "elapsed_time": "21:39:59", "remaining_time": "5:29:09"} +{"current_steps": 2185, "total_steps": 2737, "loss": 1.0298, "lr": 2.3779523528755143e-06, "epoch": 5.588235294117647, "percentage": 79.83, "elapsed_time": "21:40:35", "remaining_time": "5:28:34"} +{"current_steps": 2186, "total_steps": 2737, "loss": 1.0568, "lr": 2.3697016897139345e-06, "epoch": 5.59079283887468, "percentage": 79.87, "elapsed_time": "21:41:10", "remaining_time": "5:27:58"} +{"current_steps": 2187, "total_steps": 2737, "loss": 1.0211, "lr": 2.361463440570623e-06, "epoch": 5.593350383631714, "percentage": 79.91, "elapsed_time": "21:41:46", "remaining_time": "5:27:22"} +{"current_steps": 2188, "total_steps": 2737, "loss": 1.0388, "lr": 2.353237618848695e-06, "epoch": 5.595907928388747, "percentage": 79.94, "elapsed_time": "21:42:22", "remaining_time": "5:26:46"} +{"current_steps": 2189, "total_steps": 2737, "loss": 1.0423, "lr": 2.3450242379310427e-06, "epoch": 5.59846547314578, "percentage": 79.98, "elapsed_time": "21:42:58", "remaining_time": "5:26:11"} +{"current_steps": 2190, "total_steps": 2737, "loss": 1.0209, "lr": 2.3368233111803305e-06, "epoch": 5.601023017902813, "percentage": 80.01, "elapsed_time": "21:43:33", "remaining_time": "5:25:35"} +{"current_steps": 2191, "total_steps": 2737, "loss": 1.0548, "lr": 2.328634851938949e-06, "epoch": 5.603580562659847, "percentage": 80.05, "elapsed_time": "21:44:09", "remaining_time": "5:24:59"} +{"current_steps": 2192, "total_steps": 2737, "loss": 1.0283, "lr": 2.3204588735290155e-06, "epoch": 5.6061381074168795, "percentage": 80.09, "elapsed_time": "21:44:45", "remaining_time": "5:24:24"} +{"current_steps": 2193, "total_steps": 2737, "loss": 1.0253, "lr": 2.312295389252326e-06, "epoch": 5.608695652173913, "percentage": 80.12, "elapsed_time": "21:45:20", "remaining_time": "5:23:48"} +{"current_steps": 2194, "total_steps": 2737, "loss": 1.0289, "lr": 2.304144412390367e-06, "epoch": 5.611253196930946, "percentage": 80.16, "elapsed_time": "21:45:56", "remaining_time": "5:23:12"} +{"current_steps": 2195, "total_steps": 2737, "loss": 1.0227, "lr": 2.2960059562042647e-06, "epoch": 5.6138107416879794, "percentage": 80.2, "elapsed_time": "21:46:32", "remaining_time": "5:22:36"} +{"current_steps": 2196, "total_steps": 2737, "loss": 1.0256, "lr": 2.2878800339347763e-06, "epoch": 5.616368286445013, "percentage": 80.23, "elapsed_time": "21:47:07", "remaining_time": "5:22:01"} +{"current_steps": 2197, "total_steps": 2737, "loss": 1.0468, "lr": 2.279766658802275e-06, "epoch": 5.618925831202046, "percentage": 80.27, "elapsed_time": "21:47:43", "remaining_time": "5:21:25"} +{"current_steps": 2198, "total_steps": 2737, "loss": 1.0045, "lr": 2.2716658440067085e-06, "epoch": 5.621483375959079, "percentage": 80.31, "elapsed_time": "21:48:18", "remaining_time": "5:20:49"} +{"current_steps": 2199, "total_steps": 2737, "loss": 1.0211, "lr": 2.2635776027276056e-06, "epoch": 5.624040920716112, "percentage": 80.34, "elapsed_time": "21:48:54", "remaining_time": "5:20:14"} +{"current_steps": 2200, "total_steps": 2737, "loss": 1.0318, "lr": 2.255501948124017e-06, "epoch": 5.626598465473146, "percentage": 80.38, "elapsed_time": "21:49:30", "remaining_time": "5:19:38"} +{"current_steps": 2201, "total_steps": 2737, "loss": 1.0219, "lr": 2.247438893334537e-06, "epoch": 5.629156010230179, "percentage": 80.42, "elapsed_time": "21:50:05", "remaining_time": "5:19:02"} +{"current_steps": 2202, "total_steps": 2737, "loss": 0.9929, "lr": 2.2393884514772457e-06, "epoch": 5.631713554987212, "percentage": 80.45, "elapsed_time": "21:50:41", "remaining_time": "5:18:26"} +{"current_steps": 2203, "total_steps": 2737, "loss": 1.0452, "lr": 2.231350635649713e-06, "epoch": 5.634271099744246, "percentage": 80.49, "elapsed_time": "21:51:17", "remaining_time": "5:17:51"} +{"current_steps": 2204, "total_steps": 2737, "loss": 1.0078, "lr": 2.223325458928961e-06, "epoch": 5.6368286445012785, "percentage": 80.53, "elapsed_time": "21:51:52", "remaining_time": "5:17:15"} +{"current_steps": 2205, "total_steps": 2737, "loss": 1.044, "lr": 2.2153129343714484e-06, "epoch": 5.639386189258312, "percentage": 80.56, "elapsed_time": "21:52:28", "remaining_time": "5:16:39"} +{"current_steps": 2206, "total_steps": 2737, "loss": 1.021, "lr": 2.207313075013059e-06, "epoch": 5.641943734015345, "percentage": 80.6, "elapsed_time": "21:53:03", "remaining_time": "5:16:03"} +{"current_steps": 2207, "total_steps": 2737, "loss": 0.9935, "lr": 2.1993258938690533e-06, "epoch": 5.6445012787723785, "percentage": 80.64, "elapsed_time": "21:53:39", "remaining_time": "5:15:28"} +{"current_steps": 2208, "total_steps": 2737, "loss": 1.0314, "lr": 2.191351403934082e-06, "epoch": 5.647058823529412, "percentage": 80.67, "elapsed_time": "21:54:14", "remaining_time": "5:14:52"} +{"current_steps": 2209, "total_steps": 2737, "loss": 1.0046, "lr": 2.183389618182139e-06, "epoch": 5.649616368286445, "percentage": 80.71, "elapsed_time": "21:54:51", "remaining_time": "5:14:16"} +{"current_steps": 2210, "total_steps": 2737, "loss": 1.0373, "lr": 2.1754405495665553e-06, "epoch": 5.6521739130434785, "percentage": 80.75, "elapsed_time": "21:55:26", "remaining_time": "5:13:41"} +{"current_steps": 2211, "total_steps": 2737, "loss": 1.016, "lr": 2.1675042110199664e-06, "epoch": 5.654731457800511, "percentage": 80.78, "elapsed_time": "21:56:02", "remaining_time": "5:13:05"} +{"current_steps": 2212, "total_steps": 2737, "loss": 1.0203, "lr": 2.1595806154542965e-06, "epoch": 5.657289002557545, "percentage": 80.82, "elapsed_time": "21:56:38", "remaining_time": "5:12:29"} +{"current_steps": 2213, "total_steps": 2737, "loss": 1.048, "lr": 2.1516697757607464e-06, "epoch": 5.659846547314578, "percentage": 80.85, "elapsed_time": "21:57:13", "remaining_time": "5:11:53"} +{"current_steps": 2214, "total_steps": 2737, "loss": 1.0221, "lr": 2.143771704809753e-06, "epoch": 5.662404092071611, "percentage": 80.89, "elapsed_time": "21:57:49", "remaining_time": "5:11:18"} +{"current_steps": 2215, "total_steps": 2737, "loss": 0.995, "lr": 2.1358864154509838e-06, "epoch": 5.664961636828645, "percentage": 80.93, "elapsed_time": "21:58:25", "remaining_time": "5:10:42"} +{"current_steps": 2216, "total_steps": 2737, "loss": 1.002, "lr": 2.128013920513311e-06, "epoch": 5.667519181585678, "percentage": 80.96, "elapsed_time": "21:59:00", "remaining_time": "5:10:06"} +{"current_steps": 2217, "total_steps": 2737, "loss": 1.0307, "lr": 2.1201542328047965e-06, "epoch": 5.670076726342711, "percentage": 81.0, "elapsed_time": "21:59:36", "remaining_time": "5:09:30"} +{"current_steps": 2218, "total_steps": 2737, "loss": 1.0042, "lr": 2.112307365112657e-06, "epoch": 5.672634271099744, "percentage": 81.04, "elapsed_time": "22:00:12", "remaining_time": "5:08:55"} +{"current_steps": 2219, "total_steps": 2737, "loss": 1.0089, "lr": 2.1044733302032527e-06, "epoch": 5.675191815856778, "percentage": 81.07, "elapsed_time": "22:00:47", "remaining_time": "5:08:19"} +{"current_steps": 2220, "total_steps": 2737, "loss": 1.0191, "lr": 2.0966521408220753e-06, "epoch": 5.677749360613811, "percentage": 81.11, "elapsed_time": "22:01:23", "remaining_time": "5:07:43"} +{"current_steps": 2221, "total_steps": 2737, "loss": 1.0389, "lr": 2.088843809693708e-06, "epoch": 5.680306905370844, "percentage": 81.15, "elapsed_time": "22:01:59", "remaining_time": "5:07:08"} +{"current_steps": 2222, "total_steps": 2737, "loss": 1.0386, "lr": 2.081048349521814e-06, "epoch": 5.6828644501278776, "percentage": 81.18, "elapsed_time": "22:02:34", "remaining_time": "5:06:32"} +{"current_steps": 2223, "total_steps": 2737, "loss": 1.0237, "lr": 2.0732657729891236e-06, "epoch": 5.68542199488491, "percentage": 81.22, "elapsed_time": "22:03:10", "remaining_time": "5:05:56"} +{"current_steps": 2224, "total_steps": 2737, "loss": 1.0039, "lr": 2.065496092757403e-06, "epoch": 5.687979539641944, "percentage": 81.26, "elapsed_time": "22:03:46", "remaining_time": "5:05:20"} +{"current_steps": 2225, "total_steps": 2737, "loss": 1.0782, "lr": 2.0577393214674335e-06, "epoch": 5.690537084398977, "percentage": 81.29, "elapsed_time": "22:04:21", "remaining_time": "5:04:45"} +{"current_steps": 2226, "total_steps": 2737, "loss": 1.029, "lr": 2.049995471738995e-06, "epoch": 5.69309462915601, "percentage": 81.33, "elapsed_time": "22:04:57", "remaining_time": "5:04:09"} +{"current_steps": 2227, "total_steps": 2737, "loss": 0.9846, "lr": 2.042264556170853e-06, "epoch": 5.695652173913043, "percentage": 81.37, "elapsed_time": "22:05:33", "remaining_time": "5:03:33"} +{"current_steps": 2228, "total_steps": 2737, "loss": 1.0143, "lr": 2.034546587340719e-06, "epoch": 5.698209718670077, "percentage": 81.4, "elapsed_time": "22:06:08", "remaining_time": "5:02:57"} +{"current_steps": 2229, "total_steps": 2737, "loss": 1.0534, "lr": 2.026841577805245e-06, "epoch": 5.70076726342711, "percentage": 81.44, "elapsed_time": "22:06:44", "remaining_time": "5:02:22"} +{"current_steps": 2230, "total_steps": 2737, "loss": 1.0568, "lr": 2.019149540100005e-06, "epoch": 5.703324808184143, "percentage": 81.48, "elapsed_time": "22:07:20", "remaining_time": "5:01:46"} +{"current_steps": 2231, "total_steps": 2737, "loss": 1.014, "lr": 2.0114704867394598e-06, "epoch": 5.705882352941177, "percentage": 81.51, "elapsed_time": "22:07:55", "remaining_time": "5:01:10"} +{"current_steps": 2232, "total_steps": 2737, "loss": 1.0246, "lr": 2.0038044302169492e-06, "epoch": 5.708439897698209, "percentage": 81.55, "elapsed_time": "22:08:31", "remaining_time": "5:00:35"} +{"current_steps": 2233, "total_steps": 2737, "loss": 1.0335, "lr": 1.9961513830046663e-06, "epoch": 5.710997442455243, "percentage": 81.59, "elapsed_time": "22:09:07", "remaining_time": "4:59:59"} +{"current_steps": 2234, "total_steps": 2737, "loss": 1.0107, "lr": 1.988511357553644e-06, "epoch": 5.713554987212277, "percentage": 81.62, "elapsed_time": "22:09:42", "remaining_time": "4:59:23"} +{"current_steps": 2235, "total_steps": 2737, "loss": 1.002, "lr": 1.980884366293725e-06, "epoch": 5.716112531969309, "percentage": 81.66, "elapsed_time": "22:10:18", "remaining_time": "4:58:47"} +{"current_steps": 2236, "total_steps": 2737, "loss": 1.0281, "lr": 1.973270421633543e-06, "epoch": 5.718670076726343, "percentage": 81.7, "elapsed_time": "22:10:53", "remaining_time": "4:58:12"} +{"current_steps": 2237, "total_steps": 2737, "loss": 1.0032, "lr": 1.965669535960516e-06, "epoch": 5.721227621483376, "percentage": 81.73, "elapsed_time": "22:11:29", "remaining_time": "4:57:36"} +{"current_steps": 2238, "total_steps": 2737, "loss": 1.0151, "lr": 1.9580817216408075e-06, "epoch": 5.723785166240409, "percentage": 81.77, "elapsed_time": "22:12:05", "remaining_time": "4:57:00"} +{"current_steps": 2239, "total_steps": 2737, "loss": 0.9876, "lr": 1.9505069910193164e-06, "epoch": 5.726342710997442, "percentage": 81.8, "elapsed_time": "22:12:40", "remaining_time": "4:56:24"} +{"current_steps": 2240, "total_steps": 2737, "loss": 1.0203, "lr": 1.9429453564196543e-06, "epoch": 5.728900255754476, "percentage": 81.84, "elapsed_time": "22:13:16", "remaining_time": "4:55:49"} +{"current_steps": 2241, "total_steps": 2737, "loss": 0.9752, "lr": 1.9353968301441306e-06, "epoch": 5.731457800511509, "percentage": 81.88, "elapsed_time": "22:13:51", "remaining_time": "4:55:13"} +{"current_steps": 2242, "total_steps": 2737, "loss": 1.025, "lr": 1.927861424473726e-06, "epoch": 5.734015345268542, "percentage": 81.91, "elapsed_time": "22:14:27", "remaining_time": "4:54:37"} +{"current_steps": 2243, "total_steps": 2737, "loss": 1.0125, "lr": 1.920339151668069e-06, "epoch": 5.736572890025576, "percentage": 81.95, "elapsed_time": "22:15:03", "remaining_time": "4:54:02"} +{"current_steps": 2244, "total_steps": 2737, "loss": 1.0103, "lr": 1.9128300239654353e-06, "epoch": 5.739130434782608, "percentage": 81.99, "elapsed_time": "22:15:38", "remaining_time": "4:53:26"} +{"current_steps": 2245, "total_steps": 2737, "loss": 1.0365, "lr": 1.9053340535827004e-06, "epoch": 5.741687979539642, "percentage": 82.02, "elapsed_time": "22:16:14", "remaining_time": "4:52:50"} +{"current_steps": 2246, "total_steps": 2737, "loss": 1.0208, "lr": 1.8978512527153414e-06, "epoch": 5.744245524296675, "percentage": 82.06, "elapsed_time": "22:16:50", "remaining_time": "4:52:14"} +{"current_steps": 2247, "total_steps": 2737, "loss": 1.0092, "lr": 1.8903816335374048e-06, "epoch": 5.746803069053708, "percentage": 82.1, "elapsed_time": "22:17:25", "remaining_time": "4:51:39"} +{"current_steps": 2248, "total_steps": 2737, "loss": 0.9976, "lr": 1.882925208201498e-06, "epoch": 5.749360613810742, "percentage": 82.13, "elapsed_time": "22:18:01", "remaining_time": "4:51:03"} +{"current_steps": 2249, "total_steps": 2737, "loss": 1.0226, "lr": 1.8754819888387576e-06, "epoch": 5.751918158567775, "percentage": 82.17, "elapsed_time": "22:18:37", "remaining_time": "4:50:27"} +{"current_steps": 2250, "total_steps": 2737, "loss": 1.0547, "lr": 1.868051987558832e-06, "epoch": 5.754475703324808, "percentage": 82.21, "elapsed_time": "22:19:12", "remaining_time": "4:49:51"} +{"current_steps": 2251, "total_steps": 2737, "loss": 1.022, "lr": 1.8606352164498754e-06, "epoch": 5.757033248081841, "percentage": 82.24, "elapsed_time": "22:19:48", "remaining_time": "4:49:16"} +{"current_steps": 2252, "total_steps": 2737, "loss": 1.059, "lr": 1.8532316875785084e-06, "epoch": 5.759590792838875, "percentage": 82.28, "elapsed_time": "22:20:24", "remaining_time": "4:48:40"} +{"current_steps": 2253, "total_steps": 2737, "loss": 1.0121, "lr": 1.8458414129898072e-06, "epoch": 5.762148337595908, "percentage": 82.32, "elapsed_time": "22:20:59", "remaining_time": "4:48:04"} +{"current_steps": 2254, "total_steps": 2737, "loss": 1.0363, "lr": 1.8384644047072864e-06, "epoch": 5.764705882352941, "percentage": 82.35, "elapsed_time": "22:21:35", "remaining_time": "4:47:28"} +{"current_steps": 2255, "total_steps": 2737, "loss": 1.0342, "lr": 1.8311006747328775e-06, "epoch": 5.767263427109975, "percentage": 82.39, "elapsed_time": "22:22:10", "remaining_time": "4:46:53"} +{"current_steps": 2256, "total_steps": 2737, "loss": 1.028, "lr": 1.8237502350469161e-06, "epoch": 5.7698209718670075, "percentage": 82.43, "elapsed_time": "22:22:46", "remaining_time": "4:46:17"} +{"current_steps": 2257, "total_steps": 2737, "loss": 0.9998, "lr": 1.8164130976080962e-06, "epoch": 5.772378516624041, "percentage": 82.46, "elapsed_time": "22:23:22", "remaining_time": "4:45:41"} +{"current_steps": 2258, "total_steps": 2737, "loss": 0.9861, "lr": 1.8090892743534904e-06, "epoch": 5.774936061381074, "percentage": 82.5, "elapsed_time": "22:23:57", "remaining_time": "4:45:06"} +{"current_steps": 2259, "total_steps": 2737, "loss": 1.0196, "lr": 1.8017787771984973e-06, "epoch": 5.7774936061381075, "percentage": 82.54, "elapsed_time": "22:24:33", "remaining_time": "4:44:30"} +{"current_steps": 2260, "total_steps": 2737, "loss": 1.0422, "lr": 1.7944816180368408e-06, "epoch": 5.78005115089514, "percentage": 82.57, "elapsed_time": "22:25:08", "remaining_time": "4:43:54"} +{"current_steps": 2261, "total_steps": 2737, "loss": 1.0097, "lr": 1.7871978087405384e-06, "epoch": 5.782608695652174, "percentage": 82.61, "elapsed_time": "22:25:44", "remaining_time": "4:43:18"} +{"current_steps": 2262, "total_steps": 2737, "loss": 1.0121, "lr": 1.7799273611598943e-06, "epoch": 5.7851662404092075, "percentage": 82.65, "elapsed_time": "22:26:20", "remaining_time": "4:42:43"} +{"current_steps": 2263, "total_steps": 2737, "loss": 0.9939, "lr": 1.772670287123479e-06, "epoch": 5.78772378516624, "percentage": 82.68, "elapsed_time": "22:26:56", "remaining_time": "4:42:07"} +{"current_steps": 2264, "total_steps": 2737, "loss": 1.0377, "lr": 1.765426598438088e-06, "epoch": 5.790281329923274, "percentage": 82.72, "elapsed_time": "22:27:32", "remaining_time": "4:41:31"} +{"current_steps": 2265, "total_steps": 2737, "loss": 1.0082, "lr": 1.7581963068887554e-06, "epoch": 5.792838874680307, "percentage": 82.75, "elapsed_time": "22:28:07", "remaining_time": "4:40:56"} +{"current_steps": 2266, "total_steps": 2737, "loss": 1.0455, "lr": 1.7509794242387135e-06, "epoch": 5.79539641943734, "percentage": 82.79, "elapsed_time": "22:28:43", "remaining_time": "4:40:20"} +{"current_steps": 2267, "total_steps": 2737, "loss": 1.0301, "lr": 1.7437759622293771e-06, "epoch": 5.797953964194374, "percentage": 82.83, "elapsed_time": "22:29:18", "remaining_time": "4:39:44"} +{"current_steps": 2268, "total_steps": 2737, "loss": 1.028, "lr": 1.7365859325803269e-06, "epoch": 5.8005115089514065, "percentage": 82.86, "elapsed_time": "22:29:54", "remaining_time": "4:39:08"} +{"current_steps": 2269, "total_steps": 2737, "loss": 1.0253, "lr": 1.7294093469892948e-06, "epoch": 5.80306905370844, "percentage": 82.9, "elapsed_time": "22:30:30", "remaining_time": "4:38:33"} +{"current_steps": 2270, "total_steps": 2737, "loss": 1.0112, "lr": 1.7222462171321397e-06, "epoch": 5.805626598465473, "percentage": 82.94, "elapsed_time": "22:31:05", "remaining_time": "4:37:57"} +{"current_steps": 2271, "total_steps": 2737, "loss": 1.0168, "lr": 1.7150965546628184e-06, "epoch": 5.8081841432225065, "percentage": 82.97, "elapsed_time": "22:31:41", "remaining_time": "4:37:21"} +{"current_steps": 2272, "total_steps": 2737, "loss": 0.9867, "lr": 1.7079603712133908e-06, "epoch": 5.810741687979539, "percentage": 83.01, "elapsed_time": "22:32:17", "remaining_time": "4:36:45"} +{"current_steps": 2273, "total_steps": 2737, "loss": 1.0085, "lr": 1.7008376783939772e-06, "epoch": 5.813299232736573, "percentage": 83.05, "elapsed_time": "22:32:52", "remaining_time": "4:36:10"} +{"current_steps": 2274, "total_steps": 2737, "loss": 1.0162, "lr": 1.6937284877927596e-06, "epoch": 5.8158567774936065, "percentage": 83.08, "elapsed_time": "22:33:28", "remaining_time": "4:35:34"} +{"current_steps": 2275, "total_steps": 2737, "loss": 0.9794, "lr": 1.6866328109759377e-06, "epoch": 5.818414322250639, "percentage": 83.12, "elapsed_time": "22:34:03", "remaining_time": "4:34:58"} +{"current_steps": 2276, "total_steps": 2737, "loss": 1.031, "lr": 1.6795506594877388e-06, "epoch": 5.820971867007673, "percentage": 83.16, "elapsed_time": "22:34:39", "remaining_time": "4:34:22"} +{"current_steps": 2277, "total_steps": 2737, "loss": 1.0204, "lr": 1.6724820448503852e-06, "epoch": 5.823529411764706, "percentage": 83.19, "elapsed_time": "22:35:14", "remaining_time": "4:33:47"} +{"current_steps": 2278, "total_steps": 2737, "loss": 1.0448, "lr": 1.6654269785640608e-06, "epoch": 5.826086956521739, "percentage": 83.23, "elapsed_time": "22:35:50", "remaining_time": "4:33:11"} +{"current_steps": 2279, "total_steps": 2737, "loss": 1.0146, "lr": 1.658385472106926e-06, "epoch": 5.828644501278772, "percentage": 83.27, "elapsed_time": "22:36:26", "remaining_time": "4:32:35"} +{"current_steps": 2280, "total_steps": 2737, "loss": 1.021, "lr": 1.6513575369350654e-06, "epoch": 5.831202046035806, "percentage": 83.3, "elapsed_time": "22:37:01", "remaining_time": "4:32:00"} +{"current_steps": 2281, "total_steps": 2737, "loss": 1.0002, "lr": 1.6443431844824975e-06, "epoch": 5.833759590792839, "percentage": 83.34, "elapsed_time": "22:37:37", "remaining_time": "4:31:24"} +{"current_steps": 2282, "total_steps": 2737, "loss": 1.0013, "lr": 1.637342426161126e-06, "epoch": 5.836317135549872, "percentage": 83.38, "elapsed_time": "22:38:12", "remaining_time": "4:30:48"} +{"current_steps": 2283, "total_steps": 2737, "loss": 1.0083, "lr": 1.630355273360752e-06, "epoch": 5.838874680306906, "percentage": 83.41, "elapsed_time": "22:38:48", "remaining_time": "4:30:12"} +{"current_steps": 2284, "total_steps": 2737, "loss": 1.0495, "lr": 1.623381737449038e-06, "epoch": 5.841432225063938, "percentage": 83.45, "elapsed_time": "22:39:24", "remaining_time": "4:29:37"} +{"current_steps": 2285, "total_steps": 2737, "loss": 0.9778, "lr": 1.6164218297714884e-06, "epoch": 5.843989769820972, "percentage": 83.49, "elapsed_time": "22:40:00", "remaining_time": "4:29:01"} +{"current_steps": 2286, "total_steps": 2737, "loss": 0.9882, "lr": 1.609475561651438e-06, "epoch": 5.846547314578006, "percentage": 83.52, "elapsed_time": "22:40:35", "remaining_time": "4:28:25"} +{"current_steps": 2287, "total_steps": 2737, "loss": 1.0402, "lr": 1.6025429443900286e-06, "epoch": 5.849104859335038, "percentage": 83.56, "elapsed_time": "22:41:11", "remaining_time": "4:27:50"} +{"current_steps": 2288, "total_steps": 2737, "loss": 1.0323, "lr": 1.5956239892661995e-06, "epoch": 5.851662404092072, "percentage": 83.6, "elapsed_time": "22:41:47", "remaining_time": "4:27:14"} +{"current_steps": 2289, "total_steps": 2737, "loss": 1.0153, "lr": 1.588718707536656e-06, "epoch": 5.854219948849105, "percentage": 83.63, "elapsed_time": "22:42:22", "remaining_time": "4:26:38"} +{"current_steps": 2290, "total_steps": 2737, "loss": 1.0359, "lr": 1.5818271104358574e-06, "epoch": 5.856777493606138, "percentage": 83.67, "elapsed_time": "22:42:58", "remaining_time": "4:26:02"} +{"current_steps": 2291, "total_steps": 2737, "loss": 1.0272, "lr": 1.5749492091760054e-06, "epoch": 5.859335038363171, "percentage": 83.7, "elapsed_time": "22:43:33", "remaining_time": "4:25:27"} +{"current_steps": 2292, "total_steps": 2737, "loss": 1.0141, "lr": 1.5680850149470139e-06, "epoch": 5.861892583120205, "percentage": 83.74, "elapsed_time": "22:44:09", "remaining_time": "4:24:51"} +{"current_steps": 2293, "total_steps": 2737, "loss": 1.0213, "lr": 1.5612345389164974e-06, "epoch": 5.864450127877237, "percentage": 83.78, "elapsed_time": "22:44:45", "remaining_time": "4:24:15"} +{"current_steps": 2294, "total_steps": 2737, "loss": 1.0203, "lr": 1.5543977922297494e-06, "epoch": 5.867007672634271, "percentage": 83.81, "elapsed_time": "22:45:21", "remaining_time": "4:23:39"} +{"current_steps": 2295, "total_steps": 2737, "loss": 1.0175, "lr": 1.5475747860097335e-06, "epoch": 5.869565217391305, "percentage": 83.85, "elapsed_time": "22:45:56", "remaining_time": "4:23:04"} +{"current_steps": 2296, "total_steps": 2737, "loss": 1.0187, "lr": 1.5407655313570525e-06, "epoch": 5.872122762148337, "percentage": 83.89, "elapsed_time": "22:46:32", "remaining_time": "4:22:28"} +{"current_steps": 2297, "total_steps": 2737, "loss": 0.978, "lr": 1.5339700393499357e-06, "epoch": 5.874680306905371, "percentage": 83.92, "elapsed_time": "22:47:07", "remaining_time": "4:21:52"} +{"current_steps": 2298, "total_steps": 2737, "loss": 1.0243, "lr": 1.5271883210442285e-06, "epoch": 5.877237851662404, "percentage": 83.96, "elapsed_time": "22:47:43", "remaining_time": "4:21:17"} +{"current_steps": 2299, "total_steps": 2737, "loss": 1.0458, "lr": 1.5204203874733604e-06, "epoch": 5.879795396419437, "percentage": 84.0, "elapsed_time": "22:48:19", "remaining_time": "4:20:41"} +{"current_steps": 2300, "total_steps": 2737, "loss": 1.0159, "lr": 1.5136662496483346e-06, "epoch": 5.882352941176471, "percentage": 84.03, "elapsed_time": "22:48:54", "remaining_time": "4:20:05"} +{"current_steps": 2301, "total_steps": 2737, "loss": 1.0234, "lr": 1.5069259185577112e-06, "epoch": 5.884910485933504, "percentage": 84.07, "elapsed_time": "22:49:30", "remaining_time": "4:19:29"} +{"current_steps": 2302, "total_steps": 2737, "loss": 1.0005, "lr": 1.5001994051675894e-06, "epoch": 5.887468030690537, "percentage": 84.11, "elapsed_time": "22:50:06", "remaining_time": "4:18:54"} +{"current_steps": 2303, "total_steps": 2737, "loss": 1.0182, "lr": 1.4934867204215864e-06, "epoch": 5.89002557544757, "percentage": 84.14, "elapsed_time": "22:50:41", "remaining_time": "4:18:18"} +{"current_steps": 2304, "total_steps": 2737, "loss": 1.0023, "lr": 1.486787875240816e-06, "epoch": 5.892583120204604, "percentage": 84.18, "elapsed_time": "22:51:17", "remaining_time": "4:17:42"} +{"current_steps": 2305, "total_steps": 2737, "loss": 1.0114, "lr": 1.480102880523886e-06, "epoch": 5.8951406649616365, "percentage": 84.22, "elapsed_time": "22:51:53", "remaining_time": "4:17:07"} +{"current_steps": 2306, "total_steps": 2737, "loss": 1.0279, "lr": 1.4734317471468618e-06, "epoch": 5.89769820971867, "percentage": 84.25, "elapsed_time": "22:52:28", "remaining_time": "4:16:31"} +{"current_steps": 2307, "total_steps": 2737, "loss": 0.9748, "lr": 1.4667744859632615e-06, "epoch": 5.900255754475703, "percentage": 84.29, "elapsed_time": "22:53:04", "remaining_time": "4:15:55"} +{"current_steps": 2308, "total_steps": 2737, "loss": 1.0291, "lr": 1.4601311078040304e-06, "epoch": 5.9028132992327365, "percentage": 84.33, "elapsed_time": "22:53:40", "remaining_time": "4:15:19"} +{"current_steps": 2309, "total_steps": 2737, "loss": 0.9835, "lr": 1.4535016234775324e-06, "epoch": 5.90537084398977, "percentage": 84.36, "elapsed_time": "22:54:15", "remaining_time": "4:14:44"} +{"current_steps": 2310, "total_steps": 2737, "loss": 1.0276, "lr": 1.4468860437695243e-06, "epoch": 5.907928388746803, "percentage": 84.4, "elapsed_time": "22:54:51", "remaining_time": "4:14:08"} +{"current_steps": 2311, "total_steps": 2737, "loss": 1.0085, "lr": 1.4402843794431354e-06, "epoch": 5.910485933503836, "percentage": 84.44, "elapsed_time": "22:55:26", "remaining_time": "4:13:32"} +{"current_steps": 2312, "total_steps": 2737, "loss": 1.0392, "lr": 1.4336966412388674e-06, "epoch": 5.913043478260869, "percentage": 84.47, "elapsed_time": "22:56:02", "remaining_time": "4:12:56"} +{"current_steps": 2313, "total_steps": 2737, "loss": 1.0063, "lr": 1.4271228398745552e-06, "epoch": 5.915601023017903, "percentage": 84.51, "elapsed_time": "22:56:38", "remaining_time": "4:12:21"} +{"current_steps": 2314, "total_steps": 2737, "loss": 1.0598, "lr": 1.4205629860453641e-06, "epoch": 5.918158567774936, "percentage": 84.55, "elapsed_time": "22:57:13", "remaining_time": "4:11:45"} +{"current_steps": 2315, "total_steps": 2737, "loss": 1.0078, "lr": 1.4140170904237616e-06, "epoch": 5.920716112531969, "percentage": 84.58, "elapsed_time": "22:57:49", "remaining_time": "4:11:09"} +{"current_steps": 2316, "total_steps": 2737, "loss": 0.9912, "lr": 1.4074851636595165e-06, "epoch": 5.923273657289003, "percentage": 84.62, "elapsed_time": "22:58:24", "remaining_time": "4:10:33"} +{"current_steps": 2317, "total_steps": 2737, "loss": 1.0023, "lr": 1.400967216379663e-06, "epoch": 5.9258312020460355, "percentage": 84.65, "elapsed_time": "22:59:00", "remaining_time": "4:09:58"} +{"current_steps": 2318, "total_steps": 2737, "loss": 1.0097, "lr": 1.394463259188491e-06, "epoch": 5.928388746803069, "percentage": 84.69, "elapsed_time": "22:59:35", "remaining_time": "4:09:22"} +{"current_steps": 2319, "total_steps": 2737, "loss": 1.036, "lr": 1.3879733026675367e-06, "epoch": 5.930946291560103, "percentage": 84.73, "elapsed_time": "23:00:11", "remaining_time": "4:08:46"} +{"current_steps": 2320, "total_steps": 2737, "loss": 1.036, "lr": 1.3814973573755518e-06, "epoch": 5.9335038363171355, "percentage": 84.76, "elapsed_time": "23:00:47", "remaining_time": "4:08:11"} +{"current_steps": 2321, "total_steps": 2737, "loss": 0.9881, "lr": 1.3750354338484916e-06, "epoch": 5.936061381074169, "percentage": 84.8, "elapsed_time": "23:01:22", "remaining_time": "4:07:35"} +{"current_steps": 2322, "total_steps": 2737, "loss": 1.0191, "lr": 1.3685875425995064e-06, "epoch": 5.938618925831202, "percentage": 84.84, "elapsed_time": "23:01:58", "remaining_time": "4:06:59"} +{"current_steps": 2323, "total_steps": 2737, "loss": 1.0144, "lr": 1.3621536941189107e-06, "epoch": 5.9411764705882355, "percentage": 84.87, "elapsed_time": "23:02:33", "remaining_time": "4:06:23"} +{"current_steps": 2324, "total_steps": 2737, "loss": 1.049, "lr": 1.355733898874173e-06, "epoch": 5.943734015345268, "percentage": 84.91, "elapsed_time": "23:03:09", "remaining_time": "4:05:48"} +{"current_steps": 2325, "total_steps": 2737, "loss": 1.015, "lr": 1.3493281673098956e-06, "epoch": 5.946291560102302, "percentage": 84.95, "elapsed_time": "23:03:45", "remaining_time": "4:05:12"} +{"current_steps": 2326, "total_steps": 2737, "loss": 0.9981, "lr": 1.3429365098478087e-06, "epoch": 5.948849104859335, "percentage": 84.98, "elapsed_time": "23:04:20", "remaining_time": "4:04:36"} +{"current_steps": 2327, "total_steps": 2737, "loss": 0.9794, "lr": 1.3365589368867371e-06, "epoch": 5.951406649616368, "percentage": 85.02, "elapsed_time": "23:04:56", "remaining_time": "4:04:00"} +{"current_steps": 2328, "total_steps": 2737, "loss": 1.0249, "lr": 1.330195458802591e-06, "epoch": 5.953964194373402, "percentage": 85.06, "elapsed_time": "23:05:31", "remaining_time": "4:03:25"} +{"current_steps": 2329, "total_steps": 2737, "loss": 0.9898, "lr": 1.323846085948356e-06, "epoch": 5.956521739130435, "percentage": 85.09, "elapsed_time": "23:06:07", "remaining_time": "4:02:49"} +{"current_steps": 2330, "total_steps": 2737, "loss": 1.0352, "lr": 1.3175108286540617e-06, "epoch": 5.959079283887468, "percentage": 85.13, "elapsed_time": "23:06:43", "remaining_time": "4:02:13"} +{"current_steps": 2331, "total_steps": 2737, "loss": 1.0055, "lr": 1.3111896972267768e-06, "epoch": 5.961636828644501, "percentage": 85.17, "elapsed_time": "23:07:18", "remaining_time": "4:01:38"} +{"current_steps": 2332, "total_steps": 2737, "loss": 0.9892, "lr": 1.3048827019505828e-06, "epoch": 5.964194373401535, "percentage": 85.2, "elapsed_time": "23:07:54", "remaining_time": "4:01:02"} +{"current_steps": 2333, "total_steps": 2737, "loss": 0.9883, "lr": 1.2985898530865736e-06, "epoch": 5.966751918158568, "percentage": 85.24, "elapsed_time": "23:08:30", "remaining_time": "4:00:26"} +{"current_steps": 2334, "total_steps": 2737, "loss": 1.0221, "lr": 1.2923111608728168e-06, "epoch": 5.969309462915601, "percentage": 85.28, "elapsed_time": "23:09:05", "remaining_time": "3:59:50"} +{"current_steps": 2335, "total_steps": 2737, "loss": 1.0587, "lr": 1.2860466355243506e-06, "epoch": 5.971867007672635, "percentage": 85.31, "elapsed_time": "23:09:41", "remaining_time": "3:59:15"} +{"current_steps": 2336, "total_steps": 2737, "loss": 1.0096, "lr": 1.2797962872331693e-06, "epoch": 5.974424552429667, "percentage": 85.35, "elapsed_time": "23:10:17", "remaining_time": "3:58:39"} +{"current_steps": 2337, "total_steps": 2737, "loss": 1.0489, "lr": 1.2735601261681985e-06, "epoch": 5.976982097186701, "percentage": 85.39, "elapsed_time": "23:10:52", "remaining_time": "3:58:03"} +{"current_steps": 2338, "total_steps": 2737, "loss": 1.0307, "lr": 1.2673381624752813e-06, "epoch": 5.979539641943734, "percentage": 85.42, "elapsed_time": "23:11:28", "remaining_time": "3:57:28"} +{"current_steps": 2339, "total_steps": 2737, "loss": 1.017, "lr": 1.2611304062771613e-06, "epoch": 5.982097186700767, "percentage": 85.46, "elapsed_time": "23:12:04", "remaining_time": "3:56:52"} +{"current_steps": 2340, "total_steps": 2737, "loss": 1.0056, "lr": 1.254936867673474e-06, "epoch": 5.9846547314578, "percentage": 85.5, "elapsed_time": "23:12:39", "remaining_time": "3:56:16"} +{"current_steps": 2341, "total_steps": 2737, "loss": 0.9998, "lr": 1.2487575567407184e-06, "epoch": 5.987212276214834, "percentage": 85.53, "elapsed_time": "23:13:15", "remaining_time": "3:55:40"} +{"current_steps": 2342, "total_steps": 2737, "loss": 1.0247, "lr": 1.2425924835322422e-06, "epoch": 5.989769820971867, "percentage": 85.57, "elapsed_time": "23:13:51", "remaining_time": "3:55:05"} +{"current_steps": 2343, "total_steps": 2737, "loss": 1.0195, "lr": 1.2364416580782413e-06, "epoch": 5.9923273657289, "percentage": 85.6, "elapsed_time": "23:14:26", "remaining_time": "3:54:29"} +{"current_steps": 2344, "total_steps": 2737, "loss": 1.0156, "lr": 1.2303050903857195e-06, "epoch": 5.994884910485934, "percentage": 85.64, "elapsed_time": "23:15:02", "remaining_time": "3:53:53"} +{"current_steps": 2345, "total_steps": 2737, "loss": 1.0304, "lr": 1.2241827904384928e-06, "epoch": 5.997442455242966, "percentage": 85.68, "elapsed_time": "23:15:38", "remaining_time": "3:53:17"} +{"current_steps": 2346, "total_steps": 2737, "loss": 1.0047, "lr": 1.2180747681971539e-06, "epoch": 6.0, "percentage": 85.71, "elapsed_time": "23:16:13", "remaining_time": "3:52:42"} +{"current_steps": 2347, "total_steps": 2737, "loss": 1.0416, "lr": 1.211981033599079e-06, "epoch": 6.002557544757034, "percentage": 85.75, "elapsed_time": "23:16:57", "remaining_time": "3:52:07"} +{"current_steps": 2348, "total_steps": 2737, "loss": 1.0123, "lr": 1.2059015965583908e-06, "epoch": 6.005115089514066, "percentage": 85.79, "elapsed_time": "23:17:33", "remaining_time": "3:51:32"} +{"current_steps": 2349, "total_steps": 2737, "loss": 0.9796, "lr": 1.1998364669659524e-06, "epoch": 6.0076726342711, "percentage": 85.82, "elapsed_time": "23:18:09", "remaining_time": "3:50:56"} +{"current_steps": 2350, "total_steps": 2737, "loss": 0.9862, "lr": 1.1937856546893533e-06, "epoch": 6.010230179028133, "percentage": 85.86, "elapsed_time": "23:18:44", "remaining_time": "3:50:20"} +{"current_steps": 2351, "total_steps": 2737, "loss": 1.0181, "lr": 1.1877491695728827e-06, "epoch": 6.012787723785166, "percentage": 85.9, "elapsed_time": "23:19:20", "remaining_time": "3:49:45"} +{"current_steps": 2352, "total_steps": 2737, "loss": 0.9901, "lr": 1.181727021437531e-06, "epoch": 6.015345268542199, "percentage": 85.93, "elapsed_time": "23:19:56", "remaining_time": "3:49:09"} +{"current_steps": 2353, "total_steps": 2737, "loss": 1.0139, "lr": 1.1757192200809487e-06, "epoch": 6.017902813299233, "percentage": 85.97, "elapsed_time": "23:20:31", "remaining_time": "3:48:33"} +{"current_steps": 2354, "total_steps": 2737, "loss": 1.0064, "lr": 1.1697257752774581e-06, "epoch": 6.020460358056266, "percentage": 86.01, "elapsed_time": "23:21:07", "remaining_time": "3:47:57"} +{"current_steps": 2355, "total_steps": 2737, "loss": 1.0055, "lr": 1.1637466967780186e-06, "epoch": 6.023017902813299, "percentage": 86.04, "elapsed_time": "23:21:42", "remaining_time": "3:47:22"} +{"current_steps": 2356, "total_steps": 2737, "loss": 1.0334, "lr": 1.1577819943102132e-06, "epoch": 6.025575447570333, "percentage": 86.08, "elapsed_time": "23:22:18", "remaining_time": "3:46:46"} +{"current_steps": 2357, "total_steps": 2737, "loss": 1.063, "lr": 1.1518316775782456e-06, "epoch": 6.028132992327365, "percentage": 86.12, "elapsed_time": "23:22:54", "remaining_time": "3:46:10"} +{"current_steps": 2358, "total_steps": 2737, "loss": 1.0245, "lr": 1.1458957562629048e-06, "epoch": 6.030690537084399, "percentage": 86.15, "elapsed_time": "23:23:29", "remaining_time": "3:45:34"} +{"current_steps": 2359, "total_steps": 2737, "loss": 1.016, "lr": 1.1399742400215685e-06, "epoch": 6.033248081841432, "percentage": 86.19, "elapsed_time": "23:24:05", "remaining_time": "3:44:59"} +{"current_steps": 2360, "total_steps": 2737, "loss": 1.0034, "lr": 1.1340671384881664e-06, "epoch": 6.035805626598465, "percentage": 86.23, "elapsed_time": "23:24:41", "remaining_time": "3:44:23"} +{"current_steps": 2361, "total_steps": 2737, "loss": 1.0303, "lr": 1.128174461273187e-06, "epoch": 6.038363171355499, "percentage": 86.26, "elapsed_time": "23:25:16", "remaining_time": "3:43:47"} +{"current_steps": 2362, "total_steps": 2737, "loss": 0.9908, "lr": 1.122296217963651e-06, "epoch": 6.040920716112532, "percentage": 86.3, "elapsed_time": "23:25:52", "remaining_time": "3:43:12"} +{"current_steps": 2363, "total_steps": 2737, "loss": 1.0143, "lr": 1.116432418123088e-06, "epoch": 6.043478260869565, "percentage": 86.34, "elapsed_time": "23:26:27", "remaining_time": "3:42:36"} +{"current_steps": 2364, "total_steps": 2737, "loss": 1.0389, "lr": 1.1105830712915355e-06, "epoch": 6.046035805626598, "percentage": 86.37, "elapsed_time": "23:27:03", "remaining_time": "3:42:00"} +{"current_steps": 2365, "total_steps": 2737, "loss": 0.9923, "lr": 1.1047481869855136e-06, "epoch": 6.048593350383632, "percentage": 86.41, "elapsed_time": "23:27:38", "remaining_time": "3:41:24"} +{"current_steps": 2366, "total_steps": 2737, "loss": 0.9989, "lr": 1.0989277746980186e-06, "epoch": 6.051150895140665, "percentage": 86.45, "elapsed_time": "23:28:14", "remaining_time": "3:40:49"} +{"current_steps": 2367, "total_steps": 2737, "loss": 1.0002, "lr": 1.0931218438984903e-06, "epoch": 6.053708439897698, "percentage": 86.48, "elapsed_time": "23:28:50", "remaining_time": "3:40:13"} +{"current_steps": 2368, "total_steps": 2737, "loss": 0.9855, "lr": 1.0873304040328193e-06, "epoch": 6.056265984654732, "percentage": 86.52, "elapsed_time": "23:29:25", "remaining_time": "3:39:37"} +{"current_steps": 2369, "total_steps": 2737, "loss": 1.0108, "lr": 1.0815534645233182e-06, "epoch": 6.0588235294117645, "percentage": 86.55, "elapsed_time": "23:30:01", "remaining_time": "3:39:01"} +{"current_steps": 2370, "total_steps": 2737, "loss": 1.0134, "lr": 1.075791034768704e-06, "epoch": 6.061381074168798, "percentage": 86.59, "elapsed_time": "23:30:36", "remaining_time": "3:38:26"} +{"current_steps": 2371, "total_steps": 2737, "loss": 0.9819, "lr": 1.0700431241440888e-06, "epoch": 6.063938618925831, "percentage": 86.63, "elapsed_time": "23:31:12", "remaining_time": "3:37:50"} +{"current_steps": 2372, "total_steps": 2737, "loss": 0.999, "lr": 1.064309742000963e-06, "epoch": 6.0664961636828645, "percentage": 86.66, "elapsed_time": "23:31:47", "remaining_time": "3:37:14"} +{"current_steps": 2373, "total_steps": 2737, "loss": 1.0263, "lr": 1.0585908976671844e-06, "epoch": 6.069053708439898, "percentage": 86.7, "elapsed_time": "23:32:23", "remaining_time": "3:36:39"} +{"current_steps": 2374, "total_steps": 2737, "loss": 0.9989, "lr": 1.052886600446954e-06, "epoch": 6.071611253196931, "percentage": 86.74, "elapsed_time": "23:32:59", "remaining_time": "3:36:03"} +{"current_steps": 2375, "total_steps": 2737, "loss": 1.0168, "lr": 1.0471968596208026e-06, "epoch": 6.0741687979539645, "percentage": 86.77, "elapsed_time": "23:33:34", "remaining_time": "3:35:27"} +{"current_steps": 2376, "total_steps": 2737, "loss": 1.0016, "lr": 1.0415216844455889e-06, "epoch": 6.076726342710997, "percentage": 86.81, "elapsed_time": "23:34:10", "remaining_time": "3:34:51"} +{"current_steps": 2377, "total_steps": 2737, "loss": 1.0207, "lr": 1.0358610841544657e-06, "epoch": 6.079283887468031, "percentage": 86.85, "elapsed_time": "23:34:45", "remaining_time": "3:34:16"} +{"current_steps": 2378, "total_steps": 2737, "loss": 0.9889, "lr": 1.0302150679568745e-06, "epoch": 6.081841432225064, "percentage": 86.88, "elapsed_time": "23:35:21", "remaining_time": "3:33:40"} +{"current_steps": 2379, "total_steps": 2737, "loss": 1.0278, "lr": 1.0245836450385304e-06, "epoch": 6.084398976982097, "percentage": 86.92, "elapsed_time": "23:35:57", "remaining_time": "3:33:04"} +{"current_steps": 2380, "total_steps": 2737, "loss": 1.0024, "lr": 1.0189668245614092e-06, "epoch": 6.086956521739131, "percentage": 86.96, "elapsed_time": "23:36:32", "remaining_time": "3:32:28"} +{"current_steps": 2381, "total_steps": 2737, "loss": 1.0346, "lr": 1.0133646156637244e-06, "epoch": 6.089514066496164, "percentage": 86.99, "elapsed_time": "23:37:08", "remaining_time": "3:31:53"} +{"current_steps": 2382, "total_steps": 2737, "loss": 1.0176, "lr": 1.0077770274599187e-06, "epoch": 6.092071611253197, "percentage": 87.03, "elapsed_time": "23:37:43", "remaining_time": "3:31:17"} +{"current_steps": 2383, "total_steps": 2737, "loss": 0.9894, "lr": 1.002204069040652e-06, "epoch": 6.09462915601023, "percentage": 87.07, "elapsed_time": "23:38:19", "remaining_time": "3:30:41"} +{"current_steps": 2384, "total_steps": 2737, "loss": 1.04, "lr": 9.966457494727777e-07, "epoch": 6.0971867007672635, "percentage": 87.1, "elapsed_time": "23:38:55", "remaining_time": "3:30:06"} +{"current_steps": 2385, "total_steps": 2737, "loss": 1.0049, "lr": 9.91102077799333e-07, "epoch": 6.099744245524296, "percentage": 87.14, "elapsed_time": "23:39:30", "remaining_time": "3:29:30"} +{"current_steps": 2386, "total_steps": 2737, "loss": 0.9933, "lr": 9.855730630395244e-07, "epoch": 6.10230179028133, "percentage": 87.18, "elapsed_time": "23:40:06", "remaining_time": "3:28:54"} +{"current_steps": 2387, "total_steps": 2737, "loss": 1.0285, "lr": 9.800587141887173e-07, "epoch": 6.1048593350383635, "percentage": 87.21, "elapsed_time": "23:40:41", "remaining_time": "3:28:18"} +{"current_steps": 2388, "total_steps": 2737, "loss": 1.0134, "lr": 9.745590402184092e-07, "epoch": 6.107416879795396, "percentage": 87.25, "elapsed_time": "23:41:17", "remaining_time": "3:27:43"} +{"current_steps": 2389, "total_steps": 2737, "loss": 0.9778, "lr": 9.690740500762241e-07, "epoch": 6.10997442455243, "percentage": 87.29, "elapsed_time": "23:41:53", "remaining_time": "3:27:07"} +{"current_steps": 2390, "total_steps": 2737, "loss": 1.0048, "lr": 9.636037526859032e-07, "epoch": 6.112531969309463, "percentage": 87.32, "elapsed_time": "23:42:28", "remaining_time": "3:26:31"} +{"current_steps": 2391, "total_steps": 2737, "loss": 1.0355, "lr": 9.58148156947276e-07, "epoch": 6.115089514066496, "percentage": 87.36, "elapsed_time": "23:43:04", "remaining_time": "3:25:55"} +{"current_steps": 2392, "total_steps": 2737, "loss": 0.9894, "lr": 9.52707271736254e-07, "epoch": 6.117647058823529, "percentage": 87.39, "elapsed_time": "23:43:39", "remaining_time": "3:25:20"} +{"current_steps": 2393, "total_steps": 2737, "loss": 1.034, "lr": 9.472811059048182e-07, "epoch": 6.120204603580563, "percentage": 87.43, "elapsed_time": "23:44:15", "remaining_time": "3:24:44"} +{"current_steps": 2394, "total_steps": 2737, "loss": 1.0279, "lr": 9.418696682810014e-07, "epoch": 6.122762148337596, "percentage": 87.47, "elapsed_time": "23:44:50", "remaining_time": "3:24:08"} +{"current_steps": 2395, "total_steps": 2737, "loss": 1.0346, "lr": 9.364729676688755e-07, "epoch": 6.125319693094629, "percentage": 87.5, "elapsed_time": "23:45:26", "remaining_time": "3:23:32"} +{"current_steps": 2396, "total_steps": 2737, "loss": 1.0042, "lr": 9.310910128485317e-07, "epoch": 6.127877237851663, "percentage": 87.54, "elapsed_time": "23:46:02", "remaining_time": "3:22:57"} +{"current_steps": 2397, "total_steps": 2737, "loss": 0.9979, "lr": 9.257238125760781e-07, "epoch": 6.130434782608695, "percentage": 87.58, "elapsed_time": "23:46:37", "remaining_time": "3:22:21"} +{"current_steps": 2398, "total_steps": 2737, "loss": 1.0151, "lr": 9.203713755836108e-07, "epoch": 6.132992327365729, "percentage": 87.61, "elapsed_time": "23:47:13", "remaining_time": "3:21:45"} +{"current_steps": 2399, "total_steps": 2737, "loss": 1.0003, "lr": 9.150337105792129e-07, "epoch": 6.135549872122763, "percentage": 87.65, "elapsed_time": "23:47:48", "remaining_time": "3:21:10"} +{"current_steps": 2400, "total_steps": 2737, "loss": 1.0174, "lr": 9.097108262469268e-07, "epoch": 6.138107416879795, "percentage": 87.69, "elapsed_time": "23:48:24", "remaining_time": "3:20:34"} +{"current_steps": 2401, "total_steps": 2737, "loss": 1.024, "lr": 9.044027312467574e-07, "epoch": 6.140664961636829, "percentage": 87.72, "elapsed_time": "23:49:00", "remaining_time": "3:19:58"} +{"current_steps": 2402, "total_steps": 2737, "loss": 1.0238, "lr": 8.991094342146423e-07, "epoch": 6.143222506393862, "percentage": 87.76, "elapsed_time": "23:49:36", "remaining_time": "3:19:22"} +{"current_steps": 2403, "total_steps": 2737, "loss": 1.0361, "lr": 8.938309437624415e-07, "epoch": 6.145780051150895, "percentage": 87.8, "elapsed_time": "23:50:11", "remaining_time": "3:18:47"} +{"current_steps": 2404, "total_steps": 2737, "loss": 1.0195, "lr": 8.885672684779345e-07, "epoch": 6.148337595907928, "percentage": 87.83, "elapsed_time": "23:50:47", "remaining_time": "3:18:11"} +{"current_steps": 2405, "total_steps": 2737, "loss": 1.0147, "lr": 8.833184169247877e-07, "epoch": 6.150895140664962, "percentage": 87.87, "elapsed_time": "23:51:23", "remaining_time": "3:17:35"} +{"current_steps": 2406, "total_steps": 2737, "loss": 1.0443, "lr": 8.780843976425568e-07, "epoch": 6.153452685421995, "percentage": 87.91, "elapsed_time": "23:51:58", "remaining_time": "3:17:00"} +{"current_steps": 2407, "total_steps": 2737, "loss": 1.0269, "lr": 8.728652191466602e-07, "epoch": 6.156010230179028, "percentage": 87.94, "elapsed_time": "23:52:34", "remaining_time": "3:16:24"} +{"current_steps": 2408, "total_steps": 2737, "loss": 1.0407, "lr": 8.676608899283789e-07, "epoch": 6.158567774936062, "percentage": 87.98, "elapsed_time": "23:53:10", "remaining_time": "3:15:48"} +{"current_steps": 2409, "total_steps": 2737, "loss": 0.998, "lr": 8.62471418454831e-07, "epoch": 6.161125319693094, "percentage": 88.02, "elapsed_time": "23:53:45", "remaining_time": "3:15:12"} +{"current_steps": 2410, "total_steps": 2737, "loss": 1.0215, "lr": 8.572968131689585e-07, "epoch": 6.163682864450128, "percentage": 88.05, "elapsed_time": "23:54:21", "remaining_time": "3:14:37"} +{"current_steps": 2411, "total_steps": 2737, "loss": 1.0362, "lr": 8.521370824895236e-07, "epoch": 6.166240409207161, "percentage": 88.09, "elapsed_time": "23:54:56", "remaining_time": "3:14:01"} +{"current_steps": 2412, "total_steps": 2737, "loss": 1.0005, "lr": 8.469922348110871e-07, "epoch": 6.168797953964194, "percentage": 88.13, "elapsed_time": "23:55:32", "remaining_time": "3:13:25"} +{"current_steps": 2413, "total_steps": 2737, "loss": 1.0154, "lr": 8.41862278503991e-07, "epoch": 6.171355498721228, "percentage": 88.16, "elapsed_time": "23:56:08", "remaining_time": "3:12:50"} +{"current_steps": 2414, "total_steps": 2737, "loss": 0.9864, "lr": 8.367472219143524e-07, "epoch": 6.173913043478261, "percentage": 88.2, "elapsed_time": "23:56:43", "remaining_time": "3:12:14"} +{"current_steps": 2415, "total_steps": 2737, "loss": 1.01, "lr": 8.316470733640525e-07, "epoch": 6.176470588235294, "percentage": 88.24, "elapsed_time": "23:57:19", "remaining_time": "3:11:38"} +{"current_steps": 2416, "total_steps": 2737, "loss": 1.0349, "lr": 8.265618411507148e-07, "epoch": 6.179028132992327, "percentage": 88.27, "elapsed_time": "23:57:55", "remaining_time": "3:11:02"} +{"current_steps": 2417, "total_steps": 2737, "loss": 1.0108, "lr": 8.214915335476892e-07, "epoch": 6.181585677749361, "percentage": 88.31, "elapsed_time": "23:58:30", "remaining_time": "3:10:27"} +{"current_steps": 2418, "total_steps": 2737, "loss": 1.0316, "lr": 8.164361588040526e-07, "epoch": 6.1841432225063935, "percentage": 88.34, "elapsed_time": "23:59:06", "remaining_time": "3:09:51"} +{"current_steps": 2419, "total_steps": 2737, "loss": 1.0181, "lr": 8.113957251445837e-07, "epoch": 6.186700767263427, "percentage": 88.38, "elapsed_time": "23:59:41", "remaining_time": "3:09:15"} +{"current_steps": 2420, "total_steps": 2737, "loss": 1.0163, "lr": 8.063702407697515e-07, "epoch": 6.189258312020461, "percentage": 88.42, "elapsed_time": "1 day, 0:00:17", "remaining_time": "3:08:39"} +{"current_steps": 2421, "total_steps": 2737, "loss": 1.0316, "lr": 8.013597138557039e-07, "epoch": 6.1918158567774935, "percentage": 88.45, "elapsed_time": "1 day, 0:00:53", "remaining_time": "3:08:04"} +{"current_steps": 2422, "total_steps": 2737, "loss": 1.0295, "lr": 7.963641525542564e-07, "epoch": 6.194373401534527, "percentage": 88.49, "elapsed_time": "1 day, 0:01:28", "remaining_time": "3:07:28"} +{"current_steps": 2423, "total_steps": 2737, "loss": 1.0443, "lr": 7.913835649928792e-07, "epoch": 6.19693094629156, "percentage": 88.53, "elapsed_time": "1 day, 0:02:04", "remaining_time": "3:06:52"} +{"current_steps": 2424, "total_steps": 2737, "loss": 0.9758, "lr": 7.864179592746679e-07, "epoch": 6.1994884910485935, "percentage": 88.56, "elapsed_time": "1 day, 0:02:40", "remaining_time": "3:06:17"} +{"current_steps": 2425, "total_steps": 2737, "loss": 0.998, "lr": 7.814673434783604e-07, "epoch": 6.202046035805626, "percentage": 88.6, "elapsed_time": "1 day, 0:03:15", "remaining_time": "3:05:41"} +{"current_steps": 2426, "total_steps": 2737, "loss": 1.0115, "lr": 7.765317256582949e-07, "epoch": 6.20460358056266, "percentage": 88.64, "elapsed_time": "1 day, 0:03:51", "remaining_time": "3:05:05"} +{"current_steps": 2427, "total_steps": 2737, "loss": 1.0459, "lr": 7.716111138444115e-07, "epoch": 6.207161125319693, "percentage": 88.67, "elapsed_time": "1 day, 0:04:26", "remaining_time": "3:04:29"} +{"current_steps": 2428, "total_steps": 2737, "loss": 1.0274, "lr": 7.667055160422432e-07, "epoch": 6.209718670076726, "percentage": 88.71, "elapsed_time": "1 day, 0:05:02", "remaining_time": "3:03:54"} +{"current_steps": 2429, "total_steps": 2737, "loss": 1.0011, "lr": 7.618149402328867e-07, "epoch": 6.21227621483376, "percentage": 88.75, "elapsed_time": "1 day, 0:05:38", "remaining_time": "3:03:18"} +{"current_steps": 2430, "total_steps": 2737, "loss": 1.0635, "lr": 7.569393943730064e-07, "epoch": 6.2148337595907925, "percentage": 88.78, "elapsed_time": "1 day, 0:06:13", "remaining_time": "3:02:42"} +{"current_steps": 2431, "total_steps": 2737, "loss": 0.9878, "lr": 7.52078886394807e-07, "epoch": 6.217391304347826, "percentage": 88.82, "elapsed_time": "1 day, 0:06:49", "remaining_time": "3:02:07"} +{"current_steps": 2432, "total_steps": 2737, "loss": 1.0316, "lr": 7.472334242060331e-07, "epoch": 6.21994884910486, "percentage": 88.86, "elapsed_time": "1 day, 0:07:25", "remaining_time": "3:01:31"} +{"current_steps": 2433, "total_steps": 2737, "loss": 1.0098, "lr": 7.424030156899475e-07, "epoch": 6.2225063938618925, "percentage": 88.89, "elapsed_time": "1 day, 0:08:00", "remaining_time": "3:00:55"} +{"current_steps": 2434, "total_steps": 2737, "loss": 1.0508, "lr": 7.375876687053252e-07, "epoch": 6.225063938618926, "percentage": 88.93, "elapsed_time": "1 day, 0:08:36", "remaining_time": "3:00:19"} +{"current_steps": 2435, "total_steps": 2737, "loss": 1.0265, "lr": 7.327873910864325e-07, "epoch": 6.227621483375959, "percentage": 88.97, "elapsed_time": "1 day, 0:09:12", "remaining_time": "2:59:44"} +{"current_steps": 2436, "total_steps": 2737, "loss": 1.038, "lr": 7.280021906430201e-07, "epoch": 6.2301790281329925, "percentage": 89.0, "elapsed_time": "1 day, 0:09:47", "remaining_time": "2:59:08"} +{"current_steps": 2437, "total_steps": 2737, "loss": 0.9938, "lr": 7.23232075160315e-07, "epoch": 6.232736572890025, "percentage": 89.04, "elapsed_time": "1 day, 0:10:23", "remaining_time": "2:58:32"} +{"current_steps": 2438, "total_steps": 2737, "loss": 1.0209, "lr": 7.184770523989904e-07, "epoch": 6.235294117647059, "percentage": 89.08, "elapsed_time": "1 day, 0:10:59", "remaining_time": "2:57:57"} +{"current_steps": 2439, "total_steps": 2737, "loss": 1.0369, "lr": 7.137371300951746e-07, "epoch": 6.2378516624040925, "percentage": 89.11, "elapsed_time": "1 day, 0:11:34", "remaining_time": "2:57:21"} +{"current_steps": 2440, "total_steps": 2737, "loss": 1.0417, "lr": 7.090123159604234e-07, "epoch": 6.240409207161125, "percentage": 89.15, "elapsed_time": "1 day, 0:12:10", "remaining_time": "2:56:45"} +{"current_steps": 2441, "total_steps": 2737, "loss": 0.99, "lr": 7.043026176817158e-07, "epoch": 6.242966751918159, "percentage": 89.19, "elapsed_time": "1 day, 0:12:46", "remaining_time": "2:56:09"} +{"current_steps": 2442, "total_steps": 2737, "loss": 1.0065, "lr": 6.996080429214347e-07, "epoch": 6.245524296675192, "percentage": 89.22, "elapsed_time": "1 day, 0:13:21", "remaining_time": "2:55:34"} +{"current_steps": 2443, "total_steps": 2737, "loss": 1.0359, "lr": 6.949285993173593e-07, "epoch": 6.248081841432225, "percentage": 89.26, "elapsed_time": "1 day, 0:13:57", "remaining_time": "2:54:58"} +{"current_steps": 2444, "total_steps": 2737, "loss": 0.97, "lr": 6.902642944826544e-07, "epoch": 6.250639386189258, "percentage": 89.29, "elapsed_time": "1 day, 0:14:32", "remaining_time": "2:54:22"} +{"current_steps": 2445, "total_steps": 2737, "loss": 1.0192, "lr": 6.856151360058505e-07, "epoch": 6.253196930946292, "percentage": 89.33, "elapsed_time": "1 day, 0:15:08", "remaining_time": "2:53:47"} +{"current_steps": 2446, "total_steps": 2737, "loss": 1.0466, "lr": 6.809811314508386e-07, "epoch": 6.255754475703325, "percentage": 89.37, "elapsed_time": "1 day, 0:15:44", "remaining_time": "2:53:11"} +{"current_steps": 2447, "total_steps": 2737, "loss": 1.0356, "lr": 6.763622883568521e-07, "epoch": 6.258312020460358, "percentage": 89.4, "elapsed_time": "1 day, 0:16:19", "remaining_time": "2:52:35"} +{"current_steps": 2448, "total_steps": 2737, "loss": 1.036, "lr": 6.717586142384624e-07, "epoch": 6.260869565217392, "percentage": 89.44, "elapsed_time": "1 day, 0:16:55", "remaining_time": "2:51:59"} +{"current_steps": 2449, "total_steps": 2737, "loss": 1.0261, "lr": 6.671701165855593e-07, "epoch": 6.263427109974424, "percentage": 89.48, "elapsed_time": "1 day, 0:17:31", "remaining_time": "2:51:24"} +{"current_steps": 2450, "total_steps": 2737, "loss": 1.0119, "lr": 6.625968028633389e-07, "epoch": 6.265984654731458, "percentage": 89.51, "elapsed_time": "1 day, 0:18:06", "remaining_time": "2:50:48"} +{"current_steps": 2451, "total_steps": 2737, "loss": 1.021, "lr": 6.580386805122996e-07, "epoch": 6.268542199488491, "percentage": 89.55, "elapsed_time": "1 day, 0:18:42", "remaining_time": "2:50:12"} +{"current_steps": 2452, "total_steps": 2737, "loss": 1.0635, "lr": 6.534957569482214e-07, "epoch": 6.271099744245524, "percentage": 89.59, "elapsed_time": "1 day, 0:19:18", "remaining_time": "2:49:37"} +{"current_steps": 2453, "total_steps": 2737, "loss": 1.0129, "lr": 6.489680395621556e-07, "epoch": 6.273657289002558, "percentage": 89.62, "elapsed_time": "1 day, 0:19:53", "remaining_time": "2:49:01"} +{"current_steps": 2454, "total_steps": 2737, "loss": 0.9876, "lr": 6.444555357204152e-07, "epoch": 6.276214833759591, "percentage": 89.66, "elapsed_time": "1 day, 0:20:29", "remaining_time": "2:48:25"} +{"current_steps": 2455, "total_steps": 2737, "loss": 1.0258, "lr": 6.39958252764562e-07, "epoch": 6.278772378516624, "percentage": 89.7, "elapsed_time": "1 day, 0:21:04", "remaining_time": "2:47:49"} +{"current_steps": 2456, "total_steps": 2737, "loss": 1.0364, "lr": 6.354761980113966e-07, "epoch": 6.281329923273657, "percentage": 89.73, "elapsed_time": "1 day, 0:21:40", "remaining_time": "2:47:14"} +{"current_steps": 2457, "total_steps": 2737, "loss": 1.0295, "lr": 6.31009378752937e-07, "epoch": 6.283887468030691, "percentage": 89.77, "elapsed_time": "1 day, 0:22:16", "remaining_time": "2:46:38"} +{"current_steps": 2458, "total_steps": 2737, "loss": 1.003, "lr": 6.265578022564233e-07, "epoch": 6.286445012787723, "percentage": 89.81, "elapsed_time": "1 day, 0:22:52", "remaining_time": "2:46:02"} +{"current_steps": 2459, "total_steps": 2737, "loss": 1.0186, "lr": 6.221214757642901e-07, "epoch": 6.289002557544757, "percentage": 89.84, "elapsed_time": "1 day, 0:23:27", "remaining_time": "2:45:27"} +{"current_steps": 2460, "total_steps": 2737, "loss": 1.0325, "lr": 6.177004064941616e-07, "epoch": 6.291560102301791, "percentage": 89.88, "elapsed_time": "1 day, 0:24:03", "remaining_time": "2:44:51"} +{"current_steps": 2461, "total_steps": 2737, "loss": 1.0034, "lr": 6.132946016388453e-07, "epoch": 6.294117647058823, "percentage": 89.92, "elapsed_time": "1 day, 0:24:38", "remaining_time": "2:44:15"} +{"current_steps": 2462, "total_steps": 2737, "loss": 0.9823, "lr": 6.089040683663083e-07, "epoch": 6.296675191815857, "percentage": 89.95, "elapsed_time": "1 day, 0:25:14", "remaining_time": "2:43:39"} +{"current_steps": 2463, "total_steps": 2737, "loss": 1.0409, "lr": 6.045288138196725e-07, "epoch": 6.29923273657289, "percentage": 89.99, "elapsed_time": "1 day, 0:25:50", "remaining_time": "2:43:04"} +{"current_steps": 2464, "total_steps": 2737, "loss": 1.0022, "lr": 6.001688451172027e-07, "epoch": 6.301790281329923, "percentage": 90.03, "elapsed_time": "1 day, 0:26:25", "remaining_time": "2:42:28"} +{"current_steps": 2465, "total_steps": 2737, "loss": 1.0107, "lr": 5.958241693522993e-07, "epoch": 6.304347826086957, "percentage": 90.06, "elapsed_time": "1 day, 0:27:01", "remaining_time": "2:41:52"} +{"current_steps": 2466, "total_steps": 2737, "loss": 0.9971, "lr": 5.914947935934756e-07, "epoch": 6.30690537084399, "percentage": 90.1, "elapsed_time": "1 day, 0:27:36", "remaining_time": "2:41:16"} +{"current_steps": 2467, "total_steps": 2737, "loss": 1.0117, "lr": 5.871807248843542e-07, "epoch": 6.309462915601023, "percentage": 90.14, "elapsed_time": "1 day, 0:28:12", "remaining_time": "2:40:41"} +{"current_steps": 2468, "total_steps": 2737, "loss": 1.0199, "lr": 5.828819702436573e-07, "epoch": 6.312020460358056, "percentage": 90.17, "elapsed_time": "1 day, 0:28:48", "remaining_time": "2:40:05"} +{"current_steps": 2469, "total_steps": 2737, "loss": 1.003, "lr": 5.785985366651892e-07, "epoch": 6.31457800511509, "percentage": 90.21, "elapsed_time": "1 day, 0:29:23", "remaining_time": "2:39:29"} +{"current_steps": 2470, "total_steps": 2737, "loss": 1.0067, "lr": 5.743304311178289e-07, "epoch": 6.3171355498721224, "percentage": 90.24, "elapsed_time": "1 day, 0:29:59", "remaining_time": "2:38:54"} +{"current_steps": 2471, "total_steps": 2737, "loss": 1.0196, "lr": 5.70077660545515e-07, "epoch": 6.319693094629156, "percentage": 90.28, "elapsed_time": "1 day, 0:30:34", "remaining_time": "2:38:18"} +{"current_steps": 2472, "total_steps": 2737, "loss": 1.0127, "lr": 5.658402318672418e-07, "epoch": 6.322250639386189, "percentage": 90.32, "elapsed_time": "1 day, 0:31:10", "remaining_time": "2:37:42"} +{"current_steps": 2473, "total_steps": 2737, "loss": 1.0161, "lr": 5.616181519770414e-07, "epoch": 6.324808184143222, "percentage": 90.35, "elapsed_time": "1 day, 0:31:46", "remaining_time": "2:37:06"} +{"current_steps": 2474, "total_steps": 2737, "loss": 1.0216, "lr": 5.574114277439702e-07, "epoch": 6.327365728900256, "percentage": 90.39, "elapsed_time": "1 day, 0:32:21", "remaining_time": "2:36:31"} +{"current_steps": 2475, "total_steps": 2737, "loss": 1.0263, "lr": 5.53220066012109e-07, "epoch": 6.329923273657289, "percentage": 90.43, "elapsed_time": "1 day, 0:32:57", "remaining_time": "2:35:55"} +{"current_steps": 2476, "total_steps": 2737, "loss": 1.0737, "lr": 5.490440736005397e-07, "epoch": 6.332480818414322, "percentage": 90.46, "elapsed_time": "1 day, 0:33:32", "remaining_time": "2:35:19"} +{"current_steps": 2477, "total_steps": 2737, "loss": 1.028, "lr": 5.448834573033424e-07, "epoch": 6.335038363171355, "percentage": 90.5, "elapsed_time": "1 day, 0:34:08", "remaining_time": "2:34:44"} +{"current_steps": 2478, "total_steps": 2737, "loss": 0.9949, "lr": 5.407382238895765e-07, "epoch": 6.337595907928389, "percentage": 90.54, "elapsed_time": "1 day, 0:34:43", "remaining_time": "2:34:08"} +{"current_steps": 2479, "total_steps": 2737, "loss": 1.0422, "lr": 5.366083801032806e-07, "epoch": 6.340153452685422, "percentage": 90.57, "elapsed_time": "1 day, 0:35:19", "remaining_time": "2:33:32"} +{"current_steps": 2480, "total_steps": 2737, "loss": 1.0017, "lr": 5.324939326634515e-07, "epoch": 6.342710997442455, "percentage": 90.61, "elapsed_time": "1 day, 0:35:55", "remaining_time": "2:32:56"} +{"current_steps": 2481, "total_steps": 2737, "loss": 1.0181, "lr": 5.283948882640355e-07, "epoch": 6.345268542199489, "percentage": 90.65, "elapsed_time": "1 day, 0:36:30", "remaining_time": "2:32:21"} +{"current_steps": 2482, "total_steps": 2737, "loss": 1.0346, "lr": 5.24311253573927e-07, "epoch": 6.3478260869565215, "percentage": 90.68, "elapsed_time": "1 day, 0:37:06", "remaining_time": "2:31:45"} +{"current_steps": 2483, "total_steps": 2737, "loss": 1.0135, "lr": 5.202430352369392e-07, "epoch": 6.350383631713555, "percentage": 90.72, "elapsed_time": "1 day, 0:37:42", "remaining_time": "2:31:09"} +{"current_steps": 2484, "total_steps": 2737, "loss": 1.0435, "lr": 5.161902398718121e-07, "epoch": 6.352941176470588, "percentage": 90.76, "elapsed_time": "1 day, 0:38:17", "remaining_time": "2:30:34"} +{"current_steps": 2485, "total_steps": 2737, "loss": 1.0377, "lr": 5.121528740721871e-07, "epoch": 6.3554987212276215, "percentage": 90.79, "elapsed_time": "1 day, 0:38:53", "remaining_time": "2:29:58"} +{"current_steps": 2486, "total_steps": 2737, "loss": 1.034, "lr": 5.081309444066085e-07, "epoch": 6.358056265984655, "percentage": 90.83, "elapsed_time": "1 day, 0:39:28", "remaining_time": "2:29:22"} +{"current_steps": 2487, "total_steps": 2737, "loss": 1.011, "lr": 5.041244574185056e-07, "epoch": 6.360613810741688, "percentage": 90.87, "elapsed_time": "1 day, 0:40:04", "remaining_time": "2:28:46"} +{"current_steps": 2488, "total_steps": 2737, "loss": 0.9861, "lr": 5.001334196261776e-07, "epoch": 6.3631713554987215, "percentage": 90.9, "elapsed_time": "1 day, 0:40:40", "remaining_time": "2:28:11"} +{"current_steps": 2489, "total_steps": 2737, "loss": 1.0146, "lr": 4.961578375227982e-07, "epoch": 6.365728900255754, "percentage": 90.94, "elapsed_time": "1 day, 0:41:15", "remaining_time": "2:27:35"} +{"current_steps": 2490, "total_steps": 2737, "loss": 1.0204, "lr": 4.921977175763881e-07, "epoch": 6.368286445012788, "percentage": 90.98, "elapsed_time": "1 day, 0:41:51", "remaining_time": "2:26:59"} +{"current_steps": 2491, "total_steps": 2737, "loss": 1.0313, "lr": 4.882530662298168e-07, "epoch": 6.370843989769821, "percentage": 91.01, "elapsed_time": "1 day, 0:42:26", "remaining_time": "2:26:23"} +{"current_steps": 2492, "total_steps": 2737, "loss": 1.032, "lr": 4.843238899007829e-07, "epoch": 6.373401534526854, "percentage": 91.05, "elapsed_time": "1 day, 0:43:02", "remaining_time": "2:25:48"} +{"current_steps": 2493, "total_steps": 2737, "loss": 1.0037, "lr": 4.804101949818119e-07, "epoch": 6.375959079283888, "percentage": 91.09, "elapsed_time": "1 day, 0:43:38", "remaining_time": "2:25:12"} +{"current_steps": 2494, "total_steps": 2737, "loss": 1.0218, "lr": 4.765119878402424e-07, "epoch": 6.378516624040921, "percentage": 91.12, "elapsed_time": "1 day, 0:44:13", "remaining_time": "2:24:36"} +{"current_steps": 2495, "total_steps": 2737, "loss": 1.0235, "lr": 4.726292748182104e-07, "epoch": 6.381074168797954, "percentage": 91.16, "elapsed_time": "1 day, 0:44:49", "remaining_time": "2:24:01"} +{"current_steps": 2496, "total_steps": 2737, "loss": 1.0095, "lr": 4.687620622326505e-07, "epoch": 6.383631713554987, "percentage": 91.19, "elapsed_time": "1 day, 0:45:24", "remaining_time": "2:23:25"} +{"current_steps": 2497, "total_steps": 2737, "loss": 1.0211, "lr": 4.6491035637527437e-07, "epoch": 6.3861892583120206, "percentage": 91.23, "elapsed_time": "1 day, 0:46:00", "remaining_time": "2:22:49"} +{"current_steps": 2498, "total_steps": 2737, "loss": 0.996, "lr": 4.6107416351256595e-07, "epoch": 6.388746803069053, "percentage": 91.27, "elapsed_time": "1 day, 0:46:36", "remaining_time": "2:22:13"} +{"current_steps": 2499, "total_steps": 2737, "loss": 1.0473, "lr": 4.5725348988577057e-07, "epoch": 6.391304347826087, "percentage": 91.3, "elapsed_time": "1 day, 0:47:11", "remaining_time": "2:21:38"} +{"current_steps": 2500, "total_steps": 2737, "loss": 0.9916, "lr": 4.5344834171088594e-07, "epoch": 6.3938618925831205, "percentage": 91.34, "elapsed_time": "1 day, 0:47:47", "remaining_time": "2:21:02"} +{"current_steps": 2501, "total_steps": 2737, "loss": 1.0537, "lr": 4.496587251786544e-07, "epoch": 6.396419437340153, "percentage": 91.38, "elapsed_time": "1 day, 0:48:22", "remaining_time": "2:20:26"} +{"current_steps": 2502, "total_steps": 2737, "loss": 1.0354, "lr": 4.4588464645453856e-07, "epoch": 6.398976982097187, "percentage": 91.41, "elapsed_time": "1 day, 0:48:58", "remaining_time": "2:19:51"} +{"current_steps": 2503, "total_steps": 2737, "loss": 1.0056, "lr": 4.421261116787323e-07, "epoch": 6.40153452685422, "percentage": 91.45, "elapsed_time": "1 day, 0:49:34", "remaining_time": "2:19:15"} +{"current_steps": 2504, "total_steps": 2737, "loss": 0.9983, "lr": 4.383831269661343e-07, "epoch": 6.404092071611253, "percentage": 91.49, "elapsed_time": "1 day, 0:50:09", "remaining_time": "2:18:39"} +{"current_steps": 2505, "total_steps": 2737, "loss": 1.0276, "lr": 4.3465569840635105e-07, "epoch": 6.406649616368286, "percentage": 91.52, "elapsed_time": "1 day, 0:50:45", "remaining_time": "2:18:03"} +{"current_steps": 2506, "total_steps": 2737, "loss": 1.0119, "lr": 4.309438320636705e-07, "epoch": 6.40920716112532, "percentage": 91.56, "elapsed_time": "1 day, 0:51:21", "remaining_time": "2:17:28"} +{"current_steps": 2507, "total_steps": 2737, "loss": 1.0257, "lr": 4.272475339770699e-07, "epoch": 6.411764705882353, "percentage": 91.6, "elapsed_time": "1 day, 0:51:56", "remaining_time": "2:16:52"} +{"current_steps": 2508, "total_steps": 2737, "loss": 0.9887, "lr": 4.235668101601964e-07, "epoch": 6.414322250639386, "percentage": 91.63, "elapsed_time": "1 day, 0:52:32", "remaining_time": "2:16:16"} +{"current_steps": 2509, "total_steps": 2737, "loss": 1.0162, "lr": 4.199016666013533e-07, "epoch": 6.41687979539642, "percentage": 91.67, "elapsed_time": "1 day, 0:53:08", "remaining_time": "2:15:41"} +{"current_steps": 2510, "total_steps": 2737, "loss": 1.0141, "lr": 4.1625210926350413e-07, "epoch": 6.419437340153452, "percentage": 91.71, "elapsed_time": "1 day, 0:53:43", "remaining_time": "2:15:05"} +{"current_steps": 2511, "total_steps": 2737, "loss": 1.0251, "lr": 4.1261814408424806e-07, "epoch": 6.421994884910486, "percentage": 91.74, "elapsed_time": "1 day, 0:54:19", "remaining_time": "2:14:29"} +{"current_steps": 2512, "total_steps": 2737, "loss": 1.0365, "lr": 4.089997769758225e-07, "epoch": 6.42455242966752, "percentage": 91.78, "elapsed_time": "1 day, 0:54:54", "remaining_time": "2:13:53"} +{"current_steps": 2513, "total_steps": 2737, "loss": 1.0032, "lr": 4.0539701382507847e-07, "epoch": 6.427109974424552, "percentage": 91.82, "elapsed_time": "1 day, 0:55:30", "remaining_time": "2:13:18"} +{"current_steps": 2514, "total_steps": 2737, "loss": 1.0045, "lr": 4.018098604934906e-07, "epoch": 6.429667519181586, "percentage": 91.85, "elapsed_time": "1 day, 0:56:06", "remaining_time": "2:12:42"} +{"current_steps": 2515, "total_steps": 2737, "loss": 1.0122, "lr": 3.982383228171338e-07, "epoch": 6.432225063938619, "percentage": 91.89, "elapsed_time": "1 day, 0:56:41", "remaining_time": "2:12:06"} +{"current_steps": 2516, "total_steps": 2737, "loss": 1.0091, "lr": 3.946824066066757e-07, "epoch": 6.434782608695652, "percentage": 91.93, "elapsed_time": "1 day, 0:57:17", "remaining_time": "2:11:31"} +{"current_steps": 2517, "total_steps": 2737, "loss": 0.9916, "lr": 3.9114211764736843e-07, "epoch": 6.437340153452685, "percentage": 91.96, "elapsed_time": "1 day, 0:57:53", "remaining_time": "2:10:55"} +{"current_steps": 2518, "total_steps": 2737, "loss": 0.9688, "lr": 3.876174616990402e-07, "epoch": 6.439897698209719, "percentage": 92.0, "elapsed_time": "1 day, 0:58:28", "remaining_time": "2:10:19"} +{"current_steps": 2519, "total_steps": 2737, "loss": 1.0262, "lr": 3.8410844449608966e-07, "epoch": 6.442455242966752, "percentage": 92.04, "elapsed_time": "1 day, 0:59:04", "remaining_time": "2:09:43"} +{"current_steps": 2520, "total_steps": 2737, "loss": 1.0357, "lr": 3.8061507174746326e-07, "epoch": 6.445012787723785, "percentage": 92.07, "elapsed_time": "1 day, 0:59:40", "remaining_time": "2:09:08"} +{"current_steps": 2521, "total_steps": 2737, "loss": 1.0278, "lr": 3.7713734913666254e-07, "epoch": 6.447570332480819, "percentage": 92.11, "elapsed_time": "1 day, 1:00:15", "remaining_time": "2:08:32"} +{"current_steps": 2522, "total_steps": 2737, "loss": 1.0293, "lr": 3.73675282321726e-07, "epoch": 6.450127877237851, "percentage": 92.14, "elapsed_time": "1 day, 1:00:51", "remaining_time": "2:07:56"} +{"current_steps": 2523, "total_steps": 2737, "loss": 1.0432, "lr": 3.7022887693521914e-07, "epoch": 6.452685421994885, "percentage": 92.18, "elapsed_time": "1 day, 1:01:26", "remaining_time": "2:07:21"} +{"current_steps": 2524, "total_steps": 2737, "loss": 1.0451, "lr": 3.6679813858422673e-07, "epoch": 6.455242966751918, "percentage": 92.22, "elapsed_time": "1 day, 1:02:02", "remaining_time": "2:06:45"} +{"current_steps": 2525, "total_steps": 2737, "loss": 1.0166, "lr": 3.6338307285034626e-07, "epoch": 6.457800511508951, "percentage": 92.25, "elapsed_time": "1 day, 1:02:38", "remaining_time": "2:06:09"} +{"current_steps": 2526, "total_steps": 2737, "loss": 1.0221, "lr": 3.5998368528967764e-07, "epoch": 6.460358056265985, "percentage": 92.29, "elapsed_time": "1 day, 1:03:13", "remaining_time": "2:05:34"} +{"current_steps": 2527, "total_steps": 2737, "loss": 1.0474, "lr": 3.5659998143281027e-07, "epoch": 6.462915601023018, "percentage": 92.33, "elapsed_time": "1 day, 1:03:49", "remaining_time": "2:04:58"} +{"current_steps": 2528, "total_steps": 2737, "loss": 1.0187, "lr": 3.532319667848172e-07, "epoch": 6.465473145780051, "percentage": 92.36, "elapsed_time": "1 day, 1:04:24", "remaining_time": "2:04:22"} +{"current_steps": 2529, "total_steps": 2737, "loss": 0.9894, "lr": 3.498796468252508e-07, "epoch": 6.468030690537084, "percentage": 92.4, "elapsed_time": "1 day, 1:05:00", "remaining_time": "2:03:46"} +{"current_steps": 2530, "total_steps": 2737, "loss": 1.0331, "lr": 3.46543027008126e-07, "epoch": 6.470588235294118, "percentage": 92.44, "elapsed_time": "1 day, 1:05:36", "remaining_time": "2:03:11"} +{"current_steps": 2531, "total_steps": 2737, "loss": 1.0259, "lr": 3.4322211276191176e-07, "epoch": 6.4731457800511505, "percentage": 92.47, "elapsed_time": "1 day, 1:06:11", "remaining_time": "2:02:35"} +{"current_steps": 2532, "total_steps": 2737, "loss": 1.0065, "lr": 3.399169094895294e-07, "epoch": 6.475703324808184, "percentage": 92.51, "elapsed_time": "1 day, 1:06:47", "remaining_time": "2:01:59"} +{"current_steps": 2533, "total_steps": 2737, "loss": 1.0382, "lr": 3.366274225683397e-07, "epoch": 6.478260869565218, "percentage": 92.55, "elapsed_time": "1 day, 1:07:22", "remaining_time": "2:01:23"} +{"current_steps": 2534, "total_steps": 2737, "loss": 0.9849, "lr": 3.3335365735012947e-07, "epoch": 6.4808184143222505, "percentage": 92.58, "elapsed_time": "1 day, 1:07:58", "remaining_time": "2:00:48"} +{"current_steps": 2535, "total_steps": 2737, "loss": 1.0441, "lr": 3.3009561916111045e-07, "epoch": 6.483375959079284, "percentage": 92.62, "elapsed_time": "1 day, 1:08:35", "remaining_time": "2:00:12"} +{"current_steps": 2536, "total_steps": 2737, "loss": 1.0256, "lr": 3.2685331330190916e-07, "epoch": 6.485933503836317, "percentage": 92.66, "elapsed_time": "1 day, 1:09:10", "remaining_time": "1:59:36"} +{"current_steps": 2537, "total_steps": 2737, "loss": 1.0006, "lr": 3.2362674504755385e-07, "epoch": 6.4884910485933505, "percentage": 92.69, "elapsed_time": "1 day, 1:09:46", "remaining_time": "1:59:01"} +{"current_steps": 2538, "total_steps": 2737, "loss": 0.9981, "lr": 3.2041591964746767e-07, "epoch": 6.491048593350383, "percentage": 92.73, "elapsed_time": "1 day, 1:10:22", "remaining_time": "1:58:25"} +{"current_steps": 2539, "total_steps": 2737, "loss": 0.9971, "lr": 3.17220842325463e-07, "epoch": 6.493606138107417, "percentage": 92.77, "elapsed_time": "1 day, 1:10:57", "remaining_time": "1:57:49"} +{"current_steps": 2540, "total_steps": 2737, "loss": 1.019, "lr": 3.14041518279733e-07, "epoch": 6.4961636828644505, "percentage": 92.8, "elapsed_time": "1 day, 1:11:33", "remaining_time": "1:57:14"} +{"current_steps": 2541, "total_steps": 2737, "loss": 1.048, "lr": 3.108779526828365e-07, "epoch": 6.498721227621483, "percentage": 92.84, "elapsed_time": "1 day, 1:12:08", "remaining_time": "1:56:38"} +{"current_steps": 2542, "total_steps": 2737, "loss": 1.0205, "lr": 3.0773015068169876e-07, "epoch": 6.501278772378517, "percentage": 92.88, "elapsed_time": "1 day, 1:12:44", "remaining_time": "1:56:02"} +{"current_steps": 2543, "total_steps": 2737, "loss": 1.0062, "lr": 3.045981173975965e-07, "epoch": 6.5038363171355495, "percentage": 92.91, "elapsed_time": "1 day, 1:13:20", "remaining_time": "1:55:26"} +{"current_steps": 2544, "total_steps": 2737, "loss": 1.0221, "lr": 3.0148185792615137e-07, "epoch": 6.506393861892583, "percentage": 92.95, "elapsed_time": "1 day, 1:13:55", "remaining_time": "1:54:51"} +{"current_steps": 2545, "total_steps": 2737, "loss": 1.0336, "lr": 2.9838137733732343e-07, "epoch": 6.508951406649617, "percentage": 92.99, "elapsed_time": "1 day, 1:14:31", "remaining_time": "1:54:15"} +{"current_steps": 2546, "total_steps": 2737, "loss": 1.0085, "lr": 2.9529668067539986e-07, "epoch": 6.5115089514066495, "percentage": 93.02, "elapsed_time": "1 day, 1:15:06", "remaining_time": "1:53:39"} +{"current_steps": 2547, "total_steps": 2737, "loss": 1.0212, "lr": 2.922277729589906e-07, "epoch": 6.514066496163683, "percentage": 93.06, "elapsed_time": "1 day, 1:15:42", "remaining_time": "1:53:04"} +{"current_steps": 2548, "total_steps": 2737, "loss": 1.0062, "lr": 2.891746591810152e-07, "epoch": 6.516624040920716, "percentage": 93.09, "elapsed_time": "1 day, 1:16:18", "remaining_time": "1:52:28"} +{"current_steps": 2549, "total_steps": 2737, "loss": 1.0269, "lr": 2.86137344308699e-07, "epoch": 6.5191815856777495, "percentage": 93.13, "elapsed_time": "1 day, 1:16:53", "remaining_time": "1:51:52"} +{"current_steps": 2550, "total_steps": 2737, "loss": 1.0513, "lr": 2.8311583328356485e-07, "epoch": 6.521739130434782, "percentage": 93.17, "elapsed_time": "1 day, 1:17:29", "remaining_time": "1:51:16"} +{"current_steps": 2551, "total_steps": 2737, "loss": 1.0133, "lr": 2.801101310214205e-07, "epoch": 6.524296675191816, "percentage": 93.2, "elapsed_time": "1 day, 1:18:05", "remaining_time": "1:50:41"} +{"current_steps": 2552, "total_steps": 2737, "loss": 1.0184, "lr": 2.7712024241235757e-07, "epoch": 6.526854219948849, "percentage": 93.24, "elapsed_time": "1 day, 1:18:40", "remaining_time": "1:50:05"} +{"current_steps": 2553, "total_steps": 2737, "loss": 1.0344, "lr": 2.7414617232073505e-07, "epoch": 6.529411764705882, "percentage": 93.28, "elapsed_time": "1 day, 1:19:16", "remaining_time": "1:49:29"} +{"current_steps": 2554, "total_steps": 2737, "loss": 1.0219, "lr": 2.7118792558518237e-07, "epoch": 6.531969309462916, "percentage": 93.31, "elapsed_time": "1 day, 1:19:51", "remaining_time": "1:48:54"} +{"current_steps": 2555, "total_steps": 2737, "loss": 1.0192, "lr": 2.6824550701857966e-07, "epoch": 6.534526854219949, "percentage": 93.35, "elapsed_time": "1 day, 1:20:27", "remaining_time": "1:48:18"} +{"current_steps": 2556, "total_steps": 2737, "loss": 0.9885, "lr": 2.653189214080576e-07, "epoch": 6.537084398976982, "percentage": 93.39, "elapsed_time": "1 day, 1:21:03", "remaining_time": "1:47:42"} +{"current_steps": 2557, "total_steps": 2737, "loss": 1.0225, "lr": 2.624081735149897e-07, "epoch": 6.539641943734015, "percentage": 93.42, "elapsed_time": "1 day, 1:21:38", "remaining_time": "1:47:06"} +{"current_steps": 2558, "total_steps": 2737, "loss": 1.0051, "lr": 2.5951326807498123e-07, "epoch": 6.542199488491049, "percentage": 93.46, "elapsed_time": "1 day, 1:22:14", "remaining_time": "1:46:31"} +{"current_steps": 2559, "total_steps": 2737, "loss": 1.0256, "lr": 2.5663420979785915e-07, "epoch": 6.544757033248082, "percentage": 93.5, "elapsed_time": "1 day, 1:22:49", "remaining_time": "1:45:55"} +{"current_steps": 2560, "total_steps": 2737, "loss": 1.0134, "lr": 2.5377100336767547e-07, "epoch": 6.547314578005115, "percentage": 93.53, "elapsed_time": "1 day, 1:23:25", "remaining_time": "1:45:19"} +{"current_steps": 2561, "total_steps": 2737, "loss": 1.0045, "lr": 2.509236534426851e-07, "epoch": 6.549872122762149, "percentage": 93.57, "elapsed_time": "1 day, 1:24:01", "remaining_time": "1:44:44"} +{"current_steps": 2562, "total_steps": 2737, "loss": 1.0377, "lr": 2.4809216465534913e-07, "epoch": 6.552429667519181, "percentage": 93.61, "elapsed_time": "1 day, 1:24:36", "remaining_time": "1:44:08"} +{"current_steps": 2563, "total_steps": 2737, "loss": 1.0037, "lr": 2.4527654161232153e-07, "epoch": 6.554987212276215, "percentage": 93.64, "elapsed_time": "1 day, 1:25:12", "remaining_time": "1:43:32"} +{"current_steps": 2564, "total_steps": 2737, "loss": 1.0462, "lr": 2.424767888944468e-07, "epoch": 6.557544757033249, "percentage": 93.68, "elapsed_time": "1 day, 1:25:48", "remaining_time": "1:42:56"} +{"current_steps": 2565, "total_steps": 2737, "loss": 0.9959, "lr": 2.3969291105674805e-07, "epoch": 6.560102301790281, "percentage": 93.72, "elapsed_time": "1 day, 1:26:23", "remaining_time": "1:42:21"} +{"current_steps": 2566, "total_steps": 2737, "loss": 0.9783, "lr": 2.3692491262841788e-07, "epoch": 6.562659846547315, "percentage": 93.75, "elapsed_time": "1 day, 1:26:59", "remaining_time": "1:41:45"} +{"current_steps": 2567, "total_steps": 2737, "loss": 0.9778, "lr": 2.3417279811281947e-07, "epoch": 6.565217391304348, "percentage": 93.79, "elapsed_time": "1 day, 1:27:35", "remaining_time": "1:41:09"} +{"current_steps": 2568, "total_steps": 2737, "loss": 1.042, "lr": 2.3143657198746893e-07, "epoch": 6.567774936061381, "percentage": 93.83, "elapsed_time": "1 day, 1:28:10", "remaining_time": "1:40:34"} +{"current_steps": 2569, "total_steps": 2737, "loss": 1.0302, "lr": 2.2871623870403649e-07, "epoch": 6.570332480818414, "percentage": 93.86, "elapsed_time": "1 day, 1:28:46", "remaining_time": "1:39:58"} +{"current_steps": 2570, "total_steps": 2737, "loss": 1.0267, "lr": 2.260118026883318e-07, "epoch": 6.572890025575448, "percentage": 93.9, "elapsed_time": "1 day, 1:29:21", "remaining_time": "1:39:22"} +{"current_steps": 2571, "total_steps": 2737, "loss": 1.0292, "lr": 2.233232683403075e-07, "epoch": 6.57544757033248, "percentage": 93.93, "elapsed_time": "1 day, 1:29:57", "remaining_time": "1:38:47"} +{"current_steps": 2572, "total_steps": 2737, "loss": 1.0017, "lr": 2.206506400340369e-07, "epoch": 6.578005115089514, "percentage": 93.97, "elapsed_time": "1 day, 1:30:32", "remaining_time": "1:38:11"} +{"current_steps": 2573, "total_steps": 2737, "loss": 1.0082, "lr": 2.1799392211772074e-07, "epoch": 6.580562659846548, "percentage": 94.01, "elapsed_time": "1 day, 1:31:08", "remaining_time": "1:37:35"} +{"current_steps": 2574, "total_steps": 2737, "loss": 1.0219, "lr": 2.1535311891367373e-07, "epoch": 6.58312020460358, "percentage": 94.04, "elapsed_time": "1 day, 1:31:43", "remaining_time": "1:36:59"} +{"current_steps": 2575, "total_steps": 2737, "loss": 1.0048, "lr": 2.1272823471831573e-07, "epoch": 6.585677749360614, "percentage": 94.08, "elapsed_time": "1 day, 1:32:19", "remaining_time": "1:36:24"} +{"current_steps": 2576, "total_steps": 2737, "loss": 1.0116, "lr": 2.101192738021718e-07, "epoch": 6.588235294117647, "percentage": 94.12, "elapsed_time": "1 day, 1:32:55", "remaining_time": "1:35:48"} +{"current_steps": 2577, "total_steps": 2737, "loss": 1.0117, "lr": 2.0752624040985436e-07, "epoch": 6.59079283887468, "percentage": 94.15, "elapsed_time": "1 day, 1:33:30", "remaining_time": "1:35:12"} +{"current_steps": 2578, "total_steps": 2737, "loss": 1.0255, "lr": 2.0494913876007105e-07, "epoch": 6.593350383631714, "percentage": 94.19, "elapsed_time": "1 day, 1:34:06", "remaining_time": "1:34:37"} +{"current_steps": 2579, "total_steps": 2737, "loss": 1.0241, "lr": 2.0238797304560243e-07, "epoch": 6.595907928388747, "percentage": 94.23, "elapsed_time": "1 day, 1:34:41", "remaining_time": "1:34:01"} +{"current_steps": 2580, "total_steps": 2737, "loss": 1.0106, "lr": 1.9984274743330424e-07, "epoch": 6.59846547314578, "percentage": 94.26, "elapsed_time": "1 day, 1:35:17", "remaining_time": "1:33:25"} +{"current_steps": 2581, "total_steps": 2737, "loss": 1.0405, "lr": 1.9731346606410185e-07, "epoch": 6.601023017902813, "percentage": 94.3, "elapsed_time": "1 day, 1:35:53", "remaining_time": "1:32:49"} +{"current_steps": 2582, "total_steps": 2737, "loss": 1.0286, "lr": 1.9480013305297585e-07, "epoch": 6.603580562659847, "percentage": 94.34, "elapsed_time": "1 day, 1:36:28", "remaining_time": "1:32:14"} +{"current_steps": 2583, "total_steps": 2737, "loss": 1.0137, "lr": 1.9230275248896425e-07, "epoch": 6.6061381074168795, "percentage": 94.37, "elapsed_time": "1 day, 1:37:04", "remaining_time": "1:31:38"} +{"current_steps": 2584, "total_steps": 2737, "loss": 1.0352, "lr": 1.8982132843514577e-07, "epoch": 6.608695652173913, "percentage": 94.41, "elapsed_time": "1 day, 1:37:40", "remaining_time": "1:31:02"} +{"current_steps": 2585, "total_steps": 2737, "loss": 0.9899, "lr": 1.8735586492864556e-07, "epoch": 6.611253196930946, "percentage": 94.45, "elapsed_time": "1 day, 1:38:15", "remaining_time": "1:30:27"} +{"current_steps": 2586, "total_steps": 2737, "loss": 1.0202, "lr": 1.8490636598061605e-07, "epoch": 6.6138107416879794, "percentage": 94.48, "elapsed_time": "1 day, 1:38:51", "remaining_time": "1:29:51"} +{"current_steps": 2587, "total_steps": 2737, "loss": 1.0801, "lr": 1.8247283557624062e-07, "epoch": 6.616368286445013, "percentage": 94.52, "elapsed_time": "1 day, 1:39:27", "remaining_time": "1:29:15"} +{"current_steps": 2588, "total_steps": 2737, "loss": 1.0323, "lr": 1.8005527767471998e-07, "epoch": 6.618925831202046, "percentage": 94.56, "elapsed_time": "1 day, 1:40:02", "remaining_time": "1:28:39"} +{"current_steps": 2589, "total_steps": 2737, "loss": 1.0247, "lr": 1.7765369620926899e-07, "epoch": 6.621483375959079, "percentage": 94.59, "elapsed_time": "1 day, 1:40:38", "remaining_time": "1:28:04"} +{"current_steps": 2590, "total_steps": 2737, "loss": 1.0561, "lr": 1.752680950871144e-07, "epoch": 6.624040920716112, "percentage": 94.63, "elapsed_time": "1 day, 1:41:13", "remaining_time": "1:27:28"} +{"current_steps": 2591, "total_steps": 2737, "loss": 1.0182, "lr": 1.7289847818947492e-07, "epoch": 6.626598465473146, "percentage": 94.67, "elapsed_time": "1 day, 1:41:49", "remaining_time": "1:26:52"} +{"current_steps": 2592, "total_steps": 2737, "loss": 1.0255, "lr": 1.7054484937157112e-07, "epoch": 6.629156010230179, "percentage": 94.7, "elapsed_time": "1 day, 1:42:25", "remaining_time": "1:26:17"} +{"current_steps": 2593, "total_steps": 2737, "loss": 1.0299, "lr": 1.6820721246261106e-07, "epoch": 6.631713554987212, "percentage": 94.74, "elapsed_time": "1 day, 1:43:00", "remaining_time": "1:25:41"} +{"current_steps": 2594, "total_steps": 2737, "loss": 1.0407, "lr": 1.6588557126578365e-07, "epoch": 6.634271099744246, "percentage": 94.78, "elapsed_time": "1 day, 1:43:36", "remaining_time": "1:25:05"} +{"current_steps": 2595, "total_steps": 2737, "loss": 1.0145, "lr": 1.6357992955825297e-07, "epoch": 6.6368286445012785, "percentage": 94.81, "elapsed_time": "1 day, 1:44:12", "remaining_time": "1:24:29"} +{"current_steps": 2596, "total_steps": 2737, "loss": 1.0106, "lr": 1.6129029109115401e-07, "epoch": 6.639386189258312, "percentage": 94.85, "elapsed_time": "1 day, 1:44:47", "remaining_time": "1:23:54"} +{"current_steps": 2597, "total_steps": 2737, "loss": 0.9862, "lr": 1.59016659589587e-07, "epoch": 6.641943734015345, "percentage": 94.88, "elapsed_time": "1 day, 1:45:23", "remaining_time": "1:23:18"} +{"current_steps": 2598, "total_steps": 2737, "loss": 1.0301, "lr": 1.567590387526041e-07, "epoch": 6.6445012787723785, "percentage": 94.92, "elapsed_time": "1 day, 1:45:58", "remaining_time": "1:22:42"} +{"current_steps": 2599, "total_steps": 2737, "loss": 1.0088, "lr": 1.5451743225321726e-07, "epoch": 6.647058823529412, "percentage": 94.96, "elapsed_time": "1 day, 1:46:34", "remaining_time": "1:22:07"} +{"current_steps": 2600, "total_steps": 2737, "loss": 1.0117, "lr": 1.5229184373837912e-07, "epoch": 6.649616368286445, "percentage": 94.99, "elapsed_time": "1 day, 1:47:10", "remaining_time": "1:21:31"} +{"current_steps": 2601, "total_steps": 2737, "loss": 1.0345, "lr": 1.5008227682898337e-07, "epoch": 6.6521739130434785, "percentage": 95.03, "elapsed_time": "1 day, 1:47:46", "remaining_time": "1:20:55"} +{"current_steps": 2602, "total_steps": 2737, "loss": 1.0074, "lr": 1.4788873511985656e-07, "epoch": 6.654731457800511, "percentage": 95.07, "elapsed_time": "1 day, 1:48:21", "remaining_time": "1:20:20"} +{"current_steps": 2603, "total_steps": 2737, "loss": 1.0295, "lr": 1.4571122217975298e-07, "epoch": 6.657289002557545, "percentage": 95.1, "elapsed_time": "1 day, 1:48:57", "remaining_time": "1:19:44"} +{"current_steps": 2604, "total_steps": 2737, "loss": 1.0287, "lr": 1.4354974155135203e-07, "epoch": 6.659846547314578, "percentage": 95.14, "elapsed_time": "1 day, 1:49:33", "remaining_time": "1:19:08"} +{"current_steps": 2605, "total_steps": 2737, "loss": 1.0059, "lr": 1.4140429675124633e-07, "epoch": 6.662404092071611, "percentage": 95.18, "elapsed_time": "1 day, 1:50:08", "remaining_time": "1:18:32"} +{"current_steps": 2606, "total_steps": 2737, "loss": 1.0347, "lr": 1.3927489126993932e-07, "epoch": 6.664961636828645, "percentage": 95.21, "elapsed_time": "1 day, 1:50:44", "remaining_time": "1:17:57"} +{"current_steps": 2607, "total_steps": 2737, "loss": 1.012, "lr": 1.3716152857184306e-07, "epoch": 6.667519181585678, "percentage": 95.25, "elapsed_time": "1 day, 1:51:19", "remaining_time": "1:17:21"} +{"current_steps": 2608, "total_steps": 2737, "loss": 0.9918, "lr": 1.350642120952661e-07, "epoch": 6.670076726342711, "percentage": 95.29, "elapsed_time": "1 day, 1:51:55", "remaining_time": "1:16:45"} +{"current_steps": 2609, "total_steps": 2737, "loss": 1.0269, "lr": 1.3298294525241008e-07, "epoch": 6.672634271099744, "percentage": 95.32, "elapsed_time": "1 day, 1:52:31", "remaining_time": "1:16:10"} +{"current_steps": 2610, "total_steps": 2737, "loss": 1.0334, "lr": 1.3091773142936525e-07, "epoch": 6.675191815856778, "percentage": 95.36, "elapsed_time": "1 day, 1:53:06", "remaining_time": "1:15:34"} +{"current_steps": 2611, "total_steps": 2737, "loss": 0.9974, "lr": 1.2886857398610731e-07, "epoch": 6.677749360613811, "percentage": 95.4, "elapsed_time": "1 day, 1:53:42", "remaining_time": "1:14:58"} +{"current_steps": 2612, "total_steps": 2737, "loss": 1.0222, "lr": 1.2683547625648718e-07, "epoch": 6.680306905370844, "percentage": 95.43, "elapsed_time": "1 day, 1:54:18", "remaining_time": "1:14:22"} +{"current_steps": 2613, "total_steps": 2737, "loss": 0.9952, "lr": 1.2481844154822565e-07, "epoch": 6.6828644501278776, "percentage": 95.47, "elapsed_time": "1 day, 1:54:53", "remaining_time": "1:13:47"} +{"current_steps": 2614, "total_steps": 2737, "loss": 1.0026, "lr": 1.2281747314291437e-07, "epoch": 6.68542199488491, "percentage": 95.51, "elapsed_time": "1 day, 1:55:29", "remaining_time": "1:13:11"} +{"current_steps": 2615, "total_steps": 2737, "loss": 1.0056, "lr": 1.208325742960037e-07, "epoch": 6.687979539641944, "percentage": 95.54, "elapsed_time": "1 day, 1:56:04", "remaining_time": "1:12:35"} +{"current_steps": 2616, "total_steps": 2737, "loss": 1.0492, "lr": 1.1886374823679825e-07, "epoch": 6.690537084398977, "percentage": 95.58, "elapsed_time": "1 day, 1:56:40", "remaining_time": "1:12:00"} +{"current_steps": 2617, "total_steps": 2737, "loss": 1.0213, "lr": 1.1691099816845574e-07, "epoch": 6.69309462915601, "percentage": 95.62, "elapsed_time": "1 day, 1:57:15", "remaining_time": "1:11:24"} +{"current_steps": 2618, "total_steps": 2737, "loss": 0.9974, "lr": 1.149743272679793e-07, "epoch": 6.695652173913043, "percentage": 95.65, "elapsed_time": "1 day, 1:57:51", "remaining_time": "1:10:48"} +{"current_steps": 2619, "total_steps": 2737, "loss": 0.9967, "lr": 1.1305373868620961e-07, "epoch": 6.698209718670077, "percentage": 95.69, "elapsed_time": "1 day, 1:58:27", "remaining_time": "1:10:13"} +{"current_steps": 2620, "total_steps": 2737, "loss": 0.9956, "lr": 1.1114923554782608e-07, "epoch": 6.70076726342711, "percentage": 95.73, "elapsed_time": "1 day, 1:59:03", "remaining_time": "1:09:37"} +{"current_steps": 2621, "total_steps": 2737, "loss": 1.0193, "lr": 1.0926082095133572e-07, "epoch": 6.703324808184143, "percentage": 95.76, "elapsed_time": "1 day, 1:59:38", "remaining_time": "1:09:01"} +{"current_steps": 2622, "total_steps": 2737, "loss": 1.0473, "lr": 1.0738849796907091e-07, "epoch": 6.705882352941177, "percentage": 95.8, "elapsed_time": "1 day, 2:00:14", "remaining_time": "1:08:25"} +{"current_steps": 2623, "total_steps": 2737, "loss": 1.008, "lr": 1.0553226964718277e-07, "epoch": 6.708439897698209, "percentage": 95.83, "elapsed_time": "1 day, 2:00:49", "remaining_time": "1:07:50"} +{"current_steps": 2624, "total_steps": 2737, "loss": 1.0029, "lr": 1.0369213900564001e-07, "epoch": 6.710997442455243, "percentage": 95.87, "elapsed_time": "1 day, 2:01:25", "remaining_time": "1:07:14"} +{"current_steps": 2625, "total_steps": 2737, "loss": 0.9623, "lr": 1.0186810903822119e-07, "epoch": 6.713554987212277, "percentage": 95.91, "elapsed_time": "1 day, 2:02:01", "remaining_time": "1:06:38"} +{"current_steps": 2626, "total_steps": 2737, "loss": 1.0305, "lr": 1.0006018271250695e-07, "epoch": 6.716112531969309, "percentage": 95.94, "elapsed_time": "1 day, 2:02:36", "remaining_time": "1:06:03"} +{"current_steps": 2627, "total_steps": 2737, "loss": 1.0596, "lr": 9.826836296988107e-08, "epoch": 6.718670076726343, "percentage": 95.98, "elapsed_time": "1 day, 2:03:12", "remaining_time": "1:05:27"} +{"current_steps": 2628, "total_steps": 2737, "loss": 1.0237, "lr": 9.649265272552277e-08, "epoch": 6.721227621483376, "percentage": 96.02, "elapsed_time": "1 day, 2:03:48", "remaining_time": "1:04:51"} +{"current_steps": 2629, "total_steps": 2737, "loss": 1.0177, "lr": 9.473305486840112e-08, "epoch": 6.723785166240409, "percentage": 96.05, "elapsed_time": "1 day, 2:04:23", "remaining_time": "1:04:15"} +{"current_steps": 2630, "total_steps": 2737, "loss": 1.0284, "lr": 9.29895722612717e-08, "epoch": 6.726342710997442, "percentage": 96.09, "elapsed_time": "1 day, 2:04:59", "remaining_time": "1:03:40"} +{"current_steps": 2631, "total_steps": 2737, "loss": 1.0313, "lr": 9.126220774067218e-08, "epoch": 6.728900255754476, "percentage": 96.13, "elapsed_time": "1 day, 2:05:34", "remaining_time": "1:03:04"} +{"current_steps": 2632, "total_steps": 2737, "loss": 1.0156, "lr": 8.955096411691566e-08, "epoch": 6.731457800511509, "percentage": 96.16, "elapsed_time": "1 day, 2:06:10", "remaining_time": "1:02:28"} +{"current_steps": 2633, "total_steps": 2737, "loss": 1.0173, "lr": 8.785584417409065e-08, "epoch": 6.734015345268542, "percentage": 96.2, "elapsed_time": "1 day, 2:06:46", "remaining_time": "1:01:53"} +{"current_steps": 2634, "total_steps": 2737, "loss": 1.0269, "lr": 8.617685067004777e-08, "epoch": 6.736572890025576, "percentage": 96.24, "elapsed_time": "1 day, 2:07:21", "remaining_time": "1:01:17"} +{"current_steps": 2635, "total_steps": 2737, "loss": 0.9978, "lr": 8.451398633640861e-08, "epoch": 6.739130434782608, "percentage": 96.27, "elapsed_time": "1 day, 2:07:57", "remaining_time": "1:00:41"} +{"current_steps": 2636, "total_steps": 2737, "loss": 1.0166, "lr": 8.286725387854689e-08, "epoch": 6.741687979539642, "percentage": 96.31, "elapsed_time": "1 day, 2:08:33", "remaining_time": "1:00:05"} +{"current_steps": 2637, "total_steps": 2737, "loss": 1.03, "lr": 8.123665597559393e-08, "epoch": 6.744245524296675, "percentage": 96.35, "elapsed_time": "1 day, 2:09:08", "remaining_time": "0:59:30"} +{"current_steps": 2638, "total_steps": 2737, "loss": 0.9843, "lr": 7.962219528042991e-08, "epoch": 6.746803069053708, "percentage": 96.38, "elapsed_time": "1 day, 2:09:44", "remaining_time": "0:58:54"} +{"current_steps": 2639, "total_steps": 2737, "loss": 1.0058, "lr": 7.802387441968262e-08, "epoch": 6.749360613810742, "percentage": 96.42, "elapsed_time": "1 day, 2:10:20", "remaining_time": "0:58:18"} +{"current_steps": 2640, "total_steps": 2737, "loss": 1.0451, "lr": 7.644169599371975e-08, "epoch": 6.751918158567775, "percentage": 96.46, "elapsed_time": "1 day, 2:10:55", "remaining_time": "0:57:43"} +{"current_steps": 2641, "total_steps": 2737, "loss": 1.0447, "lr": 7.487566257664558e-08, "epoch": 6.754475703324808, "percentage": 96.49, "elapsed_time": "1 day, 2:11:31", "remaining_time": "0:57:07"} +{"current_steps": 2642, "total_steps": 2737, "loss": 1.0003, "lr": 7.332577671629982e-08, "epoch": 6.757033248081841, "percentage": 96.53, "elapsed_time": "1 day, 2:12:06", "remaining_time": "0:56:31"} +{"current_steps": 2643, "total_steps": 2737, "loss": 1.0152, "lr": 7.179204093424985e-08, "epoch": 6.759590792838875, "percentage": 96.57, "elapsed_time": "1 day, 2:12:42", "remaining_time": "0:55:56"} +{"current_steps": 2644, "total_steps": 2737, "loss": 1.0136, "lr": 7.027445772578856e-08, "epoch": 6.762148337595908, "percentage": 96.6, "elapsed_time": "1 day, 2:13:18", "remaining_time": "0:55:20"} +{"current_steps": 2645, "total_steps": 2737, "loss": 1.0039, "lr": 6.877302955992649e-08, "epoch": 6.764705882352941, "percentage": 96.64, "elapsed_time": "1 day, 2:13:53", "remaining_time": "0:54:44"} +{"current_steps": 2646, "total_steps": 2737, "loss": 1.0263, "lr": 6.72877588793952e-08, "epoch": 6.767263427109975, "percentage": 96.68, "elapsed_time": "1 day, 2:14:29", "remaining_time": "0:54:08"} +{"current_steps": 2647, "total_steps": 2737, "loss": 1.0095, "lr": 6.581864810063732e-08, "epoch": 6.7698209718670075, "percentage": 96.71, "elapsed_time": "1 day, 2:15:04", "remaining_time": "0:53:33"} +{"current_steps": 2648, "total_steps": 2737, "loss": 1.0014, "lr": 6.436569961380313e-08, "epoch": 6.772378516624041, "percentage": 96.75, "elapsed_time": "1 day, 2:15:40", "remaining_time": "0:52:57"} +{"current_steps": 2649, "total_steps": 2737, "loss": 1.0308, "lr": 6.292891578275063e-08, "epoch": 6.774936061381074, "percentage": 96.78, "elapsed_time": "1 day, 2:16:16", "remaining_time": "0:52:21"} +{"current_steps": 2650, "total_steps": 2737, "loss": 1.0107, "lr": 6.150829894503662e-08, "epoch": 6.7774936061381075, "percentage": 96.82, "elapsed_time": "1 day, 2:16:51", "remaining_time": "0:51:46"} +{"current_steps": 2651, "total_steps": 2737, "loss": 1.0279, "lr": 6.010385141191455e-08, "epoch": 6.78005115089514, "percentage": 96.86, "elapsed_time": "1 day, 2:17:27", "remaining_time": "0:51:10"} +{"current_steps": 2652, "total_steps": 2737, "loss": 1.0067, "lr": 5.8715575468333286e-08, "epoch": 6.782608695652174, "percentage": 96.89, "elapsed_time": "1 day, 2:18:02", "remaining_time": "0:50:34"} +{"current_steps": 2653, "total_steps": 2737, "loss": 1.0253, "lr": 5.734347337293167e-08, "epoch": 6.7851662404092075, "percentage": 96.93, "elapsed_time": "1 day, 2:18:39", "remaining_time": "0:49:59"} +{"current_steps": 2654, "total_steps": 2737, "loss": 1.0256, "lr": 5.598754735803513e-08, "epoch": 6.78772378516624, "percentage": 96.97, "elapsed_time": "1 day, 2:19:14", "remaining_time": "0:49:23"} +{"current_steps": 2655, "total_steps": 2737, "loss": 1.023, "lr": 5.464779962964795e-08, "epoch": 6.790281329923274, "percentage": 97.0, "elapsed_time": "1 day, 2:19:50", "remaining_time": "0:48:47"} +{"current_steps": 2656, "total_steps": 2737, "loss": 0.9817, "lr": 5.332423236745765e-08, "epoch": 6.792838874680307, "percentage": 97.04, "elapsed_time": "1 day, 2:20:26", "remaining_time": "0:48:11"} +{"current_steps": 2657, "total_steps": 2737, "loss": 0.9919, "lr": 5.201684772482507e-08, "epoch": 6.79539641943734, "percentage": 97.08, "elapsed_time": "1 day, 2:21:01", "remaining_time": "0:47:36"} +{"current_steps": 2658, "total_steps": 2737, "loss": 0.9949, "lr": 5.0725647828783196e-08, "epoch": 6.797953964194374, "percentage": 97.11, "elapsed_time": "1 day, 2:21:37", "remaining_time": "0:47:00"} +{"current_steps": 2659, "total_steps": 2737, "loss": 1.0246, "lr": 4.945063478003276e-08, "epoch": 6.8005115089514065, "percentage": 97.15, "elapsed_time": "1 day, 2:22:12", "remaining_time": "0:46:24"} +{"current_steps": 2660, "total_steps": 2737, "loss": 1.0434, "lr": 4.8191810652941096e-08, "epoch": 6.80306905370844, "percentage": 97.19, "elapsed_time": "1 day, 2:22:48", "remaining_time": "0:45:49"} +{"current_steps": 2661, "total_steps": 2737, "loss": 1.0256, "lr": 4.694917749553663e-08, "epoch": 6.805626598465473, "percentage": 97.22, "elapsed_time": "1 day, 2:23:24", "remaining_time": "0:45:13"} +{"current_steps": 2662, "total_steps": 2737, "loss": 0.9802, "lr": 4.5722737329505495e-08, "epoch": 6.8081841432225065, "percentage": 97.26, "elapsed_time": "1 day, 2:23:59", "remaining_time": "0:44:37"} +{"current_steps": 2663, "total_steps": 2737, "loss": 1.0593, "lr": 4.451249215018827e-08, "epoch": 6.810741687979539, "percentage": 97.3, "elapsed_time": "1 day, 2:24:35", "remaining_time": "0:44:01"} +{"current_steps": 2664, "total_steps": 2737, "loss": 1.026, "lr": 4.331844392657991e-08, "epoch": 6.813299232736573, "percentage": 97.33, "elapsed_time": "1 day, 2:25:11", "remaining_time": "0:43:26"} +{"current_steps": 2665, "total_steps": 2737, "loss": 1.0162, "lr": 4.2140594601320915e-08, "epoch": 6.8158567774936065, "percentage": 97.37, "elapsed_time": "1 day, 2:25:46", "remaining_time": "0:42:50"} +{"current_steps": 2666, "total_steps": 2737, "loss": 0.9853, "lr": 4.097894609069841e-08, "epoch": 6.818414322250639, "percentage": 97.41, "elapsed_time": "1 day, 2:26:22", "remaining_time": "0:42:14"} +{"current_steps": 2667, "total_steps": 2737, "loss": 1.0022, "lr": 3.983350028464283e-08, "epoch": 6.820971867007673, "percentage": 97.44, "elapsed_time": "1 day, 2:26:58", "remaining_time": "0:41:39"} +{"current_steps": 2668, "total_steps": 2737, "loss": 1.0905, "lr": 3.870425904672237e-08, "epoch": 6.823529411764706, "percentage": 97.48, "elapsed_time": "1 day, 2:27:33", "remaining_time": "0:41:03"} +{"current_steps": 2669, "total_steps": 2737, "loss": 1.032, "lr": 3.7591224214141855e-08, "epoch": 6.826086956521739, "percentage": 97.52, "elapsed_time": "1 day, 2:28:09", "remaining_time": "0:40:27"} +{"current_steps": 2670, "total_steps": 2737, "loss": 1.0273, "lr": 3.649439759773943e-08, "epoch": 6.828644501278772, "percentage": 97.55, "elapsed_time": "1 day, 2:28:44", "remaining_time": "0:39:52"} +{"current_steps": 2671, "total_steps": 2737, "loss": 1.0202, "lr": 3.541378098198323e-08, "epoch": 6.831202046035806, "percentage": 97.59, "elapsed_time": "1 day, 2:29:20", "remaining_time": "0:39:16"} +{"current_steps": 2672, "total_steps": 2737, "loss": 0.9919, "lr": 3.4349376124969136e-08, "epoch": 6.833759590792839, "percentage": 97.63, "elapsed_time": "1 day, 2:29:56", "remaining_time": "0:38:40"} +{"current_steps": 2673, "total_steps": 2737, "loss": 1.019, "lr": 3.330118475841859e-08, "epoch": 6.836317135549872, "percentage": 97.66, "elapsed_time": "1 day, 2:30:31", "remaining_time": "0:38:04"} +{"current_steps": 2674, "total_steps": 2737, "loss": 0.9972, "lr": 3.22692085876708e-08, "epoch": 6.838874680306906, "percentage": 97.7, "elapsed_time": "1 day, 2:31:07", "remaining_time": "0:37:29"} +{"current_steps": 2675, "total_steps": 2737, "loss": 1.0004, "lr": 3.125344929168828e-08, "epoch": 6.841432225063938, "percentage": 97.73, "elapsed_time": "1 day, 2:31:43", "remaining_time": "0:36:53"} +{"current_steps": 2676, "total_steps": 2737, "loss": 1.0273, "lr": 3.025390852304688e-08, "epoch": 6.843989769820972, "percentage": 97.77, "elapsed_time": "1 day, 2:32:18", "remaining_time": "0:36:17"} +{"current_steps": 2677, "total_steps": 2737, "loss": 1.0102, "lr": 2.927058790793802e-08, "epoch": 6.846547314578006, "percentage": 97.81, "elapsed_time": "1 day, 2:32:54", "remaining_time": "0:35:42"} +{"current_steps": 2678, "total_steps": 2737, "loss": 0.991, "lr": 2.830348904616198e-08, "epoch": 6.849104859335038, "percentage": 97.84, "elapsed_time": "1 day, 2:33:29", "remaining_time": "0:35:06"} +{"current_steps": 2679, "total_steps": 2737, "loss": 1.0338, "lr": 2.7352613511127946e-08, "epoch": 6.851662404092072, "percentage": 97.88, "elapsed_time": "1 day, 2:34:05", "remaining_time": "0:34:30"} +{"current_steps": 2680, "total_steps": 2737, "loss": 1.0094, "lr": 2.6417962849852875e-08, "epoch": 6.854219948849105, "percentage": 97.92, "elapsed_time": "1 day, 2:34:41", "remaining_time": "0:33:55"} +{"current_steps": 2681, "total_steps": 2737, "loss": 1.0208, "lr": 2.549953858295262e-08, "epoch": 6.856777493606138, "percentage": 97.95, "elapsed_time": "1 day, 2:35:16", "remaining_time": "0:33:19"} +{"current_steps": 2682, "total_steps": 2737, "loss": 1.0015, "lr": 2.459734220464638e-08, "epoch": 6.859335038363171, "percentage": 97.99, "elapsed_time": "1 day, 2:35:52", "remaining_time": "0:32:43"} +{"current_steps": 2683, "total_steps": 2737, "loss": 1.0261, "lr": 2.3711375182753347e-08, "epoch": 6.861892583120205, "percentage": 98.03, "elapsed_time": "1 day, 2:36:27", "remaining_time": "0:32:07"} +{"current_steps": 2684, "total_steps": 2737, "loss": 1.0135, "lr": 2.2841638958683855e-08, "epoch": 6.864450127877237, "percentage": 98.06, "elapsed_time": "1 day, 2:37:03", "remaining_time": "0:31:32"} +{"current_steps": 2685, "total_steps": 2737, "loss": 1.0035, "lr": 2.1988134947446004e-08, "epoch": 6.867007672634271, "percentage": 98.1, "elapsed_time": "1 day, 2:37:39", "remaining_time": "0:30:56"} +{"current_steps": 2686, "total_steps": 2737, "loss": 1.0321, "lr": 2.1150864537636817e-08, "epoch": 6.869565217391305, "percentage": 98.14, "elapsed_time": "1 day, 2:38:14", "remaining_time": "0:30:20"} +{"current_steps": 2687, "total_steps": 2737, "loss": 1.012, "lr": 2.032982909144332e-08, "epoch": 6.872122762148337, "percentage": 98.17, "elapsed_time": "1 day, 2:38:50", "remaining_time": "0:29:45"} +{"current_steps": 2688, "total_steps": 2737, "loss": 0.9929, "lr": 1.9525029944637008e-08, "epoch": 6.874680306905371, "percentage": 98.21, "elapsed_time": "1 day, 2:39:26", "remaining_time": "0:29:09"} +{"current_steps": 2689, "total_steps": 2737, "loss": 0.9931, "lr": 1.8736468406579388e-08, "epoch": 6.877237851662404, "percentage": 98.25, "elapsed_time": "1 day, 2:40:02", "remaining_time": "0:28:33"} +{"current_steps": 2690, "total_steps": 2737, "loss": 1.0153, "lr": 1.796414576020755e-08, "epoch": 6.879795396419437, "percentage": 98.28, "elapsed_time": "1 day, 2:40:37", "remaining_time": "0:27:57"} +{"current_steps": 2691, "total_steps": 2737, "loss": 1.005, "lr": 1.720806326204305e-08, "epoch": 6.882352941176471, "percentage": 98.32, "elapsed_time": "1 day, 2:41:13", "remaining_time": "0:27:22"} +{"current_steps": 2692, "total_steps": 2737, "loss": 1.049, "lr": 1.646822214218524e-08, "epoch": 6.884910485933504, "percentage": 98.36, "elapsed_time": "1 day, 2:41:49", "remaining_time": "0:26:46"} +{"current_steps": 2693, "total_steps": 2737, "loss": 1.003, "lr": 1.5744623604310172e-08, "epoch": 6.887468030690537, "percentage": 98.39, "elapsed_time": "1 day, 2:42:24", "remaining_time": "0:26:10"} +{"current_steps": 2694, "total_steps": 2737, "loss": 0.9892, "lr": 1.503726882566503e-08, "epoch": 6.89002557544757, "percentage": 98.43, "elapsed_time": "1 day, 2:43:00", "remaining_time": "0:25:35"} +{"current_steps": 2695, "total_steps": 2737, "loss": 1.0261, "lr": 1.4346158957073696e-08, "epoch": 6.892583120204604, "percentage": 98.47, "elapsed_time": "1 day, 2:43:36", "remaining_time": "0:24:59"} +{"current_steps": 2696, "total_steps": 2737, "loss": 1.0118, "lr": 1.3671295122928974e-08, "epoch": 6.8951406649616365, "percentage": 98.5, "elapsed_time": "1 day, 2:44:11", "remaining_time": "0:24:23"} +{"current_steps": 2697, "total_steps": 2737, "loss": 1.0397, "lr": 1.3012678421191471e-08, "epoch": 6.89769820971867, "percentage": 98.54, "elapsed_time": "1 day, 2:44:47", "remaining_time": "0:23:48"} +{"current_steps": 2698, "total_steps": 2737, "loss": 1.0214, "lr": 1.2370309923388501e-08, "epoch": 6.900255754475703, "percentage": 98.58, "elapsed_time": "1 day, 2:45:23", "remaining_time": "0:23:12"} +{"current_steps": 2699, "total_steps": 2737, "loss": 1.0249, "lr": 1.1744190674614076e-08, "epoch": 6.9028132992327365, "percentage": 98.61, "elapsed_time": "1 day, 2:45:58", "remaining_time": "0:22:36"} +{"current_steps": 2700, "total_steps": 2737, "loss": 1.0013, "lr": 1.1134321693525574e-08, "epoch": 6.90537084398977, "percentage": 98.65, "elapsed_time": "1 day, 2:46:34", "remaining_time": "0:22:00"} +{"current_steps": 2701, "total_steps": 2737, "loss": 1.0148, "lr": 1.0540703972341525e-08, "epoch": 6.907928388746803, "percentage": 98.68, "elapsed_time": "1 day, 2:47:10", "remaining_time": "0:21:25"} +{"current_steps": 2702, "total_steps": 2737, "loss": 1.029, "lr": 9.963338476840501e-09, "epoch": 6.910485933503836, "percentage": 98.72, "elapsed_time": "1 day, 2:47:46", "remaining_time": "0:20:49"} +{"current_steps": 2703, "total_steps": 2737, "loss": 1.0136, "lr": 9.402226146361104e-09, "epoch": 6.913043478260869, "percentage": 98.76, "elapsed_time": "1 day, 2:48:21", "remaining_time": "0:20:13"} +{"current_steps": 2704, "total_steps": 2737, "loss": 0.9989, "lr": 8.857367893796431e-09, "epoch": 6.915601023017903, "percentage": 98.79, "elapsed_time": "1 day, 2:48:57", "remaining_time": "0:19:38"} +{"current_steps": 2705, "total_steps": 2737, "loss": 1.0239, "lr": 8.328764605597395e-09, "epoch": 6.918158567774936, "percentage": 98.83, "elapsed_time": "1 day, 2:49:33", "remaining_time": "0:19:02"} +{"current_steps": 2706, "total_steps": 2737, "loss": 1.041, "lr": 7.816417141768284e-09, "epoch": 6.920716112531969, "percentage": 98.87, "elapsed_time": "1 day, 2:50:08", "remaining_time": "0:18:26"} +{"current_steps": 2707, "total_steps": 2737, "loss": 1.0297, "lr": 7.3203263358678775e-09, "epoch": 6.923273657289003, "percentage": 98.9, "elapsed_time": "1 day, 2:50:44", "remaining_time": "0:17:51"} +{"current_steps": 2708, "total_steps": 2737, "loss": 1.0177, "lr": 6.840492995002779e-09, "epoch": 6.9258312020460355, "percentage": 98.94, "elapsed_time": "1 day, 2:51:20", "remaining_time": "0:17:15"} +{"current_steps": 2709, "total_steps": 2737, "loss": 1.0262, "lr": 6.376917899832968e-09, "epoch": 6.928388746803069, "percentage": 98.98, "elapsed_time": "1 day, 2:51:55", "remaining_time": "0:16:39"} +{"current_steps": 2710, "total_steps": 2737, "loss": 1.0057, "lr": 5.929601804566254e-09, "epoch": 6.930946291560103, "percentage": 99.01, "elapsed_time": "1 day, 2:52:31", "remaining_time": "0:16:03"} +{"current_steps": 2711, "total_steps": 2737, "loss": 1.0269, "lr": 5.498545436957159e-09, "epoch": 6.9335038363171355, "percentage": 99.05, "elapsed_time": "1 day, 2:53:07", "remaining_time": "0:15:28"} +{"current_steps": 2712, "total_steps": 2737, "loss": 0.9854, "lr": 5.0837494983091425e-09, "epoch": 6.936061381074169, "percentage": 99.09, "elapsed_time": "1 day, 2:53:42", "remaining_time": "0:14:52"} +{"current_steps": 2713, "total_steps": 2737, "loss": 1.0149, "lr": 4.6852146634668304e-09, "epoch": 6.938618925831202, "percentage": 99.12, "elapsed_time": "1 day, 2:54:18", "remaining_time": "0:14:16"} +{"current_steps": 2714, "total_steps": 2737, "loss": 0.9864, "lr": 4.302941580823783e-09, "epoch": 6.9411764705882355, "percentage": 99.16, "elapsed_time": "1 day, 2:54:53", "remaining_time": "0:13:41"} +{"current_steps": 2715, "total_steps": 2737, "loss": 1.0296, "lr": 3.936930872312506e-09, "epoch": 6.943734015345268, "percentage": 99.2, "elapsed_time": "1 day, 2:55:29", "remaining_time": "0:13:05"} +{"current_steps": 2716, "total_steps": 2737, "loss": 1.0319, "lr": 3.5871831334099992e-09, "epoch": 6.946291560102302, "percentage": 99.23, "elapsed_time": "1 day, 2:56:04", "remaining_time": "0:12:29"} +{"current_steps": 2717, "total_steps": 2737, "loss": 1.0061, "lr": 3.2536989331355406e-09, "epoch": 6.948849104859335, "percentage": 99.27, "elapsed_time": "1 day, 2:56:40", "remaining_time": "0:11:54"} +{"current_steps": 2718, "total_steps": 2737, "loss": 1.0558, "lr": 2.9364788140451296e-09, "epoch": 6.951406649616368, "percentage": 99.31, "elapsed_time": "1 day, 2:57:16", "remaining_time": "0:11:18"} +{"current_steps": 2719, "total_steps": 2737, "loss": 1.043, "lr": 2.635523292237041e-09, "epoch": 6.953964194373402, "percentage": 99.34, "elapsed_time": "1 day, 2:57:51", "remaining_time": "0:10:42"} +{"current_steps": 2720, "total_steps": 2737, "loss": 1.0157, "lr": 2.3508328573462745e-09, "epoch": 6.956521739130435, "percentage": 99.38, "elapsed_time": "1 day, 2:58:27", "remaining_time": "0:10:06"} +{"current_steps": 2721, "total_steps": 2737, "loss": 1.0172, "lr": 2.082407972547884e-09, "epoch": 6.959079283887468, "percentage": 99.42, "elapsed_time": "1 day, 2:59:03", "remaining_time": "0:09:31"} +{"current_steps": 2722, "total_steps": 2737, "loss": 1.0294, "lr": 1.8302490745503166e-09, "epoch": 6.961636828644501, "percentage": 99.45, "elapsed_time": "1 day, 2:59:38", "remaining_time": "0:08:55"} +{"current_steps": 2723, "total_steps": 2737, "loss": 1.0242, "lr": 1.5943565736020739e-09, "epoch": 6.964194373401535, "percentage": 99.49, "elapsed_time": "1 day, 3:00:14", "remaining_time": "0:08:19"} +{"current_steps": 2724, "total_steps": 2737, "loss": 1.0372, "lr": 1.3747308534850512e-09, "epoch": 6.966751918158568, "percentage": 99.53, "elapsed_time": "1 day, 3:00:50", "remaining_time": "0:07:44"} +{"current_steps": 2725, "total_steps": 2737, "loss": 1.0515, "lr": 1.1713722715167575e-09, "epoch": 6.969309462915601, "percentage": 99.56, "elapsed_time": "1 day, 3:01:25", "remaining_time": "0:07:08"} +{"current_steps": 2726, "total_steps": 2737, "loss": 1.0291, "lr": 9.84281158548095e-10, "epoch": 6.971867007672635, "percentage": 99.6, "elapsed_time": "1 day, 3:02:01", "remaining_time": "0:06:32"} +{"current_steps": 2727, "total_steps": 2737, "loss": 1.013, "lr": 8.134578189644692e-10, "epoch": 6.974424552429667, "percentage": 99.63, "elapsed_time": "1 day, 3:02:36", "remaining_time": "0:05:57"} +{"current_steps": 2728, "total_steps": 2737, "loss": 1.0054, "lr": 6.589025306869002e-10, "epoch": 6.976982097186701, "percentage": 99.67, "elapsed_time": "1 day, 3:03:12", "remaining_time": "0:05:21"} +{"current_steps": 2729, "total_steps": 2737, "loss": 1.0299, "lr": 5.206155451642491e-10, "epoch": 6.979539641943734, "percentage": 99.71, "elapsed_time": "1 day, 3:03:48", "remaining_time": "0:04:45"} +{"current_steps": 2730, "total_steps": 2737, "loss": 1.0413, "lr": 3.985970873821021e-10, "epoch": 6.982097186700767, "percentage": 99.74, "elapsed_time": "1 day, 3:04:23", "remaining_time": "0:04:09"} +{"current_steps": 2731, "total_steps": 2737, "loss": 1.0317, "lr": 2.928473558583278e-10, "epoch": 6.9846547314578, "percentage": 99.78, "elapsed_time": "1 day, 3:04:59", "remaining_time": "0:03:34"} +{"current_steps": 2732, "total_steps": 2737, "loss": 1.0144, "lr": 2.033665226386372e-10, "epoch": 6.987212276214834, "percentage": 99.82, "elapsed_time": "1 day, 3:05:35", "remaining_time": "0:02:58"} +{"current_steps": 2733, "total_steps": 2737, "loss": 1.0007, "lr": 1.301547333032449e-10, "epoch": 6.989769820971867, "percentage": 99.85, "elapsed_time": "1 day, 3:06:10", "remaining_time": "0:02:22"} +{"current_steps": 2734, "total_steps": 2737, "loss": 0.9763, "lr": 7.321210696464853e-11, "epoch": 6.9923273657289, "percentage": 99.89, "elapsed_time": "1 day, 3:06:46", "remaining_time": "0:01:47"} +{"current_steps": 2735, "total_steps": 2737, "loss": 1.0013, "lr": 3.253873626429816e-11, "epoch": 6.994884910485934, "percentage": 99.93, "elapsed_time": "1 day, 3:07:22", "remaining_time": "0:01:11"} +{"current_steps": 2736, "total_steps": 2737, "loss": 1.0472, "lr": 8.134687374816708e-12, "epoch": 6.997442455242966, "percentage": 99.96, "elapsed_time": "1 day, 3:07:57", "remaining_time": "0:00:35"} +{"current_steps": 2737, "total_steps": 2737, "loss": 0.9774, "lr": 0.0, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "1 day, 3:08:33", "remaining_time": "0:00:00"} +{"current_steps": 2737, "total_steps": 2737, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "1 day, 3:08:49", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..3353c8b --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,19201 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 2737, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025575447570332483, + "grad_norm": 2.9635716191319874, + "learning_rate": 7.299270072992701e-08, + "loss": 1.5218, + "step": 1 + }, + { + "epoch": 0.005115089514066497, + "grad_norm": 2.9570039035815743, + "learning_rate": 1.4598540145985402e-07, + "loss": 1.4755, + "step": 2 + }, + { + "epoch": 0.0076726342710997444, + "grad_norm": 3.017937072257941, + "learning_rate": 2.1897810218978106e-07, + "loss": 1.4935, + "step": 3 + }, + { + "epoch": 0.010230179028132993, + "grad_norm": 2.960891290072101, + "learning_rate": 2.9197080291970804e-07, + "loss": 1.4813, + "step": 4 + }, + { + "epoch": 0.01278772378516624, + "grad_norm": 2.976019939455323, + "learning_rate": 3.6496350364963505e-07, + "loss": 1.4941, + "step": 5 + }, + { + "epoch": 0.015345268542199489, + "grad_norm": 3.0149006457959886, + "learning_rate": 4.379562043795621e-07, + "loss": 1.5066, + "step": 6 + }, + { + "epoch": 0.017902813299232736, + "grad_norm": 2.9237260833122214, + "learning_rate": 5.109489051094891e-07, + "loss": 1.478, + "step": 7 + }, + { + "epoch": 0.020460358056265986, + "grad_norm": 2.9640674426484077, + "learning_rate": 5.839416058394161e-07, + "loss": 1.4882, + "step": 8 + }, + { + "epoch": 0.023017902813299233, + "grad_norm": 2.883080870578686, + "learning_rate": 6.569343065693432e-07, + "loss": 1.5219, + "step": 9 + }, + { + "epoch": 0.02557544757033248, + "grad_norm": 2.8912016510708844, + "learning_rate": 7.299270072992701e-07, + "loss": 1.5149, + "step": 10 + }, + { + "epoch": 0.028132992327365727, + "grad_norm": 2.8525137837011734, + "learning_rate": 8.029197080291971e-07, + "loss": 1.5065, + "step": 11 + }, + { + "epoch": 0.030690537084398978, + "grad_norm": 2.6980401328828734, + "learning_rate": 8.759124087591242e-07, + "loss": 1.47, + "step": 12 + }, + { + "epoch": 0.03324808184143223, + "grad_norm": 2.6499759522230795, + "learning_rate": 9.489051094890511e-07, + "loss": 1.5126, + "step": 13 + }, + { + "epoch": 0.03580562659846547, + "grad_norm": 2.646192888612826, + "learning_rate": 1.0218978102189781e-06, + "loss": 1.4605, + "step": 14 + }, + { + "epoch": 0.03836317135549872, + "grad_norm": 2.584050631976731, + "learning_rate": 1.0948905109489052e-06, + "loss": 1.4985, + "step": 15 + }, + { + "epoch": 0.04092071611253197, + "grad_norm": 2.3627571129305425, + "learning_rate": 1.1678832116788322e-06, + "loss": 1.4523, + "step": 16 + }, + { + "epoch": 0.043478260869565216, + "grad_norm": 2.052553239445229, + "learning_rate": 1.2408759124087592e-06, + "loss": 1.4734, + "step": 17 + }, + { + "epoch": 0.04603580562659847, + "grad_norm": 2.0014770644457442, + "learning_rate": 1.3138686131386864e-06, + "loss": 1.479, + "step": 18 + }, + { + "epoch": 0.04859335038363171, + "grad_norm": 1.9847838678835794, + "learning_rate": 1.3868613138686132e-06, + "loss": 1.4702, + "step": 19 + }, + { + "epoch": 0.05115089514066496, + "grad_norm": 1.9111274600693329, + "learning_rate": 1.4598540145985402e-06, + "loss": 1.4617, + "step": 20 + }, + { + "epoch": 0.05370843989769821, + "grad_norm": 1.870897574722989, + "learning_rate": 1.5328467153284674e-06, + "loss": 1.4463, + "step": 21 + }, + { + "epoch": 0.056265984654731455, + "grad_norm": 1.4296640142109796, + "learning_rate": 1.6058394160583942e-06, + "loss": 1.4599, + "step": 22 + }, + { + "epoch": 0.058823529411764705, + "grad_norm": 1.4790607914654283, + "learning_rate": 1.6788321167883212e-06, + "loss": 1.4157, + "step": 23 + }, + { + "epoch": 0.061381074168797956, + "grad_norm": 1.6141927865863235, + "learning_rate": 1.7518248175182485e-06, + "loss": 1.4439, + "step": 24 + }, + { + "epoch": 0.0639386189258312, + "grad_norm": 1.599753856171314, + "learning_rate": 1.8248175182481753e-06, + "loss": 1.4218, + "step": 25 + }, + { + "epoch": 0.06649616368286446, + "grad_norm": 1.4847704184111228, + "learning_rate": 1.8978102189781023e-06, + "loss": 1.4269, + "step": 26 + }, + { + "epoch": 0.06905370843989769, + "grad_norm": 1.3521166489305316, + "learning_rate": 1.9708029197080293e-06, + "loss": 1.4158, + "step": 27 + }, + { + "epoch": 0.07161125319693094, + "grad_norm": 1.2579545228076663, + "learning_rate": 2.0437956204379563e-06, + "loss": 1.4405, + "step": 28 + }, + { + "epoch": 0.0741687979539642, + "grad_norm": 1.009619956209423, + "learning_rate": 2.1167883211678833e-06, + "loss": 1.4151, + "step": 29 + }, + { + "epoch": 0.07672634271099744, + "grad_norm": 1.1838282966029092, + "learning_rate": 2.1897810218978103e-06, + "loss": 1.419, + "step": 30 + }, + { + "epoch": 0.0792838874680307, + "grad_norm": 1.2384598412642265, + "learning_rate": 2.2627737226277373e-06, + "loss": 1.412, + "step": 31 + }, + { + "epoch": 0.08184143222506395, + "grad_norm": 1.1754182466507677, + "learning_rate": 2.3357664233576643e-06, + "loss": 1.3866, + "step": 32 + }, + { + "epoch": 0.08439897698209718, + "grad_norm": 1.0614055850869524, + "learning_rate": 2.4087591240875918e-06, + "loss": 1.4127, + "step": 33 + }, + { + "epoch": 0.08695652173913043, + "grad_norm": 1.0576160445761484, + "learning_rate": 2.4817518248175183e-06, + "loss": 1.4281, + "step": 34 + }, + { + "epoch": 0.08951406649616368, + "grad_norm": 1.0117252925259892, + "learning_rate": 2.5547445255474458e-06, + "loss": 1.3731, + "step": 35 + }, + { + "epoch": 0.09207161125319693, + "grad_norm": 0.9022593000895403, + "learning_rate": 2.627737226277373e-06, + "loss": 1.3866, + "step": 36 + }, + { + "epoch": 0.09462915601023018, + "grad_norm": 0.8340755212001483, + "learning_rate": 2.7007299270072994e-06, + "loss": 1.4026, + "step": 37 + }, + { + "epoch": 0.09718670076726342, + "grad_norm": 0.7261384916519003, + "learning_rate": 2.7737226277372264e-06, + "loss": 1.372, + "step": 38 + }, + { + "epoch": 0.09974424552429667, + "grad_norm": 0.6484685338282444, + "learning_rate": 2.8467153284671534e-06, + "loss": 1.3914, + "step": 39 + }, + { + "epoch": 0.10230179028132992, + "grad_norm": 0.5852202685330168, + "learning_rate": 2.9197080291970804e-06, + "loss": 1.328, + "step": 40 + }, + { + "epoch": 0.10485933503836317, + "grad_norm": 0.7534890308070339, + "learning_rate": 2.992700729927008e-06, + "loss": 1.3525, + "step": 41 + }, + { + "epoch": 0.10741687979539642, + "grad_norm": 0.851146761403294, + "learning_rate": 3.065693430656935e-06, + "loss": 1.3478, + "step": 42 + }, + { + "epoch": 0.10997442455242967, + "grad_norm": 0.7827817647570426, + "learning_rate": 3.1386861313868614e-06, + "loss": 1.3191, + "step": 43 + }, + { + "epoch": 0.11253196930946291, + "grad_norm": 0.664689408470926, + "learning_rate": 3.2116788321167884e-06, + "loss": 1.3222, + "step": 44 + }, + { + "epoch": 0.11508951406649616, + "grad_norm": 0.5490554622557167, + "learning_rate": 3.2846715328467155e-06, + "loss": 1.3238, + "step": 45 + }, + { + "epoch": 0.11764705882352941, + "grad_norm": 0.5108750790400686, + "learning_rate": 3.3576642335766425e-06, + "loss": 1.3436, + "step": 46 + }, + { + "epoch": 0.12020460358056266, + "grad_norm": 0.5445952611951665, + "learning_rate": 3.43065693430657e-06, + "loss": 1.3458, + "step": 47 + }, + { + "epoch": 0.12276214833759591, + "grad_norm": 0.5697581064671751, + "learning_rate": 3.503649635036497e-06, + "loss": 1.3132, + "step": 48 + }, + { + "epoch": 0.12531969309462915, + "grad_norm": 0.578411430323597, + "learning_rate": 3.576642335766424e-06, + "loss": 1.3268, + "step": 49 + }, + { + "epoch": 0.1278772378516624, + "grad_norm": 0.5601792557806415, + "learning_rate": 3.6496350364963505e-06, + "loss": 1.2966, + "step": 50 + }, + { + "epoch": 0.13043478260869565, + "grad_norm": 0.5306373264311374, + "learning_rate": 3.7226277372262775e-06, + "loss": 1.3004, + "step": 51 + }, + { + "epoch": 0.1329923273657289, + "grad_norm": 0.4661660429983145, + "learning_rate": 3.7956204379562045e-06, + "loss": 1.2812, + "step": 52 + }, + { + "epoch": 0.13554987212276215, + "grad_norm": 0.42244352277225405, + "learning_rate": 3.868613138686132e-06, + "loss": 1.2774, + "step": 53 + }, + { + "epoch": 0.13810741687979539, + "grad_norm": 0.39129018686480066, + "learning_rate": 3.9416058394160585e-06, + "loss": 1.3168, + "step": 54 + }, + { + "epoch": 0.14066496163682865, + "grad_norm": 0.3485115346190062, + "learning_rate": 4.014598540145986e-06, + "loss": 1.3283, + "step": 55 + }, + { + "epoch": 0.1432225063938619, + "grad_norm": 0.3976730412907507, + "learning_rate": 4.0875912408759126e-06, + "loss": 1.3135, + "step": 56 + }, + { + "epoch": 0.14578005115089515, + "grad_norm": 0.4153119646875293, + "learning_rate": 4.16058394160584e-06, + "loss": 1.2989, + "step": 57 + }, + { + "epoch": 0.1483375959079284, + "grad_norm": 0.42065859451204163, + "learning_rate": 4.233576642335767e-06, + "loss": 1.3137, + "step": 58 + }, + { + "epoch": 0.15089514066496162, + "grad_norm": 0.35014086468112804, + "learning_rate": 4.306569343065693e-06, + "loss": 1.2743, + "step": 59 + }, + { + "epoch": 0.1534526854219949, + "grad_norm": 0.32228235531527744, + "learning_rate": 4.379562043795621e-06, + "loss": 1.2987, + "step": 60 + }, + { + "epoch": 0.15601023017902813, + "grad_norm": 0.33710245284823415, + "learning_rate": 4.452554744525548e-06, + "loss": 1.2869, + "step": 61 + }, + { + "epoch": 0.1585677749360614, + "grad_norm": 0.34426470471374965, + "learning_rate": 4.525547445255475e-06, + "loss": 1.3199, + "step": 62 + }, + { + "epoch": 0.16112531969309463, + "grad_norm": 0.334431341569014, + "learning_rate": 4.598540145985402e-06, + "loss": 1.2972, + "step": 63 + }, + { + "epoch": 0.1636828644501279, + "grad_norm": 0.33024914298061436, + "learning_rate": 4.671532846715329e-06, + "loss": 1.2928, + "step": 64 + }, + { + "epoch": 0.16624040920716113, + "grad_norm": 0.3058316278280544, + "learning_rate": 4.744525547445255e-06, + "loss": 1.2861, + "step": 65 + }, + { + "epoch": 0.16879795396419436, + "grad_norm": 0.292869194083437, + "learning_rate": 4.8175182481751835e-06, + "loss": 1.2461, + "step": 66 + }, + { + "epoch": 0.17135549872122763, + "grad_norm": 0.24971695111221698, + "learning_rate": 4.89051094890511e-06, + "loss": 1.2661, + "step": 67 + }, + { + "epoch": 0.17391304347826086, + "grad_norm": 0.26954765363549843, + "learning_rate": 4.963503649635037e-06, + "loss": 1.2467, + "step": 68 + }, + { + "epoch": 0.17647058823529413, + "grad_norm": 0.25356010222488795, + "learning_rate": 5.036496350364964e-06, + "loss": 1.2303, + "step": 69 + }, + { + "epoch": 0.17902813299232737, + "grad_norm": 0.2339589024717998, + "learning_rate": 5.1094890510948916e-06, + "loss": 1.2399, + "step": 70 + }, + { + "epoch": 0.1815856777493606, + "grad_norm": 0.22823462929167784, + "learning_rate": 5.182481751824818e-06, + "loss": 1.2498, + "step": 71 + }, + { + "epoch": 0.18414322250639387, + "grad_norm": 0.24948571250389207, + "learning_rate": 5.255474452554746e-06, + "loss": 1.2643, + "step": 72 + }, + { + "epoch": 0.1867007672634271, + "grad_norm": 0.2298632960982471, + "learning_rate": 5.328467153284672e-06, + "loss": 1.2958, + "step": 73 + }, + { + "epoch": 0.18925831202046037, + "grad_norm": 0.22223759951095107, + "learning_rate": 5.401459854014599e-06, + "loss": 1.2422, + "step": 74 + }, + { + "epoch": 0.1918158567774936, + "grad_norm": 0.23124679789968172, + "learning_rate": 5.474452554744526e-06, + "loss": 1.2407, + "step": 75 + }, + { + "epoch": 0.19437340153452684, + "grad_norm": 0.2221181062125986, + "learning_rate": 5.547445255474453e-06, + "loss": 1.2456, + "step": 76 + }, + { + "epoch": 0.1969309462915601, + "grad_norm": 0.1998449044080008, + "learning_rate": 5.62043795620438e-06, + "loss": 1.2514, + "step": 77 + }, + { + "epoch": 0.19948849104859334, + "grad_norm": 0.19727362882566524, + "learning_rate": 5.693430656934307e-06, + "loss": 1.2335, + "step": 78 + }, + { + "epoch": 0.2020460358056266, + "grad_norm": 0.20659124094509168, + "learning_rate": 5.766423357664233e-06, + "loss": 1.2276, + "step": 79 + }, + { + "epoch": 0.20460358056265984, + "grad_norm": 0.22959713985782182, + "learning_rate": 5.839416058394161e-06, + "loss": 1.2435, + "step": 80 + }, + { + "epoch": 0.2071611253196931, + "grad_norm": 0.19904222253631854, + "learning_rate": 5.912408759124088e-06, + "loss": 1.2266, + "step": 81 + }, + { + "epoch": 0.20971867007672634, + "grad_norm": 0.19344151897086864, + "learning_rate": 5.985401459854016e-06, + "loss": 1.2261, + "step": 82 + }, + { + "epoch": 0.21227621483375958, + "grad_norm": 0.19302417663791685, + "learning_rate": 6.058394160583942e-06, + "loss": 1.2384, + "step": 83 + }, + { + "epoch": 0.21483375959079284, + "grad_norm": 0.21396454463521547, + "learning_rate": 6.13138686131387e-06, + "loss": 1.235, + "step": 84 + }, + { + "epoch": 0.21739130434782608, + "grad_norm": 0.1913859035516872, + "learning_rate": 6.204379562043796e-06, + "loss": 1.2838, + "step": 85 + }, + { + "epoch": 0.21994884910485935, + "grad_norm": 0.17510278677847252, + "learning_rate": 6.277372262773723e-06, + "loss": 1.2358, + "step": 86 + }, + { + "epoch": 0.22250639386189258, + "grad_norm": 0.19863525132725016, + "learning_rate": 6.35036496350365e-06, + "loss": 1.2419, + "step": 87 + }, + { + "epoch": 0.22506393861892582, + "grad_norm": 0.19478563516185365, + "learning_rate": 6.423357664233577e-06, + "loss": 1.2641, + "step": 88 + }, + { + "epoch": 0.22762148337595908, + "grad_norm": 0.17875499154062388, + "learning_rate": 6.496350364963504e-06, + "loss": 1.2239, + "step": 89 + }, + { + "epoch": 0.23017902813299232, + "grad_norm": 0.1751251099110654, + "learning_rate": 6.569343065693431e-06, + "loss": 1.2524, + "step": 90 + }, + { + "epoch": 0.23273657289002558, + "grad_norm": 0.1869390091762672, + "learning_rate": 6.6423357664233575e-06, + "loss": 1.2494, + "step": 91 + }, + { + "epoch": 0.23529411764705882, + "grad_norm": 0.17676974553290642, + "learning_rate": 6.715328467153285e-06, + "loss": 1.2537, + "step": 92 + }, + { + "epoch": 0.23785166240409208, + "grad_norm": 0.1806189007928041, + "learning_rate": 6.7883211678832115e-06, + "loss": 1.2349, + "step": 93 + }, + { + "epoch": 0.24040920716112532, + "grad_norm": 0.18193990233718968, + "learning_rate": 6.86131386861314e-06, + "loss": 1.2583, + "step": 94 + }, + { + "epoch": 0.24296675191815856, + "grad_norm": 0.19012671201766562, + "learning_rate": 6.934306569343066e-06, + "loss": 1.2029, + "step": 95 + }, + { + "epoch": 0.24552429667519182, + "grad_norm": 0.16857838785815454, + "learning_rate": 7.007299270072994e-06, + "loss": 1.2423, + "step": 96 + }, + { + "epoch": 0.24808184143222506, + "grad_norm": 0.18952785901605423, + "learning_rate": 7.08029197080292e-06, + "loss": 1.2394, + "step": 97 + }, + { + "epoch": 0.2506393861892583, + "grad_norm": 0.18078294692872968, + "learning_rate": 7.153284671532848e-06, + "loss": 1.2122, + "step": 98 + }, + { + "epoch": 0.2531969309462916, + "grad_norm": 0.17487368586515217, + "learning_rate": 7.2262773722627744e-06, + "loss": 1.2117, + "step": 99 + }, + { + "epoch": 0.2557544757033248, + "grad_norm": 0.17732077203789362, + "learning_rate": 7.299270072992701e-06, + "loss": 1.2041, + "step": 100 + }, + { + "epoch": 0.25831202046035806, + "grad_norm": 0.18421840800752218, + "learning_rate": 7.3722627737226285e-06, + "loss": 1.2231, + "step": 101 + }, + { + "epoch": 0.2608695652173913, + "grad_norm": 0.1768000076239069, + "learning_rate": 7.445255474452555e-06, + "loss": 1.2325, + "step": 102 + }, + { + "epoch": 0.26342710997442453, + "grad_norm": 0.16984854034130697, + "learning_rate": 7.5182481751824825e-06, + "loss": 1.2026, + "step": 103 + }, + { + "epoch": 0.2659846547314578, + "grad_norm": 0.16277787684968492, + "learning_rate": 7.591240875912409e-06, + "loss": 1.193, + "step": 104 + }, + { + "epoch": 0.26854219948849106, + "grad_norm": 0.17357111549131551, + "learning_rate": 7.664233576642336e-06, + "loss": 1.2009, + "step": 105 + }, + { + "epoch": 0.2710997442455243, + "grad_norm": 0.1800163972852127, + "learning_rate": 7.737226277372264e-06, + "loss": 1.1909, + "step": 106 + }, + { + "epoch": 0.27365728900255754, + "grad_norm": 0.1681574320113801, + "learning_rate": 7.810218978102191e-06, + "loss": 1.2194, + "step": 107 + }, + { + "epoch": 0.27621483375959077, + "grad_norm": 0.16885285400717157, + "learning_rate": 7.883211678832117e-06, + "loss": 1.1985, + "step": 108 + }, + { + "epoch": 0.27877237851662406, + "grad_norm": 0.17914067468814437, + "learning_rate": 7.956204379562045e-06, + "loss": 1.2218, + "step": 109 + }, + { + "epoch": 0.2813299232736573, + "grad_norm": 0.16706925568533235, + "learning_rate": 8.029197080291972e-06, + "loss": 1.222, + "step": 110 + }, + { + "epoch": 0.28388746803069054, + "grad_norm": 0.1641264132835115, + "learning_rate": 8.1021897810219e-06, + "loss": 1.2242, + "step": 111 + }, + { + "epoch": 0.2864450127877238, + "grad_norm": 0.18443514799994437, + "learning_rate": 8.175182481751825e-06, + "loss": 1.2118, + "step": 112 + }, + { + "epoch": 0.289002557544757, + "grad_norm": 0.17675822272527503, + "learning_rate": 8.248175182481753e-06, + "loss": 1.1849, + "step": 113 + }, + { + "epoch": 0.2915601023017903, + "grad_norm": 0.1880451995042565, + "learning_rate": 8.32116788321168e-06, + "loss": 1.2103, + "step": 114 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.16598375442205784, + "learning_rate": 8.394160583941606e-06, + "loss": 1.1937, + "step": 115 + }, + { + "epoch": 0.2966751918158568, + "grad_norm": 0.190898911263414, + "learning_rate": 8.467153284671533e-06, + "loss": 1.2028, + "step": 116 + }, + { + "epoch": 0.29923273657289, + "grad_norm": 0.18881369445042054, + "learning_rate": 8.54014598540146e-06, + "loss": 1.1976, + "step": 117 + }, + { + "epoch": 0.30179028132992325, + "grad_norm": 0.20907040258575316, + "learning_rate": 8.613138686131386e-06, + "loss": 1.2476, + "step": 118 + }, + { + "epoch": 0.30434782608695654, + "grad_norm": 0.1704000017989476, + "learning_rate": 8.686131386861315e-06, + "loss": 1.2087, + "step": 119 + }, + { + "epoch": 0.3069053708439898, + "grad_norm": 0.19455649517228424, + "learning_rate": 8.759124087591241e-06, + "loss": 1.214, + "step": 120 + }, + { + "epoch": 0.309462915601023, + "grad_norm": 0.18574238206663096, + "learning_rate": 8.832116788321169e-06, + "loss": 1.2276, + "step": 121 + }, + { + "epoch": 0.31202046035805625, + "grad_norm": 0.19290426166252228, + "learning_rate": 8.905109489051096e-06, + "loss": 1.1805, + "step": 122 + }, + { + "epoch": 0.3145780051150895, + "grad_norm": 0.1995598501375803, + "learning_rate": 8.978102189781024e-06, + "loss": 1.2007, + "step": 123 + }, + { + "epoch": 0.3171355498721228, + "grad_norm": 0.17673439222358, + "learning_rate": 9.05109489051095e-06, + "loss": 1.1966, + "step": 124 + }, + { + "epoch": 0.319693094629156, + "grad_norm": 0.1966681987874607, + "learning_rate": 9.124087591240877e-06, + "loss": 1.1739, + "step": 125 + }, + { + "epoch": 0.32225063938618925, + "grad_norm": 0.20745524723498263, + "learning_rate": 9.197080291970804e-06, + "loss": 1.2309, + "step": 126 + }, + { + "epoch": 0.3248081841432225, + "grad_norm": 0.20371417264487574, + "learning_rate": 9.27007299270073e-06, + "loss": 1.1718, + "step": 127 + }, + { + "epoch": 0.3273657289002558, + "grad_norm": 0.20142192992356361, + "learning_rate": 9.343065693430657e-06, + "loss": 1.1981, + "step": 128 + }, + { + "epoch": 0.329923273657289, + "grad_norm": 0.18157695452516256, + "learning_rate": 9.416058394160585e-06, + "loss": 1.187, + "step": 129 + }, + { + "epoch": 0.33248081841432225, + "grad_norm": 0.18405529622418393, + "learning_rate": 9.48905109489051e-06, + "loss": 1.2154, + "step": 130 + }, + { + "epoch": 0.3350383631713555, + "grad_norm": 0.18826966568044085, + "learning_rate": 9.56204379562044e-06, + "loss": 1.1823, + "step": 131 + }, + { + "epoch": 0.3375959079283887, + "grad_norm": 0.17870276101242044, + "learning_rate": 9.635036496350367e-06, + "loss": 1.2399, + "step": 132 + }, + { + "epoch": 0.340153452685422, + "grad_norm": 0.18386831261657108, + "learning_rate": 9.708029197080293e-06, + "loss": 1.2114, + "step": 133 + }, + { + "epoch": 0.34271099744245526, + "grad_norm": 0.1795896309939293, + "learning_rate": 9.78102189781022e-06, + "loss": 1.1832, + "step": 134 + }, + { + "epoch": 0.3452685421994885, + "grad_norm": 0.21827425129513728, + "learning_rate": 9.854014598540148e-06, + "loss": 1.2389, + "step": 135 + }, + { + "epoch": 0.34782608695652173, + "grad_norm": 0.1768309026825683, + "learning_rate": 9.927007299270073e-06, + "loss": 1.1965, + "step": 136 + }, + { + "epoch": 0.35038363171355497, + "grad_norm": 0.20302569863881262, + "learning_rate": 1e-05, + "loss": 1.2094, + "step": 137 + }, + { + "epoch": 0.35294117647058826, + "grad_norm": 0.19427846203063504, + "learning_rate": 1.0072992700729928e-05, + "loss": 1.1974, + "step": 138 + }, + { + "epoch": 0.3554987212276215, + "grad_norm": 0.17339331224519358, + "learning_rate": 1.0145985401459854e-05, + "loss": 1.1736, + "step": 139 + }, + { + "epoch": 0.35805626598465473, + "grad_norm": 0.2466539718194467, + "learning_rate": 1.0218978102189783e-05, + "loss": 1.2279, + "step": 140 + }, + { + "epoch": 0.36061381074168797, + "grad_norm": 0.21241110450455392, + "learning_rate": 1.0291970802919709e-05, + "loss": 1.1409, + "step": 141 + }, + { + "epoch": 0.3631713554987212, + "grad_norm": 0.18293508498426997, + "learning_rate": 1.0364963503649636e-05, + "loss": 1.1957, + "step": 142 + }, + { + "epoch": 0.3657289002557545, + "grad_norm": 0.19790775478208397, + "learning_rate": 1.0437956204379562e-05, + "loss": 1.2193, + "step": 143 + }, + { + "epoch": 0.36828644501278773, + "grad_norm": 0.20929660856991877, + "learning_rate": 1.0510948905109491e-05, + "loss": 1.1866, + "step": 144 + }, + { + "epoch": 0.37084398976982097, + "grad_norm": 0.1926018989518869, + "learning_rate": 1.0583941605839417e-05, + "loss": 1.2015, + "step": 145 + }, + { + "epoch": 0.3734015345268542, + "grad_norm": 0.19192914492955238, + "learning_rate": 1.0656934306569344e-05, + "loss": 1.1886, + "step": 146 + }, + { + "epoch": 0.37595907928388744, + "grad_norm": 0.20322534422512073, + "learning_rate": 1.072992700729927e-05, + "loss": 1.2199, + "step": 147 + }, + { + "epoch": 0.37851662404092073, + "grad_norm": 0.18947938981971202, + "learning_rate": 1.0802919708029198e-05, + "loss": 1.1829, + "step": 148 + }, + { + "epoch": 0.38107416879795397, + "grad_norm": 0.2154696847726249, + "learning_rate": 1.0875912408759123e-05, + "loss": 1.1655, + "step": 149 + }, + { + "epoch": 0.3836317135549872, + "grad_norm": 0.20859256059256231, + "learning_rate": 1.0948905109489052e-05, + "loss": 1.1815, + "step": 150 + }, + { + "epoch": 0.38618925831202044, + "grad_norm": 0.20565139563521717, + "learning_rate": 1.102189781021898e-05, + "loss": 1.1848, + "step": 151 + }, + { + "epoch": 0.3887468030690537, + "grad_norm": 0.21340531513272162, + "learning_rate": 1.1094890510948906e-05, + "loss": 1.188, + "step": 152 + }, + { + "epoch": 0.391304347826087, + "grad_norm": 0.22952365545919354, + "learning_rate": 1.1167883211678833e-05, + "loss": 1.1772, + "step": 153 + }, + { + "epoch": 0.3938618925831202, + "grad_norm": 0.21489457470648385, + "learning_rate": 1.124087591240876e-05, + "loss": 1.1807, + "step": 154 + }, + { + "epoch": 0.39641943734015345, + "grad_norm": 0.22932079381688553, + "learning_rate": 1.1313868613138688e-05, + "loss": 1.1949, + "step": 155 + }, + { + "epoch": 0.3989769820971867, + "grad_norm": 0.23209900752946952, + "learning_rate": 1.1386861313868614e-05, + "loss": 1.1996, + "step": 156 + }, + { + "epoch": 0.40153452685422, + "grad_norm": 0.22388173844283388, + "learning_rate": 1.1459854014598541e-05, + "loss": 1.2097, + "step": 157 + }, + { + "epoch": 0.4040920716112532, + "grad_norm": 0.21380373488801446, + "learning_rate": 1.1532846715328467e-05, + "loss": 1.2082, + "step": 158 + }, + { + "epoch": 0.40664961636828645, + "grad_norm": 0.21817873889647327, + "learning_rate": 1.1605839416058396e-05, + "loss": 1.1586, + "step": 159 + }, + { + "epoch": 0.4092071611253197, + "grad_norm": 0.2450535248536084, + "learning_rate": 1.1678832116788322e-05, + "loss": 1.1765, + "step": 160 + }, + { + "epoch": 0.4117647058823529, + "grad_norm": 0.24576894425899287, + "learning_rate": 1.1751824817518249e-05, + "loss": 1.1701, + "step": 161 + }, + { + "epoch": 0.4143222506393862, + "grad_norm": 0.2781533359151788, + "learning_rate": 1.1824817518248176e-05, + "loss": 1.1686, + "step": 162 + }, + { + "epoch": 0.41687979539641945, + "grad_norm": 0.23249844406377174, + "learning_rate": 1.1897810218978102e-05, + "loss": 1.169, + "step": 163 + }, + { + "epoch": 0.4194373401534527, + "grad_norm": 0.2425823032194627, + "learning_rate": 1.1970802919708031e-05, + "loss": 1.1821, + "step": 164 + }, + { + "epoch": 0.4219948849104859, + "grad_norm": 0.18932993548929591, + "learning_rate": 1.2043795620437957e-05, + "loss": 1.1538, + "step": 165 + }, + { + "epoch": 0.42455242966751916, + "grad_norm": 0.2884159065917926, + "learning_rate": 1.2116788321167885e-05, + "loss": 1.1787, + "step": 166 + }, + { + "epoch": 0.42710997442455245, + "grad_norm": 0.2667378207082784, + "learning_rate": 1.218978102189781e-05, + "loss": 1.1774, + "step": 167 + }, + { + "epoch": 0.4296675191815857, + "grad_norm": 0.24644746723371008, + "learning_rate": 1.226277372262774e-05, + "loss": 1.1823, + "step": 168 + }, + { + "epoch": 0.4322250639386189, + "grad_norm": 0.3049603900188157, + "learning_rate": 1.2335766423357665e-05, + "loss": 1.1808, + "step": 169 + }, + { + "epoch": 0.43478260869565216, + "grad_norm": 0.24091240924103605, + "learning_rate": 1.2408759124087593e-05, + "loss": 1.1646, + "step": 170 + }, + { + "epoch": 0.4373401534526854, + "grad_norm": 0.31462619972433453, + "learning_rate": 1.2481751824817518e-05, + "loss": 1.1742, + "step": 171 + }, + { + "epoch": 0.4398976982097187, + "grad_norm": 0.25976500149808457, + "learning_rate": 1.2554744525547446e-05, + "loss": 1.1741, + "step": 172 + }, + { + "epoch": 0.4424552429667519, + "grad_norm": 0.22869248627416927, + "learning_rate": 1.2627737226277371e-05, + "loss": 1.1927, + "step": 173 + }, + { + "epoch": 0.44501278772378516, + "grad_norm": 0.27204853892769404, + "learning_rate": 1.27007299270073e-05, + "loss": 1.199, + "step": 174 + }, + { + "epoch": 0.4475703324808184, + "grad_norm": 0.22922656795364751, + "learning_rate": 1.2773722627737228e-05, + "loss": 1.1742, + "step": 175 + }, + { + "epoch": 0.45012787723785164, + "grad_norm": 0.3018012418428905, + "learning_rate": 1.2846715328467154e-05, + "loss": 1.2027, + "step": 176 + }, + { + "epoch": 0.45268542199488493, + "grad_norm": 0.2578612414340434, + "learning_rate": 1.2919708029197083e-05, + "loss": 1.1757, + "step": 177 + }, + { + "epoch": 0.45524296675191817, + "grad_norm": 0.25636745613132944, + "learning_rate": 1.2992700729927009e-05, + "loss": 1.1716, + "step": 178 + }, + { + "epoch": 0.4578005115089514, + "grad_norm": 0.2715386790093217, + "learning_rate": 1.3065693430656936e-05, + "loss": 1.1583, + "step": 179 + }, + { + "epoch": 0.46035805626598464, + "grad_norm": 0.2891675384844315, + "learning_rate": 1.3138686131386862e-05, + "loss": 1.1657, + "step": 180 + }, + { + "epoch": 0.4629156010230179, + "grad_norm": 0.23385863111978508, + "learning_rate": 1.321167883211679e-05, + "loss": 1.1922, + "step": 181 + }, + { + "epoch": 0.46547314578005117, + "grad_norm": 0.22994123507129197, + "learning_rate": 1.3284671532846715e-05, + "loss": 1.1717, + "step": 182 + }, + { + "epoch": 0.4680306905370844, + "grad_norm": 0.23612727422353394, + "learning_rate": 1.3357664233576644e-05, + "loss": 1.1801, + "step": 183 + }, + { + "epoch": 0.47058823529411764, + "grad_norm": 0.2069010463077349, + "learning_rate": 1.343065693430657e-05, + "loss": 1.177, + "step": 184 + }, + { + "epoch": 0.4731457800511509, + "grad_norm": 0.2588170825534718, + "learning_rate": 1.3503649635036497e-05, + "loss": 1.1808, + "step": 185 + }, + { + "epoch": 0.47570332480818417, + "grad_norm": 0.2157790774775731, + "learning_rate": 1.3576642335766423e-05, + "loss": 1.1821, + "step": 186 + }, + { + "epoch": 0.4782608695652174, + "grad_norm": 0.23223470081294634, + "learning_rate": 1.3649635036496352e-05, + "loss": 1.1615, + "step": 187 + }, + { + "epoch": 0.48081841432225064, + "grad_norm": 0.21725466354040374, + "learning_rate": 1.372262773722628e-05, + "loss": 1.1912, + "step": 188 + }, + { + "epoch": 0.4833759590792839, + "grad_norm": 0.211538836700456, + "learning_rate": 1.3795620437956205e-05, + "loss": 1.1678, + "step": 189 + }, + { + "epoch": 0.4859335038363171, + "grad_norm": 0.25537726955126566, + "learning_rate": 1.3868613138686133e-05, + "loss": 1.1745, + "step": 190 + }, + { + "epoch": 0.4884910485933504, + "grad_norm": 0.28371208474889603, + "learning_rate": 1.3941605839416059e-05, + "loss": 1.1193, + "step": 191 + }, + { + "epoch": 0.49104859335038364, + "grad_norm": 0.26303907455029885, + "learning_rate": 1.4014598540145988e-05, + "loss": 1.1622, + "step": 192 + }, + { + "epoch": 0.4936061381074169, + "grad_norm": 0.2799114044156544, + "learning_rate": 1.4087591240875913e-05, + "loss": 1.136, + "step": 193 + }, + { + "epoch": 0.4961636828644501, + "grad_norm": 0.24139333187754325, + "learning_rate": 1.416058394160584e-05, + "loss": 1.1306, + "step": 194 + }, + { + "epoch": 0.49872122762148335, + "grad_norm": 0.2793729959544077, + "learning_rate": 1.4233576642335767e-05, + "loss": 1.2086, + "step": 195 + }, + { + "epoch": 0.5012787723785166, + "grad_norm": 0.27570376951402886, + "learning_rate": 1.4306569343065696e-05, + "loss": 1.1628, + "step": 196 + }, + { + "epoch": 0.5038363171355499, + "grad_norm": 0.32786685913286884, + "learning_rate": 1.4379562043795621e-05, + "loss": 1.1518, + "step": 197 + }, + { + "epoch": 0.5063938618925832, + "grad_norm": 0.45385237120867455, + "learning_rate": 1.4452554744525549e-05, + "loss": 1.1856, + "step": 198 + }, + { + "epoch": 0.5089514066496164, + "grad_norm": 0.41272427110721904, + "learning_rate": 1.4525547445255475e-05, + "loss": 1.1483, + "step": 199 + }, + { + "epoch": 0.5115089514066496, + "grad_norm": 0.2841480764999212, + "learning_rate": 1.4598540145985402e-05, + "loss": 1.1629, + "step": 200 + }, + { + "epoch": 0.5140664961636828, + "grad_norm": 0.27714909479279093, + "learning_rate": 1.4671532846715331e-05, + "loss": 1.1442, + "step": 201 + }, + { + "epoch": 0.5166240409207161, + "grad_norm": 0.403242161588326, + "learning_rate": 1.4744525547445257e-05, + "loss": 1.1385, + "step": 202 + }, + { + "epoch": 0.5191815856777494, + "grad_norm": 0.337013121025594, + "learning_rate": 1.4817518248175184e-05, + "loss": 1.171, + "step": 203 + }, + { + "epoch": 0.5217391304347826, + "grad_norm": 0.4040109170859878, + "learning_rate": 1.489051094890511e-05, + "loss": 1.1418, + "step": 204 + }, + { + "epoch": 0.5242966751918159, + "grad_norm": 0.48665453956547733, + "learning_rate": 1.4963503649635038e-05, + "loss": 1.164, + "step": 205 + }, + { + "epoch": 0.5268542199488491, + "grad_norm": 0.24722444184837292, + "learning_rate": 1.5036496350364965e-05, + "loss": 1.1535, + "step": 206 + }, + { + "epoch": 0.5294117647058824, + "grad_norm": 0.329077822667812, + "learning_rate": 1.5109489051094892e-05, + "loss": 1.1704, + "step": 207 + }, + { + "epoch": 0.5319693094629157, + "grad_norm": 0.41651469422399784, + "learning_rate": 1.5182481751824818e-05, + "loss": 1.1559, + "step": 208 + }, + { + "epoch": 0.5345268542199488, + "grad_norm": 0.32960667919190284, + "learning_rate": 1.5255474452554746e-05, + "loss": 1.1495, + "step": 209 + }, + { + "epoch": 0.5370843989769821, + "grad_norm": 0.4781321369544006, + "learning_rate": 1.5328467153284673e-05, + "loss": 1.1387, + "step": 210 + }, + { + "epoch": 0.5396419437340153, + "grad_norm": 0.43671817015361414, + "learning_rate": 1.54014598540146e-05, + "loss": 1.1607, + "step": 211 + }, + { + "epoch": 0.5421994884910486, + "grad_norm": 0.32190848339790007, + "learning_rate": 1.5474452554744528e-05, + "loss": 1.1286, + "step": 212 + }, + { + "epoch": 0.5447570332480819, + "grad_norm": 0.28497016967310845, + "learning_rate": 1.5547445255474454e-05, + "loss": 1.1701, + "step": 213 + }, + { + "epoch": 0.5473145780051151, + "grad_norm": 0.30316718930544045, + "learning_rate": 1.5620437956204383e-05, + "loss": 1.1236, + "step": 214 + }, + { + "epoch": 0.5498721227621484, + "grad_norm": 0.26835985072996216, + "learning_rate": 1.569343065693431e-05, + "loss": 1.1289, + "step": 215 + }, + { + "epoch": 0.5524296675191815, + "grad_norm": 0.3009095238514411, + "learning_rate": 1.5766423357664234e-05, + "loss": 1.1636, + "step": 216 + }, + { + "epoch": 0.5549872122762148, + "grad_norm": 0.3065933942839116, + "learning_rate": 1.583941605839416e-05, + "loss": 1.1368, + "step": 217 + }, + { + "epoch": 0.5575447570332481, + "grad_norm": 0.26109719009183135, + "learning_rate": 1.591240875912409e-05, + "loss": 1.1077, + "step": 218 + }, + { + "epoch": 0.5601023017902813, + "grad_norm": 0.3164778738084223, + "learning_rate": 1.5985401459854015e-05, + "loss": 1.1333, + "step": 219 + }, + { + "epoch": 0.5626598465473146, + "grad_norm": 0.35400248747839075, + "learning_rate": 1.6058394160583944e-05, + "loss": 1.1865, + "step": 220 + }, + { + "epoch": 0.5652173913043478, + "grad_norm": 0.28805686893200677, + "learning_rate": 1.613138686131387e-05, + "loss": 1.1293, + "step": 221 + }, + { + "epoch": 0.5677749360613811, + "grad_norm": 0.30523736515745126, + "learning_rate": 1.62043795620438e-05, + "loss": 1.1296, + "step": 222 + }, + { + "epoch": 0.5703324808184144, + "grad_norm": 0.4190076909483638, + "learning_rate": 1.6277372262773725e-05, + "loss": 1.1344, + "step": 223 + }, + { + "epoch": 0.5728900255754475, + "grad_norm": 0.42243425644304494, + "learning_rate": 1.635036496350365e-05, + "loss": 1.1665, + "step": 224 + }, + { + "epoch": 0.5754475703324808, + "grad_norm": 0.33398927440080145, + "learning_rate": 1.642335766423358e-05, + "loss": 1.1616, + "step": 225 + }, + { + "epoch": 0.578005115089514, + "grad_norm": 0.31042126932738984, + "learning_rate": 1.6496350364963505e-05, + "loss": 1.1346, + "step": 226 + }, + { + "epoch": 0.5805626598465473, + "grad_norm": 0.4022933069927679, + "learning_rate": 1.6569343065693434e-05, + "loss": 1.1474, + "step": 227 + }, + { + "epoch": 0.5831202046035806, + "grad_norm": 0.34778708873533665, + "learning_rate": 1.664233576642336e-05, + "loss": 1.1328, + "step": 228 + }, + { + "epoch": 0.5856777493606138, + "grad_norm": 0.35235801712692716, + "learning_rate": 1.6715328467153286e-05, + "loss": 1.1507, + "step": 229 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 0.3378264430318775, + "learning_rate": 1.678832116788321e-05, + "loss": 1.1556, + "step": 230 + }, + { + "epoch": 0.5907928388746803, + "grad_norm": 0.3260621828817585, + "learning_rate": 1.686131386861314e-05, + "loss": 1.152, + "step": 231 + }, + { + "epoch": 0.5933503836317136, + "grad_norm": 0.39226471807556507, + "learning_rate": 1.6934306569343066e-05, + "loss": 1.1398, + "step": 232 + }, + { + "epoch": 0.5959079283887468, + "grad_norm": 0.4562478952465355, + "learning_rate": 1.7007299270072995e-05, + "loss": 1.1447, + "step": 233 + }, + { + "epoch": 0.59846547314578, + "grad_norm": 0.3451241092677777, + "learning_rate": 1.708029197080292e-05, + "loss": 1.1005, + "step": 234 + }, + { + "epoch": 0.6010230179028133, + "grad_norm": 0.35647792283371854, + "learning_rate": 1.7153284671532847e-05, + "loss": 1.1227, + "step": 235 + }, + { + "epoch": 0.6035805626598465, + "grad_norm": 0.4594520420622475, + "learning_rate": 1.7226277372262773e-05, + "loss": 1.1505, + "step": 236 + }, + { + "epoch": 0.6061381074168798, + "grad_norm": 0.45224289985329424, + "learning_rate": 1.7299270072992702e-05, + "loss": 1.1308, + "step": 237 + }, + { + "epoch": 0.6086956521739131, + "grad_norm": 0.40418344343634116, + "learning_rate": 1.737226277372263e-05, + "loss": 1.1181, + "step": 238 + }, + { + "epoch": 0.6112531969309463, + "grad_norm": 0.3386408460236669, + "learning_rate": 1.7445255474452557e-05, + "loss": 1.1584, + "step": 239 + }, + { + "epoch": 0.6138107416879796, + "grad_norm": 0.26946506842987866, + "learning_rate": 1.7518248175182482e-05, + "loss": 1.1264, + "step": 240 + }, + { + "epoch": 0.6163682864450127, + "grad_norm": 0.36854128324837004, + "learning_rate": 1.7591240875912408e-05, + "loss": 1.1234, + "step": 241 + }, + { + "epoch": 0.618925831202046, + "grad_norm": 0.40766745885420824, + "learning_rate": 1.7664233576642337e-05, + "loss": 1.1473, + "step": 242 + }, + { + "epoch": 0.6214833759590793, + "grad_norm": 0.34418627419066, + "learning_rate": 1.7737226277372263e-05, + "loss": 1.1443, + "step": 243 + }, + { + "epoch": 0.6240409207161125, + "grad_norm": 0.3132419041181749, + "learning_rate": 1.7810218978102192e-05, + "loss": 1.1898, + "step": 244 + }, + { + "epoch": 0.6265984654731458, + "grad_norm": 0.3133703026128217, + "learning_rate": 1.7883211678832118e-05, + "loss": 1.1501, + "step": 245 + }, + { + "epoch": 0.629156010230179, + "grad_norm": 0.3441898164827929, + "learning_rate": 1.7956204379562047e-05, + "loss": 1.1452, + "step": 246 + }, + { + "epoch": 0.6317135549872123, + "grad_norm": 0.33750686928448953, + "learning_rate": 1.8029197080291973e-05, + "loss": 1.1359, + "step": 247 + }, + { + "epoch": 0.6342710997442456, + "grad_norm": 0.374020584176404, + "learning_rate": 1.81021897810219e-05, + "loss": 1.1823, + "step": 248 + }, + { + "epoch": 0.6368286445012787, + "grad_norm": 0.3514782831462071, + "learning_rate": 1.8175182481751824e-05, + "loss": 1.1632, + "step": 249 + }, + { + "epoch": 0.639386189258312, + "grad_norm": 0.3606450922286876, + "learning_rate": 1.8248175182481753e-05, + "loss": 1.1409, + "step": 250 + }, + { + "epoch": 0.6419437340153452, + "grad_norm": 0.261265823171208, + "learning_rate": 1.8321167883211683e-05, + "loss": 1.1499, + "step": 251 + }, + { + "epoch": 0.6445012787723785, + "grad_norm": 0.42167995133388064, + "learning_rate": 1.8394160583941608e-05, + "loss": 1.154, + "step": 252 + }, + { + "epoch": 0.6470588235294118, + "grad_norm": 0.3940819685714755, + "learning_rate": 1.8467153284671534e-05, + "loss": 1.1355, + "step": 253 + }, + { + "epoch": 0.649616368286445, + "grad_norm": 0.3265578920410715, + "learning_rate": 1.854014598540146e-05, + "loss": 1.1874, + "step": 254 + }, + { + "epoch": 0.6521739130434783, + "grad_norm": 0.39035015686633145, + "learning_rate": 1.861313868613139e-05, + "loss": 1.1374, + "step": 255 + }, + { + "epoch": 0.6547314578005116, + "grad_norm": 0.41589276832005634, + "learning_rate": 1.8686131386861315e-05, + "loss": 1.1289, + "step": 256 + }, + { + "epoch": 0.6572890025575447, + "grad_norm": 0.45228952583155346, + "learning_rate": 1.8759124087591244e-05, + "loss": 1.1646, + "step": 257 + }, + { + "epoch": 0.659846547314578, + "grad_norm": 0.5348752543777668, + "learning_rate": 1.883211678832117e-05, + "loss": 1.1268, + "step": 258 + }, + { + "epoch": 0.6624040920716112, + "grad_norm": 0.6021227056854751, + "learning_rate": 1.8905109489051095e-05, + "loss": 1.1593, + "step": 259 + }, + { + "epoch": 0.6649616368286445, + "grad_norm": 0.5171238656799629, + "learning_rate": 1.897810218978102e-05, + "loss": 1.1374, + "step": 260 + }, + { + "epoch": 0.6675191815856778, + "grad_norm": 0.4416261577215247, + "learning_rate": 1.905109489051095e-05, + "loss": 1.1093, + "step": 261 + }, + { + "epoch": 0.670076726342711, + "grad_norm": 0.569218554097933, + "learning_rate": 1.912408759124088e-05, + "loss": 1.1232, + "step": 262 + }, + { + "epoch": 0.6726342710997443, + "grad_norm": 0.6811617127901143, + "learning_rate": 1.9197080291970805e-05, + "loss": 1.1682, + "step": 263 + }, + { + "epoch": 0.6751918158567775, + "grad_norm": 0.749600012327492, + "learning_rate": 1.9270072992700734e-05, + "loss": 1.1484, + "step": 264 + }, + { + "epoch": 0.6777493606138107, + "grad_norm": 0.5547245978393044, + "learning_rate": 1.934306569343066e-05, + "loss": 1.1746, + "step": 265 + }, + { + "epoch": 0.680306905370844, + "grad_norm": 0.29516123217758117, + "learning_rate": 1.9416058394160586e-05, + "loss": 1.1414, + "step": 266 + }, + { + "epoch": 0.6828644501278772, + "grad_norm": 0.5616443320978407, + "learning_rate": 1.948905109489051e-05, + "loss": 1.096, + "step": 267 + }, + { + "epoch": 0.6854219948849105, + "grad_norm": 0.7110950485565922, + "learning_rate": 1.956204379562044e-05, + "loss": 1.1383, + "step": 268 + }, + { + "epoch": 0.6879795396419437, + "grad_norm": 0.5747864084575326, + "learning_rate": 1.9635036496350366e-05, + "loss": 1.1157, + "step": 269 + }, + { + "epoch": 0.690537084398977, + "grad_norm": 0.5551996423553552, + "learning_rate": 1.9708029197080295e-05, + "loss": 1.1569, + "step": 270 + }, + { + "epoch": 0.6930946291560103, + "grad_norm": 0.7165225607672224, + "learning_rate": 1.978102189781022e-05, + "loss": 1.1551, + "step": 271 + }, + { + "epoch": 0.6956521739130435, + "grad_norm": 0.7036255091082283, + "learning_rate": 1.9854014598540147e-05, + "loss": 1.1155, + "step": 272 + }, + { + "epoch": 0.6982097186700768, + "grad_norm": 0.37416829306334026, + "learning_rate": 1.9927007299270073e-05, + "loss": 1.1293, + "step": 273 + }, + { + "epoch": 0.7007672634271099, + "grad_norm": 0.5000491272477234, + "learning_rate": 2e-05, + "loss": 1.1495, + "step": 274 + }, + { + "epoch": 0.7033248081841432, + "grad_norm": 0.7162752485719868, + "learning_rate": 1.9999991865312627e-05, + "loss": 1.1267, + "step": 275 + }, + { + "epoch": 0.7058823529411765, + "grad_norm": 0.6356341002049819, + "learning_rate": 1.9999967461263736e-05, + "loss": 1.1469, + "step": 276 + }, + { + "epoch": 0.7084398976982097, + "grad_norm": 0.46429306768513406, + "learning_rate": 1.9999926787893038e-05, + "loss": 1.1605, + "step": 277 + }, + { + "epoch": 0.710997442455243, + "grad_norm": 0.42193730725900314, + "learning_rate": 1.99998698452667e-05, + "loss": 1.1291, + "step": 278 + }, + { + "epoch": 0.7135549872122762, + "grad_norm": 0.45111683276082, + "learning_rate": 1.999979663347736e-05, + "loss": 1.1594, + "step": 279 + }, + { + "epoch": 0.7161125319693095, + "grad_norm": 0.48963964069881194, + "learning_rate": 1.9999707152644143e-05, + "loss": 1.1245, + "step": 280 + }, + { + "epoch": 0.7186700767263428, + "grad_norm": 0.4979629650586617, + "learning_rate": 1.999960140291262e-05, + "loss": 1.119, + "step": 281 + }, + { + "epoch": 0.7212276214833759, + "grad_norm": 0.4664713962878264, + "learning_rate": 1.9999479384454838e-05, + "loss": 1.1468, + "step": 282 + }, + { + "epoch": 0.7237851662404092, + "grad_norm": 0.3844942432737082, + "learning_rate": 1.9999341097469313e-05, + "loss": 1.075, + "step": 283 + }, + { + "epoch": 0.7263427109974424, + "grad_norm": 0.3748435881073205, + "learning_rate": 1.9999186542181038e-05, + "loss": 1.1388, + "step": 284 + }, + { + "epoch": 0.7289002557544757, + "grad_norm": 0.37537611839818713, + "learning_rate": 1.9999015718841453e-05, + "loss": 1.1204, + "step": 285 + }, + { + "epoch": 0.731457800511509, + "grad_norm": 0.2604152551489964, + "learning_rate": 1.9998828627728483e-05, + "loss": 1.1441, + "step": 286 + }, + { + "epoch": 0.7340153452685422, + "grad_norm": 0.3500229133794647, + "learning_rate": 1.9998625269146515e-05, + "loss": 1.1418, + "step": 287 + }, + { + "epoch": 0.7365728900255755, + "grad_norm": 0.40870411685426555, + "learning_rate": 1.9998405643426398e-05, + "loss": 1.107, + "step": 288 + }, + { + "epoch": 0.7391304347826086, + "grad_norm": 0.4142193267583776, + "learning_rate": 1.999816975092545e-05, + "loss": 1.1386, + "step": 289 + }, + { + "epoch": 0.7416879795396419, + "grad_norm": 0.3984615533147621, + "learning_rate": 1.9997917592027455e-05, + "loss": 1.1478, + "step": 290 + }, + { + "epoch": 0.7442455242966752, + "grad_norm": 0.33486990703650343, + "learning_rate": 1.9997649167142654e-05, + "loss": 1.1322, + "step": 291 + }, + { + "epoch": 0.7468030690537084, + "grad_norm": 0.34307927675012156, + "learning_rate": 1.9997364476707765e-05, + "loss": 1.0975, + "step": 292 + }, + { + "epoch": 0.7493606138107417, + "grad_norm": 0.32862273663424796, + "learning_rate": 1.9997063521185956e-05, + "loss": 1.1234, + "step": 293 + }, + { + "epoch": 0.7519181585677749, + "grad_norm": 0.3832334389775187, + "learning_rate": 1.9996746301066867e-05, + "loss": 1.1204, + "step": 294 + }, + { + "epoch": 0.7544757033248082, + "grad_norm": 0.37651748057590684, + "learning_rate": 1.999641281686659e-05, + "loss": 1.1101, + "step": 295 + }, + { + "epoch": 0.7570332480818415, + "grad_norm": 0.3987512509485477, + "learning_rate": 1.999606306912769e-05, + "loss": 1.1182, + "step": 296 + }, + { + "epoch": 0.7595907928388747, + "grad_norm": 0.3135294282014092, + "learning_rate": 1.999569705841918e-05, + "loss": 1.1576, + "step": 297 + }, + { + "epoch": 0.7621483375959079, + "grad_norm": 0.310570536991235, + "learning_rate": 1.9995314785336534e-05, + "loss": 1.1329, + "step": 298 + }, + { + "epoch": 0.7647058823529411, + "grad_norm": 0.28886285275015344, + "learning_rate": 1.999491625050169e-05, + "loss": 1.1486, + "step": 299 + }, + { + "epoch": 0.7672634271099744, + "grad_norm": 0.2810916108747155, + "learning_rate": 1.9994501454563046e-05, + "loss": 1.1067, + "step": 300 + }, + { + "epoch": 0.7698209718670077, + "grad_norm": 0.2641826394714093, + "learning_rate": 1.9994070398195437e-05, + "loss": 1.1391, + "step": 301 + }, + { + "epoch": 0.7723785166240409, + "grad_norm": 0.23992392919351505, + "learning_rate": 1.999362308210017e-05, + "loss": 1.1387, + "step": 302 + }, + { + "epoch": 0.7749360613810742, + "grad_norm": 0.24856265925820004, + "learning_rate": 1.9993159507005e-05, + "loss": 1.1084, + "step": 303 + }, + { + "epoch": 0.7774936061381074, + "grad_norm": 0.22572823705824116, + "learning_rate": 1.9992679673664136e-05, + "loss": 1.1134, + "step": 304 + }, + { + "epoch": 0.7800511508951407, + "grad_norm": 0.27595626439843796, + "learning_rate": 1.9992183582858233e-05, + "loss": 1.1269, + "step": 305 + }, + { + "epoch": 0.782608695652174, + "grad_norm": 0.33828817219220914, + "learning_rate": 1.9991671235394404e-05, + "loss": 1.1211, + "step": 306 + }, + { + "epoch": 0.7851662404092071, + "grad_norm": 0.23908198593915184, + "learning_rate": 1.9991142632106205e-05, + "loss": 1.0874, + "step": 307 + }, + { + "epoch": 0.7877237851662404, + "grad_norm": 0.32916775113793656, + "learning_rate": 1.999059777385364e-05, + "loss": 1.1189, + "step": 308 + }, + { + "epoch": 0.7902813299232737, + "grad_norm": 0.4164086722930908, + "learning_rate": 1.9990036661523162e-05, + "loss": 1.1368, + "step": 309 + }, + { + "epoch": 0.7928388746803069, + "grad_norm": 0.4356985530787425, + "learning_rate": 1.998945929602766e-05, + "loss": 1.1041, + "step": 310 + }, + { + "epoch": 0.7953964194373402, + "grad_norm": 0.32329800121359825, + "learning_rate": 1.9988865678306476e-05, + "loss": 1.1381, + "step": 311 + }, + { + "epoch": 0.7979539641943734, + "grad_norm": 0.28030048685966436, + "learning_rate": 1.998825580932539e-05, + "loss": 1.1505, + "step": 312 + }, + { + "epoch": 0.8005115089514067, + "grad_norm": 0.3736128210505236, + "learning_rate": 1.9987629690076615e-05, + "loss": 1.116, + "step": 313 + }, + { + "epoch": 0.80306905370844, + "grad_norm": 0.3711938440381308, + "learning_rate": 1.998698732157881e-05, + "loss": 1.1233, + "step": 314 + }, + { + "epoch": 0.8056265984654731, + "grad_norm": 0.283799635820317, + "learning_rate": 1.998632870487707e-05, + "loss": 1.1112, + "step": 315 + }, + { + "epoch": 0.8081841432225064, + "grad_norm": 0.29982174151777125, + "learning_rate": 1.9985653841042926e-05, + "loss": 1.1089, + "step": 316 + }, + { + "epoch": 0.8107416879795396, + "grad_norm": 0.33144242270715973, + "learning_rate": 1.9984962731174336e-05, + "loss": 1.1387, + "step": 317 + }, + { + "epoch": 0.8132992327365729, + "grad_norm": 0.33991853938376265, + "learning_rate": 1.998425537639569e-05, + "loss": 1.1292, + "step": 318 + }, + { + "epoch": 0.8158567774936062, + "grad_norm": 0.342802086067408, + "learning_rate": 1.9983531777857817e-05, + "loss": 1.0907, + "step": 319 + }, + { + "epoch": 0.8184143222506394, + "grad_norm": 0.3083367366680541, + "learning_rate": 1.998279193673796e-05, + "loss": 1.1157, + "step": 320 + }, + { + "epoch": 0.8209718670076727, + "grad_norm": 0.32536985414256364, + "learning_rate": 1.9982035854239793e-05, + "loss": 1.0971, + "step": 321 + }, + { + "epoch": 0.8235294117647058, + "grad_norm": 0.3810630836606236, + "learning_rate": 1.9981263531593422e-05, + "loss": 1.1236, + "step": 322 + }, + { + "epoch": 0.8260869565217391, + "grad_norm": 0.36452300722278047, + "learning_rate": 1.9980474970055367e-05, + "loss": 1.1438, + "step": 323 + }, + { + "epoch": 0.8286445012787724, + "grad_norm": 0.2795921565060519, + "learning_rate": 1.997967017090856e-05, + "loss": 1.1465, + "step": 324 + }, + { + "epoch": 0.8312020460358056, + "grad_norm": 0.2986081929713523, + "learning_rate": 1.9978849135462367e-05, + "loss": 1.1061, + "step": 325 + }, + { + "epoch": 0.8337595907928389, + "grad_norm": 0.3054440401423343, + "learning_rate": 1.9978011865052554e-05, + "loss": 1.1146, + "step": 326 + }, + { + "epoch": 0.8363171355498721, + "grad_norm": 0.32318950453837997, + "learning_rate": 1.9977158361041317e-05, + "loss": 1.1554, + "step": 327 + }, + { + "epoch": 0.8388746803069054, + "grad_norm": 0.30472902927491496, + "learning_rate": 1.997628862481725e-05, + "loss": 1.1274, + "step": 328 + }, + { + "epoch": 0.8414322250639387, + "grad_norm": 0.4042829285862421, + "learning_rate": 1.9975402657795355e-05, + "loss": 1.1669, + "step": 329 + }, + { + "epoch": 0.8439897698209718, + "grad_norm": 0.2804285578799784, + "learning_rate": 1.997450046141705e-05, + "loss": 1.1361, + "step": 330 + }, + { + "epoch": 0.8465473145780051, + "grad_norm": 0.3569177728816469, + "learning_rate": 1.997358203715015e-05, + "loss": 1.1095, + "step": 331 + }, + { + "epoch": 0.8491048593350383, + "grad_norm": 0.4230090216431553, + "learning_rate": 1.9972647386488873e-05, + "loss": 1.1016, + "step": 332 + }, + { + "epoch": 0.8516624040920716, + "grad_norm": 0.37021286913388013, + "learning_rate": 1.997169651095384e-05, + "loss": 1.1475, + "step": 333 + }, + { + "epoch": 0.8542199488491049, + "grad_norm": 0.3317123580055209, + "learning_rate": 1.9970729412092064e-05, + "loss": 1.0813, + "step": 334 + }, + { + "epoch": 0.8567774936061381, + "grad_norm": 0.273842287695835, + "learning_rate": 1.9969746091476955e-05, + "loss": 1.1067, + "step": 335 + }, + { + "epoch": 0.8593350383631714, + "grad_norm": 0.2673820670815786, + "learning_rate": 1.9968746550708313e-05, + "loss": 1.1069, + "step": 336 + }, + { + "epoch": 0.8618925831202046, + "grad_norm": 0.2979937548082758, + "learning_rate": 1.996773079141233e-05, + "loss": 1.1279, + "step": 337 + }, + { + "epoch": 0.8644501278772379, + "grad_norm": 0.37172355657034833, + "learning_rate": 1.9966698815241583e-05, + "loss": 1.1339, + "step": 338 + }, + { + "epoch": 0.8670076726342711, + "grad_norm": 0.506903869952954, + "learning_rate": 1.9965650623875034e-05, + "loss": 1.1039, + "step": 339 + }, + { + "epoch": 0.8695652173913043, + "grad_norm": 0.4279498776163848, + "learning_rate": 1.9964586219018018e-05, + "loss": 1.1425, + "step": 340 + }, + { + "epoch": 0.8721227621483376, + "grad_norm": 0.36753587770795587, + "learning_rate": 1.9963505602402263e-05, + "loss": 1.0978, + "step": 341 + }, + { + "epoch": 0.8746803069053708, + "grad_norm": 0.3648609772451092, + "learning_rate": 1.996240877578586e-05, + "loss": 1.1242, + "step": 342 + }, + { + "epoch": 0.8772378516624041, + "grad_norm": 0.37366918011434086, + "learning_rate": 1.996129574095328e-05, + "loss": 1.1191, + "step": 343 + }, + { + "epoch": 0.8797953964194374, + "grad_norm": 0.3879756302273747, + "learning_rate": 1.996016649971536e-05, + "loss": 1.1253, + "step": 344 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 0.35306903326209926, + "learning_rate": 1.9959021053909304e-05, + "loss": 1.1097, + "step": 345 + }, + { + "epoch": 0.8849104859335039, + "grad_norm": 0.3497813112371213, + "learning_rate": 1.995785940539868e-05, + "loss": 1.1751, + "step": 346 + }, + { + "epoch": 0.887468030690537, + "grad_norm": 0.31991011885719, + "learning_rate": 1.995668155607342e-05, + "loss": 1.06, + "step": 347 + }, + { + "epoch": 0.8900255754475703, + "grad_norm": 0.33100033800466955, + "learning_rate": 1.9955487507849815e-05, + "loss": 1.1217, + "step": 348 + }, + { + "epoch": 0.8925831202046036, + "grad_norm": 0.3302462532169077, + "learning_rate": 1.9954277262670497e-05, + "loss": 1.1016, + "step": 349 + }, + { + "epoch": 0.8951406649616368, + "grad_norm": 0.2988617813500731, + "learning_rate": 1.9953050822504466e-05, + "loss": 1.1259, + "step": 350 + }, + { + "epoch": 0.8976982097186701, + "grad_norm": 0.2467443109516983, + "learning_rate": 1.995180818934706e-05, + "loss": 1.1449, + "step": 351 + }, + { + "epoch": 0.9002557544757033, + "grad_norm": 0.2862819186333417, + "learning_rate": 1.995054936521997e-05, + "loss": 1.1, + "step": 352 + }, + { + "epoch": 0.9028132992327366, + "grad_norm": 0.3386935579478213, + "learning_rate": 1.9949274352171218e-05, + "loss": 1.1215, + "step": 353 + }, + { + "epoch": 0.9053708439897699, + "grad_norm": 0.377267345773294, + "learning_rate": 1.9947983152275175e-05, + "loss": 1.1151, + "step": 354 + }, + { + "epoch": 0.907928388746803, + "grad_norm": 0.26418004315541993, + "learning_rate": 1.9946675767632545e-05, + "loss": 1.0909, + "step": 355 + }, + { + "epoch": 0.9104859335038363, + "grad_norm": 0.3036950602266219, + "learning_rate": 1.9945352200370352e-05, + "loss": 1.1065, + "step": 356 + }, + { + "epoch": 0.9130434782608695, + "grad_norm": 0.2847990677396293, + "learning_rate": 1.9944012452641966e-05, + "loss": 1.1187, + "step": 357 + }, + { + "epoch": 0.9156010230179028, + "grad_norm": 0.3155239647410138, + "learning_rate": 1.994265652662707e-05, + "loss": 1.1402, + "step": 358 + }, + { + "epoch": 0.9181585677749361, + "grad_norm": 0.3011564965680371, + "learning_rate": 1.9941284424531668e-05, + "loss": 1.1232, + "step": 359 + }, + { + "epoch": 0.9207161125319693, + "grad_norm": 0.3119452115804441, + "learning_rate": 1.9939896148588086e-05, + "loss": 1.0879, + "step": 360 + }, + { + "epoch": 0.9232736572890026, + "grad_norm": 0.33133352515569403, + "learning_rate": 1.9938491701054965e-05, + "loss": 1.1384, + "step": 361 + }, + { + "epoch": 0.9258312020460358, + "grad_norm": 0.2085194934877816, + "learning_rate": 1.9937071084217254e-05, + "loss": 1.0616, + "step": 362 + }, + { + "epoch": 0.928388746803069, + "grad_norm": 0.27348539950003964, + "learning_rate": 1.99356343003862e-05, + "loss": 1.127, + "step": 363 + }, + { + "epoch": 0.9309462915601023, + "grad_norm": 0.314231043083254, + "learning_rate": 1.9934181351899365e-05, + "loss": 1.1075, + "step": 364 + }, + { + "epoch": 0.9335038363171355, + "grad_norm": 0.3354380584507947, + "learning_rate": 1.9932712241120606e-05, + "loss": 1.1272, + "step": 365 + }, + { + "epoch": 0.9360613810741688, + "grad_norm": 0.28703321632472045, + "learning_rate": 1.9931226970440075e-05, + "loss": 1.1469, + "step": 366 + }, + { + "epoch": 0.9386189258312021, + "grad_norm": 0.3426859912220677, + "learning_rate": 1.9929725542274215e-05, + "loss": 1.1278, + "step": 367 + }, + { + "epoch": 0.9411764705882353, + "grad_norm": 0.29299540193881474, + "learning_rate": 1.992820795906575e-05, + "loss": 1.1187, + "step": 368 + }, + { + "epoch": 0.9437340153452686, + "grad_norm": 0.39295341923846966, + "learning_rate": 1.99266742232837e-05, + "loss": 1.1126, + "step": 369 + }, + { + "epoch": 0.9462915601023018, + "grad_norm": 0.35353202277391543, + "learning_rate": 1.9925124337423356e-05, + "loss": 1.1139, + "step": 370 + }, + { + "epoch": 0.948849104859335, + "grad_norm": 0.3311467211582019, + "learning_rate": 1.9923558304006283e-05, + "loss": 1.138, + "step": 371 + }, + { + "epoch": 0.9514066496163683, + "grad_norm": 0.3816152174441759, + "learning_rate": 1.992197612558032e-05, + "loss": 1.1176, + "step": 372 + }, + { + "epoch": 0.9539641943734015, + "grad_norm": 0.36605913254516786, + "learning_rate": 1.9920377804719573e-05, + "loss": 1.1221, + "step": 373 + }, + { + "epoch": 0.9565217391304348, + "grad_norm": 0.36097755733897396, + "learning_rate": 1.991876334402441e-05, + "loss": 1.1198, + "step": 374 + }, + { + "epoch": 0.959079283887468, + "grad_norm": 0.34895670740815254, + "learning_rate": 1.9917132746121454e-05, + "loss": 1.1438, + "step": 375 + }, + { + "epoch": 0.9616368286445013, + "grad_norm": 0.2817987248252719, + "learning_rate": 1.9915486013663595e-05, + "loss": 1.0946, + "step": 376 + }, + { + "epoch": 0.9641943734015346, + "grad_norm": 0.2440543185648296, + "learning_rate": 1.9913823149329952e-05, + "loss": 1.1257, + "step": 377 + }, + { + "epoch": 0.9667519181585678, + "grad_norm": 0.29938424755141774, + "learning_rate": 1.9912144155825913e-05, + "loss": 1.1315, + "step": 378 + }, + { + "epoch": 0.969309462915601, + "grad_norm": 0.3042211939245891, + "learning_rate": 1.9910449035883086e-05, + "loss": 1.1005, + "step": 379 + }, + { + "epoch": 0.9718670076726342, + "grad_norm": 0.3662935173068649, + "learning_rate": 1.990873779225933e-05, + "loss": 1.0831, + "step": 380 + }, + { + "epoch": 0.9744245524296675, + "grad_norm": 0.34290782200372855, + "learning_rate": 1.990701042773873e-05, + "loss": 1.1116, + "step": 381 + }, + { + "epoch": 0.9769820971867008, + "grad_norm": 0.2659876511429978, + "learning_rate": 1.99052669451316e-05, + "loss": 1.1172, + "step": 382 + }, + { + "epoch": 0.979539641943734, + "grad_norm": 0.2656583663382276, + "learning_rate": 1.9903507347274478e-05, + "loss": 1.1243, + "step": 383 + }, + { + "epoch": 0.9820971867007673, + "grad_norm": 0.35197356004646674, + "learning_rate": 1.9901731637030123e-05, + "loss": 1.0751, + "step": 384 + }, + { + "epoch": 0.9846547314578005, + "grad_norm": 0.4123186710230891, + "learning_rate": 1.9899939817287494e-05, + "loss": 1.1572, + "step": 385 + }, + { + "epoch": 0.9872122762148338, + "grad_norm": 0.48886837110572706, + "learning_rate": 1.989813189096178e-05, + "loss": 1.1109, + "step": 386 + }, + { + "epoch": 0.989769820971867, + "grad_norm": 0.4200898181195607, + "learning_rate": 1.989630786099436e-05, + "loss": 1.1243, + "step": 387 + }, + { + "epoch": 0.9923273657289002, + "grad_norm": 0.36473186521348727, + "learning_rate": 1.9894467730352817e-05, + "loss": 1.1379, + "step": 388 + }, + { + "epoch": 0.9948849104859335, + "grad_norm": 0.33106729200219565, + "learning_rate": 1.9892611502030932e-05, + "loss": 1.1183, + "step": 389 + }, + { + "epoch": 0.9974424552429667, + "grad_norm": 0.28859949847448485, + "learning_rate": 1.9890739179048666e-05, + "loss": 1.1019, + "step": 390 + }, + { + "epoch": 1.0, + "grad_norm": 0.32343067044443596, + "learning_rate": 1.9888850764452177e-05, + "loss": 1.1315, + "step": 391 + }, + { + "epoch": 1.0025575447570332, + "grad_norm": 0.2946752191785302, + "learning_rate": 1.988694626131379e-05, + "loss": 1.1027, + "step": 392 + }, + { + "epoch": 1.0051150895140666, + "grad_norm": 0.2840956310037306, + "learning_rate": 1.9885025672732024e-05, + "loss": 1.1255, + "step": 393 + }, + { + "epoch": 1.0076726342710998, + "grad_norm": 0.3834929641779387, + "learning_rate": 1.9883089001831545e-05, + "loss": 1.0926, + "step": 394 + }, + { + "epoch": 1.010230179028133, + "grad_norm": 0.37119046465058125, + "learning_rate": 1.9881136251763203e-05, + "loss": 1.1024, + "step": 395 + }, + { + "epoch": 1.0127877237851663, + "grad_norm": 0.3481999615848297, + "learning_rate": 1.9879167425703998e-05, + "loss": 1.1177, + "step": 396 + }, + { + "epoch": 1.0153452685421995, + "grad_norm": 0.4174534154279672, + "learning_rate": 1.9877182526857086e-05, + "loss": 1.1194, + "step": 397 + }, + { + "epoch": 1.0179028132992327, + "grad_norm": 0.428283352237624, + "learning_rate": 1.9875181558451774e-05, + "loss": 1.1126, + "step": 398 + }, + { + "epoch": 1.020460358056266, + "grad_norm": 0.34788898984052513, + "learning_rate": 1.9873164523743517e-05, + "loss": 1.0826, + "step": 399 + }, + { + "epoch": 1.0230179028132993, + "grad_norm": 0.3235948349939345, + "learning_rate": 1.9871131426013894e-05, + "loss": 1.137, + "step": 400 + }, + { + "epoch": 1.0255754475703325, + "grad_norm": 0.3661886910233816, + "learning_rate": 1.9869082268570637e-05, + "loss": 1.1135, + "step": 401 + }, + { + "epoch": 1.0281329923273657, + "grad_norm": 0.3844357019706309, + "learning_rate": 1.9867017054747593e-05, + "loss": 1.1316, + "step": 402 + }, + { + "epoch": 1.030690537084399, + "grad_norm": 0.3351625771872402, + "learning_rate": 1.9864935787904734e-05, + "loss": 1.1009, + "step": 403 + }, + { + "epoch": 1.0332480818414322, + "grad_norm": 0.34602161255624664, + "learning_rate": 1.986283847142816e-05, + "loss": 1.1047, + "step": 404 + }, + { + "epoch": 1.0358056265984654, + "grad_norm": 0.3709821493330784, + "learning_rate": 1.9860725108730065e-05, + "loss": 1.1031, + "step": 405 + }, + { + "epoch": 1.0383631713554988, + "grad_norm": 0.37774483264562303, + "learning_rate": 1.9858595703248755e-05, + "loss": 1.137, + "step": 406 + }, + { + "epoch": 1.040920716112532, + "grad_norm": 0.3599825369273542, + "learning_rate": 1.985645025844865e-05, + "loss": 1.0707, + "step": 407 + }, + { + "epoch": 1.0434782608695652, + "grad_norm": 0.39966584857588405, + "learning_rate": 1.9854288777820246e-05, + "loss": 1.1033, + "step": 408 + }, + { + "epoch": 1.0460358056265984, + "grad_norm": 0.40289071310305824, + "learning_rate": 1.9852111264880145e-05, + "loss": 1.0806, + "step": 409 + }, + { + "epoch": 1.0485933503836318, + "grad_norm": 0.47128238325065436, + "learning_rate": 1.984991772317102e-05, + "loss": 1.0756, + "step": 410 + }, + { + "epoch": 1.051150895140665, + "grad_norm": 0.5298917118212448, + "learning_rate": 1.9847708156261622e-05, + "loss": 1.1055, + "step": 411 + }, + { + "epoch": 1.0537084398976981, + "grad_norm": 0.47297356768421134, + "learning_rate": 1.9845482567746783e-05, + "loss": 1.0836, + "step": 412 + }, + { + "epoch": 1.0562659846547315, + "grad_norm": 0.38344561089251955, + "learning_rate": 1.9843240961247398e-05, + "loss": 1.0904, + "step": 413 + }, + { + "epoch": 1.0588235294117647, + "grad_norm": 0.27676602705991193, + "learning_rate": 1.9840983340410414e-05, + "loss": 1.1402, + "step": 414 + }, + { + "epoch": 1.061381074168798, + "grad_norm": 0.4125473070163219, + "learning_rate": 1.9838709708908848e-05, + "loss": 1.1108, + "step": 415 + }, + { + "epoch": 1.0639386189258313, + "grad_norm": 0.39100913652365626, + "learning_rate": 1.983642007044175e-05, + "loss": 1.0894, + "step": 416 + }, + { + "epoch": 1.0664961636828645, + "grad_norm": 0.3635147529725554, + "learning_rate": 1.983411442873422e-05, + "loss": 1.0751, + "step": 417 + }, + { + "epoch": 1.0690537084398977, + "grad_norm": 0.3157457311508148, + "learning_rate": 1.983179278753739e-05, + "loss": 1.0867, + "step": 418 + }, + { + "epoch": 1.0716112531969308, + "grad_norm": 0.3380507668468239, + "learning_rate": 1.9829455150628432e-05, + "loss": 1.1428, + "step": 419 + }, + { + "epoch": 1.0741687979539642, + "grad_norm": 0.3531121689418475, + "learning_rate": 1.982710152181053e-05, + "loss": 1.0877, + "step": 420 + }, + { + "epoch": 1.0767263427109974, + "grad_norm": 0.2800940522052926, + "learning_rate": 1.982473190491289e-05, + "loss": 1.1025, + "step": 421 + }, + { + "epoch": 1.0792838874680306, + "grad_norm": 0.3045440051536889, + "learning_rate": 1.9822346303790732e-05, + "loss": 1.0954, + "step": 422 + }, + { + "epoch": 1.081841432225064, + "grad_norm": 0.2875179180998631, + "learning_rate": 1.9819944722325283e-05, + "loss": 1.0799, + "step": 423 + }, + { + "epoch": 1.0843989769820972, + "grad_norm": 0.3671466904640979, + "learning_rate": 1.981752716442376e-05, + "loss": 1.1239, + "step": 424 + }, + { + "epoch": 1.0869565217391304, + "grad_norm": 0.310905332933887, + "learning_rate": 1.9815093634019384e-05, + "loss": 1.0885, + "step": 425 + }, + { + "epoch": 1.0895140664961638, + "grad_norm": 0.34866191023824383, + "learning_rate": 1.9812644135071358e-05, + "loss": 1.0789, + "step": 426 + }, + { + "epoch": 1.092071611253197, + "grad_norm": 0.3670206738107968, + "learning_rate": 1.9810178671564853e-05, + "loss": 1.1051, + "step": 427 + }, + { + "epoch": 1.0946291560102301, + "grad_norm": 0.46475258100798056, + "learning_rate": 1.980769724751104e-05, + "loss": 1.0838, + "step": 428 + }, + { + "epoch": 1.0971867007672633, + "grad_norm": 0.3157024370545657, + "learning_rate": 1.9805199866947026e-05, + "loss": 1.114, + "step": 429 + }, + { + "epoch": 1.0997442455242967, + "grad_norm": 0.29958992335623563, + "learning_rate": 1.9802686533935903e-05, + "loss": 1.0909, + "step": 430 + }, + { + "epoch": 1.10230179028133, + "grad_norm": 0.3045539331442299, + "learning_rate": 1.9800157252566698e-05, + "loss": 1.119, + "step": 431 + }, + { + "epoch": 1.104859335038363, + "grad_norm": 0.35388881893166907, + "learning_rate": 1.97976120269544e-05, + "loss": 1.1357, + "step": 432 + }, + { + "epoch": 1.1074168797953965, + "grad_norm": 0.4072658855507119, + "learning_rate": 1.9795050861239932e-05, + "loss": 1.1153, + "step": 433 + }, + { + "epoch": 1.1099744245524297, + "grad_norm": 0.3515081652084557, + "learning_rate": 1.9792473759590148e-05, + "loss": 1.1051, + "step": 434 + }, + { + "epoch": 1.1125319693094629, + "grad_norm": 0.30513537117496636, + "learning_rate": 1.978988072619783e-05, + "loss": 1.0943, + "step": 435 + }, + { + "epoch": 1.1150895140664963, + "grad_norm": 0.5088746516427844, + "learning_rate": 1.9787271765281684e-05, + "loss": 1.0947, + "step": 436 + }, + { + "epoch": 1.1176470588235294, + "grad_norm": 0.6682126794134292, + "learning_rate": 1.9784646881086327e-05, + "loss": 1.0743, + "step": 437 + }, + { + "epoch": 1.1202046035805626, + "grad_norm": 0.5551640593749172, + "learning_rate": 1.9782006077882282e-05, + "loss": 1.0861, + "step": 438 + }, + { + "epoch": 1.1227621483375958, + "grad_norm": 0.3278866812808205, + "learning_rate": 1.9779349359965966e-05, + "loss": 1.1069, + "step": 439 + }, + { + "epoch": 1.1253196930946292, + "grad_norm": 0.38591224008325814, + "learning_rate": 1.9776676731659695e-05, + "loss": 1.0849, + "step": 440 + }, + { + "epoch": 1.1278772378516624, + "grad_norm": 0.35719651550677206, + "learning_rate": 1.977398819731167e-05, + "loss": 1.1053, + "step": 441 + }, + { + "epoch": 1.1304347826086956, + "grad_norm": 0.4232965403621678, + "learning_rate": 1.9771283761295966e-05, + "loss": 1.0848, + "step": 442 + }, + { + "epoch": 1.132992327365729, + "grad_norm": 0.2697343671368354, + "learning_rate": 1.9768563428012536e-05, + "loss": 1.1091, + "step": 443 + }, + { + "epoch": 1.1355498721227621, + "grad_norm": 0.3193367309932036, + "learning_rate": 1.9765827201887183e-05, + "loss": 1.0767, + "step": 444 + }, + { + "epoch": 1.1381074168797953, + "grad_norm": 0.36846576847881124, + "learning_rate": 1.9763075087371583e-05, + "loss": 1.0996, + "step": 445 + }, + { + "epoch": 1.1406649616368287, + "grad_norm": 0.31668666427159936, + "learning_rate": 1.9760307088943254e-05, + "loss": 1.0713, + "step": 446 + }, + { + "epoch": 1.143222506393862, + "grad_norm": 0.35150116619841826, + "learning_rate": 1.9757523211105555e-05, + "loss": 1.0564, + "step": 447 + }, + { + "epoch": 1.145780051150895, + "grad_norm": 0.429831549745095, + "learning_rate": 1.975472345838768e-05, + "loss": 1.0907, + "step": 448 + }, + { + "epoch": 1.1483375959079285, + "grad_norm": 0.44872565734771747, + "learning_rate": 1.9751907835344654e-05, + "loss": 1.0817, + "step": 449 + }, + { + "epoch": 1.1508951406649617, + "grad_norm": 0.33913236381932554, + "learning_rate": 1.9749076346557318e-05, + "loss": 1.129, + "step": 450 + }, + { + "epoch": 1.1534526854219949, + "grad_norm": 0.33115586128973074, + "learning_rate": 1.9746228996632326e-05, + "loss": 1.1034, + "step": 451 + }, + { + "epoch": 1.156010230179028, + "grad_norm": 0.3057185791661933, + "learning_rate": 1.974336579020214e-05, + "loss": 1.1076, + "step": 452 + }, + { + "epoch": 1.1585677749360614, + "grad_norm": 0.43316526036175457, + "learning_rate": 1.9740486731925022e-05, + "loss": 1.1224, + "step": 453 + }, + { + "epoch": 1.1611253196930946, + "grad_norm": 0.5066112837446138, + "learning_rate": 1.9737591826485013e-05, + "loss": 1.0962, + "step": 454 + }, + { + "epoch": 1.1636828644501278, + "grad_norm": 0.4014502906289108, + "learning_rate": 1.9734681078591943e-05, + "loss": 1.0905, + "step": 455 + }, + { + "epoch": 1.1662404092071612, + "grad_norm": 0.30247128311625804, + "learning_rate": 1.9731754492981423e-05, + "loss": 1.0812, + "step": 456 + }, + { + "epoch": 1.1687979539641944, + "grad_norm": 0.31145252945008656, + "learning_rate": 1.9728812074414822e-05, + "loss": 1.0729, + "step": 457 + }, + { + "epoch": 1.1713554987212276, + "grad_norm": 0.33968915375934183, + "learning_rate": 1.9725853827679266e-05, + "loss": 1.078, + "step": 458 + }, + { + "epoch": 1.1739130434782608, + "grad_norm": 0.27618072861680876, + "learning_rate": 1.9722879757587647e-05, + "loss": 1.0864, + "step": 459 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.28234315821124384, + "learning_rate": 1.9719889868978582e-05, + "loss": 1.1135, + "step": 460 + }, + { + "epoch": 1.1790281329923273, + "grad_norm": 0.29884726287169866, + "learning_rate": 1.971688416671644e-05, + "loss": 1.1363, + "step": 461 + }, + { + "epoch": 1.1815856777493605, + "grad_norm": 0.27600448666706423, + "learning_rate": 1.9713862655691302e-05, + "loss": 1.0791, + "step": 462 + }, + { + "epoch": 1.184143222506394, + "grad_norm": 0.2803813788615088, + "learning_rate": 1.971082534081899e-05, + "loss": 1.0718, + "step": 463 + }, + { + "epoch": 1.186700767263427, + "grad_norm": 0.2696501099289663, + "learning_rate": 1.970777222704101e-05, + "loss": 1.0961, + "step": 464 + }, + { + "epoch": 1.1892583120204603, + "grad_norm": 0.3010556872116562, + "learning_rate": 1.97047033193246e-05, + "loss": 1.1038, + "step": 465 + }, + { + "epoch": 1.1918158567774937, + "grad_norm": 0.28235325514025905, + "learning_rate": 1.970161862266268e-05, + "loss": 1.1054, + "step": 466 + }, + { + "epoch": 1.1943734015345269, + "grad_norm": 0.28808186970685423, + "learning_rate": 1.969851814207385e-05, + "loss": 1.0807, + "step": 467 + }, + { + "epoch": 1.19693094629156, + "grad_norm": 0.33258411208957883, + "learning_rate": 1.9695401882602406e-05, + "loss": 1.1296, + "step": 468 + }, + { + "epoch": 1.1994884910485935, + "grad_norm": 0.3318703383183081, + "learning_rate": 1.9692269849318303e-05, + "loss": 1.0936, + "step": 469 + }, + { + "epoch": 1.2020460358056266, + "grad_norm": 0.30178464518160203, + "learning_rate": 1.9689122047317166e-05, + "loss": 1.1155, + "step": 470 + }, + { + "epoch": 1.2046035805626598, + "grad_norm": 0.30521273043475255, + "learning_rate": 1.968595848172027e-05, + "loss": 1.0896, + "step": 471 + }, + { + "epoch": 1.207161125319693, + "grad_norm": 0.34614634138914463, + "learning_rate": 1.968277915767454e-05, + "loss": 1.0452, + "step": 472 + }, + { + "epoch": 1.2097186700767264, + "grad_norm": 0.32741746531886684, + "learning_rate": 1.9679584080352537e-05, + "loss": 1.1045, + "step": 473 + }, + { + "epoch": 1.2122762148337596, + "grad_norm": 0.2615489309131341, + "learning_rate": 1.967637325495245e-05, + "loss": 1.0855, + "step": 474 + }, + { + "epoch": 1.2148337595907928, + "grad_norm": 0.27476592859150684, + "learning_rate": 1.9673146686698093e-05, + "loss": 1.1001, + "step": 475 + }, + { + "epoch": 1.2173913043478262, + "grad_norm": 0.3421071933190777, + "learning_rate": 1.9669904380838892e-05, + "loss": 1.0729, + "step": 476 + }, + { + "epoch": 1.2199488491048593, + "grad_norm": 0.3598257915245131, + "learning_rate": 1.966664634264987e-05, + "loss": 1.1242, + "step": 477 + }, + { + "epoch": 1.2225063938618925, + "grad_norm": 0.32107570559715254, + "learning_rate": 1.9663372577431663e-05, + "loss": 1.1087, + "step": 478 + }, + { + "epoch": 1.2250639386189257, + "grad_norm": 0.341209086018264, + "learning_rate": 1.966008309051047e-05, + "loss": 1.1167, + "step": 479 + }, + { + "epoch": 1.227621483375959, + "grad_norm": 0.29733249941263845, + "learning_rate": 1.965677788723809e-05, + "loss": 1.07, + "step": 480 + }, + { + "epoch": 1.2301790281329923, + "grad_norm": 0.26502862394927407, + "learning_rate": 1.9653456972991877e-05, + "loss": 1.0775, + "step": 481 + }, + { + "epoch": 1.2327365728900257, + "grad_norm": 0.28986896788872485, + "learning_rate": 1.965012035317475e-05, + "loss": 1.0967, + "step": 482 + }, + { + "epoch": 1.2352941176470589, + "grad_norm": 0.33295845795202056, + "learning_rate": 1.9646768033215183e-05, + "loss": 1.0879, + "step": 483 + }, + { + "epoch": 1.237851662404092, + "grad_norm": 0.3705619524001342, + "learning_rate": 1.9643400018567195e-05, + "loss": 1.1019, + "step": 484 + }, + { + "epoch": 1.2404092071611252, + "grad_norm": 0.3266347911273673, + "learning_rate": 1.9640016314710323e-05, + "loss": 1.1084, + "step": 485 + }, + { + "epoch": 1.2429667519181586, + "grad_norm": 0.3761069051897771, + "learning_rate": 1.9636616927149655e-05, + "loss": 1.1029, + "step": 486 + }, + { + "epoch": 1.2455242966751918, + "grad_norm": 0.2621662577070755, + "learning_rate": 1.9633201861415773e-05, + "loss": 1.0735, + "step": 487 + }, + { + "epoch": 1.248081841432225, + "grad_norm": 0.266376960810325, + "learning_rate": 1.9629771123064784e-05, + "loss": 1.0948, + "step": 488 + }, + { + "epoch": 1.2506393861892584, + "grad_norm": 0.3408438115021644, + "learning_rate": 1.9626324717678275e-05, + "loss": 1.0984, + "step": 489 + }, + { + "epoch": 1.2531969309462916, + "grad_norm": 0.3255066954002719, + "learning_rate": 1.962286265086334e-05, + "loss": 1.1213, + "step": 490 + }, + { + "epoch": 1.2557544757033248, + "grad_norm": 0.3765758476751633, + "learning_rate": 1.961938492825254e-05, + "loss": 1.0909, + "step": 491 + }, + { + "epoch": 1.258312020460358, + "grad_norm": 0.3109670040308706, + "learning_rate": 1.9615891555503914e-05, + "loss": 1.1164, + "step": 492 + }, + { + "epoch": 1.2608695652173914, + "grad_norm": 0.28523527744811616, + "learning_rate": 1.961238253830096e-05, + "loss": 1.0834, + "step": 493 + }, + { + "epoch": 1.2634271099744245, + "grad_norm": 0.3472113617037474, + "learning_rate": 1.9608857882352636e-05, + "loss": 1.0823, + "step": 494 + }, + { + "epoch": 1.265984654731458, + "grad_norm": 0.45214384592951995, + "learning_rate": 1.9605317593393326e-05, + "loss": 1.1084, + "step": 495 + }, + { + "epoch": 1.2685421994884911, + "grad_norm": 0.3401855972965097, + "learning_rate": 1.9601761677182868e-05, + "loss": 1.0978, + "step": 496 + }, + { + "epoch": 1.2710997442455243, + "grad_norm": 0.3025957486994177, + "learning_rate": 1.959819013950651e-05, + "loss": 1.0889, + "step": 497 + }, + { + "epoch": 1.2736572890025575, + "grad_norm": 0.29140422941812544, + "learning_rate": 1.9594602986174923e-05, + "loss": 1.0792, + "step": 498 + }, + { + "epoch": 1.2762148337595907, + "grad_norm": 0.3620688439157377, + "learning_rate": 1.959100022302418e-05, + "loss": 1.092, + "step": 499 + }, + { + "epoch": 1.278772378516624, + "grad_norm": 0.3498507983384518, + "learning_rate": 1.9587381855915754e-05, + "loss": 1.0652, + "step": 500 + }, + { + "epoch": 1.2813299232736572, + "grad_norm": 0.34633148833870603, + "learning_rate": 1.95837478907365e-05, + "loss": 1.0859, + "step": 501 + }, + { + "epoch": 1.2838874680306906, + "grad_norm": 0.28466962730903933, + "learning_rate": 1.958009833339865e-05, + "loss": 1.0912, + "step": 502 + }, + { + "epoch": 1.2864450127877238, + "grad_norm": 0.26890207030009217, + "learning_rate": 1.9576433189839807e-05, + "loss": 1.1088, + "step": 503 + }, + { + "epoch": 1.289002557544757, + "grad_norm": 0.273263645379487, + "learning_rate": 1.957275246602293e-05, + "loss": 1.0837, + "step": 504 + }, + { + "epoch": 1.2915601023017902, + "grad_norm": 0.2716148540613851, + "learning_rate": 1.9569056167936332e-05, + "loss": 1.105, + "step": 505 + }, + { + "epoch": 1.2941176470588236, + "grad_norm": 0.24370260465489227, + "learning_rate": 1.956534430159365e-05, + "loss": 1.0726, + "step": 506 + }, + { + "epoch": 1.2966751918158568, + "grad_norm": 0.2620730046771573, + "learning_rate": 1.9561616873033867e-05, + "loss": 1.1079, + "step": 507 + }, + { + "epoch": 1.29923273657289, + "grad_norm": 0.3135544306790673, + "learning_rate": 1.955787388832127e-05, + "loss": 1.0697, + "step": 508 + }, + { + "epoch": 1.3017902813299234, + "grad_norm": 0.26135639483849105, + "learning_rate": 1.9554115353545464e-05, + "loss": 1.1016, + "step": 509 + }, + { + "epoch": 1.3043478260869565, + "grad_norm": 0.25771344651987327, + "learning_rate": 1.9550341274821348e-05, + "loss": 1.0727, + "step": 510 + }, + { + "epoch": 1.3069053708439897, + "grad_norm": 0.3167223084832456, + "learning_rate": 1.9546551658289113e-05, + "loss": 1.0792, + "step": 511 + }, + { + "epoch": 1.309462915601023, + "grad_norm": 0.37857845074967256, + "learning_rate": 1.954274651011423e-05, + "loss": 1.1143, + "step": 512 + }, + { + "epoch": 1.3120204603580563, + "grad_norm": 0.2580494189739856, + "learning_rate": 1.9538925836487436e-05, + "loss": 1.0674, + "step": 513 + }, + { + "epoch": 1.3145780051150895, + "grad_norm": 0.39297270925108346, + "learning_rate": 1.953508964362473e-05, + "loss": 1.0885, + "step": 514 + }, + { + "epoch": 1.317135549872123, + "grad_norm": 0.4568937813346712, + "learning_rate": 1.9531237937767352e-05, + "loss": 1.0807, + "step": 515 + }, + { + "epoch": 1.319693094629156, + "grad_norm": 0.4182414922758871, + "learning_rate": 1.9527370725181793e-05, + "loss": 1.0766, + "step": 516 + }, + { + "epoch": 1.3222506393861893, + "grad_norm": 0.4402863172879326, + "learning_rate": 1.9523488012159762e-05, + "loss": 1.0712, + "step": 517 + }, + { + "epoch": 1.3248081841432224, + "grad_norm": 0.3810424193074309, + "learning_rate": 1.9519589805018187e-05, + "loss": 1.0888, + "step": 518 + }, + { + "epoch": 1.3273657289002558, + "grad_norm": 0.4051938816832732, + "learning_rate": 1.951567611009922e-05, + "loss": 1.0801, + "step": 519 + }, + { + "epoch": 1.329923273657289, + "grad_norm": 0.3260440045944625, + "learning_rate": 1.9511746933770186e-05, + "loss": 1.1149, + "step": 520 + }, + { + "epoch": 1.3324808184143222, + "grad_norm": 0.31554258651135036, + "learning_rate": 1.9507802282423612e-05, + "loss": 1.1202, + "step": 521 + }, + { + "epoch": 1.3350383631713556, + "grad_norm": 0.2622342243824476, + "learning_rate": 1.9503842162477205e-05, + "loss": 1.1006, + "step": 522 + }, + { + "epoch": 1.3375959079283888, + "grad_norm": 0.3015423536266443, + "learning_rate": 1.9499866580373826e-05, + "loss": 1.0873, + "step": 523 + }, + { + "epoch": 1.340153452685422, + "grad_norm": 0.3920165036339574, + "learning_rate": 1.94958755425815e-05, + "loss": 1.1154, + "step": 524 + }, + { + "epoch": 1.3427109974424551, + "grad_norm": 0.2769409471650046, + "learning_rate": 1.9491869055593392e-05, + "loss": 1.0867, + "step": 525 + }, + { + "epoch": 1.3452685421994885, + "grad_norm": 0.30161940340621723, + "learning_rate": 1.9487847125927814e-05, + "loss": 1.1126, + "step": 526 + }, + { + "epoch": 1.3478260869565217, + "grad_norm": 0.41990580701086677, + "learning_rate": 1.948380976012819e-05, + "loss": 1.0625, + "step": 527 + }, + { + "epoch": 1.350383631713555, + "grad_norm": 0.3940286196901995, + "learning_rate": 1.9479756964763062e-05, + "loss": 1.1262, + "step": 528 + }, + { + "epoch": 1.3529411764705883, + "grad_norm": 0.3683443524857737, + "learning_rate": 1.9475688746426075e-05, + "loss": 1.0865, + "step": 529 + }, + { + "epoch": 1.3554987212276215, + "grad_norm": 0.2675607272032647, + "learning_rate": 1.9471605111735964e-05, + "loss": 1.0594, + "step": 530 + }, + { + "epoch": 1.3580562659846547, + "grad_norm": 0.30194225210114733, + "learning_rate": 1.9467506067336554e-05, + "loss": 1.0955, + "step": 531 + }, + { + "epoch": 1.3606138107416879, + "grad_norm": 0.32576735510414695, + "learning_rate": 1.946339161989672e-05, + "loss": 1.0824, + "step": 532 + }, + { + "epoch": 1.3631713554987213, + "grad_norm": 0.3598150497292756, + "learning_rate": 1.9459261776110426e-05, + "loss": 1.1215, + "step": 533 + }, + { + "epoch": 1.3657289002557544, + "grad_norm": 0.30585802865605916, + "learning_rate": 1.945511654269666e-05, + "loss": 1.086, + "step": 534 + }, + { + "epoch": 1.3682864450127878, + "grad_norm": 0.2832294529242309, + "learning_rate": 1.945095592639946e-05, + "loss": 1.0992, + "step": 535 + }, + { + "epoch": 1.370843989769821, + "grad_norm": 0.29056128095513195, + "learning_rate": 1.944677993398789e-05, + "loss": 1.1311, + "step": 536 + }, + { + "epoch": 1.3734015345268542, + "grad_norm": 0.2598885076655647, + "learning_rate": 1.944258857225603e-05, + "loss": 1.0869, + "step": 537 + }, + { + "epoch": 1.3759590792838874, + "grad_norm": 0.29819735030908995, + "learning_rate": 1.943838184802296e-05, + "loss": 1.1034, + "step": 538 + }, + { + "epoch": 1.3785166240409208, + "grad_norm": 0.27354562935410204, + "learning_rate": 1.9434159768132762e-05, + "loss": 1.0834, + "step": 539 + }, + { + "epoch": 1.381074168797954, + "grad_norm": 0.3164865864885613, + "learning_rate": 1.9429922339454486e-05, + "loss": 1.0952, + "step": 540 + }, + { + "epoch": 1.3836317135549872, + "grad_norm": 0.34458030079305596, + "learning_rate": 1.9425669568882175e-05, + "loss": 1.1195, + "step": 541 + }, + { + "epoch": 1.3861892583120206, + "grad_norm": 0.2973996932273863, + "learning_rate": 1.942140146333481e-05, + "loss": 1.1082, + "step": 542 + }, + { + "epoch": 1.3887468030690537, + "grad_norm": 0.41583952226086746, + "learning_rate": 1.9417118029756342e-05, + "loss": 1.0664, + "step": 543 + }, + { + "epoch": 1.391304347826087, + "grad_norm": 0.33101469656406096, + "learning_rate": 1.9412819275115648e-05, + "loss": 1.087, + "step": 544 + }, + { + "epoch": 1.39386189258312, + "grad_norm": 0.2709972180594455, + "learning_rate": 1.9408505206406526e-05, + "loss": 1.078, + "step": 545 + }, + { + "epoch": 1.3964194373401535, + "grad_norm": 0.3358832525629651, + "learning_rate": 1.9404175830647703e-05, + "loss": 1.0549, + "step": 546 + }, + { + "epoch": 1.3989769820971867, + "grad_norm": 0.2987798463061033, + "learning_rate": 1.93998311548828e-05, + "loss": 1.0946, + "step": 547 + }, + { + "epoch": 1.40153452685422, + "grad_norm": 0.3337061384486843, + "learning_rate": 1.939547118618033e-05, + "loss": 1.0898, + "step": 548 + }, + { + "epoch": 1.4040920716112533, + "grad_norm": 0.3217064113312768, + "learning_rate": 1.9391095931633694e-05, + "loss": 1.1098, + "step": 549 + }, + { + "epoch": 1.4066496163682864, + "grad_norm": 0.2752108304141071, + "learning_rate": 1.9386705398361156e-05, + "loss": 1.0469, + "step": 550 + }, + { + "epoch": 1.4092071611253196, + "grad_norm": 0.25580623137423647, + "learning_rate": 1.938229959350584e-05, + "loss": 1.0616, + "step": 551 + }, + { + "epoch": 1.4117647058823528, + "grad_norm": 0.3326332518112022, + "learning_rate": 1.937787852423571e-05, + "loss": 1.1083, + "step": 552 + }, + { + "epoch": 1.4143222506393862, + "grad_norm": 0.28662569595039195, + "learning_rate": 1.937344219774358e-05, + "loss": 1.0908, + "step": 553 + }, + { + "epoch": 1.4168797953964194, + "grad_norm": 0.27173135593182157, + "learning_rate": 1.9368990621247062e-05, + "loss": 1.102, + "step": 554 + }, + { + "epoch": 1.4194373401534528, + "grad_norm": 0.2468084134139675, + "learning_rate": 1.9364523801988606e-05, + "loss": 1.1147, + "step": 555 + }, + { + "epoch": 1.421994884910486, + "grad_norm": 0.2709546209917836, + "learning_rate": 1.9360041747235437e-05, + "loss": 1.0962, + "step": 556 + }, + { + "epoch": 1.4245524296675192, + "grad_norm": 0.2653203619472685, + "learning_rate": 1.9355544464279587e-05, + "loss": 1.0864, + "step": 557 + }, + { + "epoch": 1.4271099744245523, + "grad_norm": 0.28467968268797966, + "learning_rate": 1.9351031960437848e-05, + "loss": 1.0747, + "step": 558 + }, + { + "epoch": 1.4296675191815857, + "grad_norm": 0.31847968792917525, + "learning_rate": 1.934650424305178e-05, + "loss": 1.0731, + "step": 559 + }, + { + "epoch": 1.432225063938619, + "grad_norm": 0.3091639351747145, + "learning_rate": 1.9341961319487704e-05, + "loss": 1.0598, + "step": 560 + }, + { + "epoch": 1.434782608695652, + "grad_norm": 0.26120102379692217, + "learning_rate": 1.9337403197136663e-05, + "loss": 1.0712, + "step": 561 + }, + { + "epoch": 1.4373401534526855, + "grad_norm": 0.283165316308832, + "learning_rate": 1.9332829883414444e-05, + "loss": 1.0883, + "step": 562 + }, + { + "epoch": 1.4398976982097187, + "grad_norm": 0.2767794060421261, + "learning_rate": 1.932824138576154e-05, + "loss": 1.1141, + "step": 563 + }, + { + "epoch": 1.4424552429667519, + "grad_norm": 0.3027787955580307, + "learning_rate": 1.9323637711643147e-05, + "loss": 1.1109, + "step": 564 + }, + { + "epoch": 1.445012787723785, + "grad_norm": 0.32071961002527666, + "learning_rate": 1.9319018868549165e-05, + "loss": 1.1192, + "step": 565 + }, + { + "epoch": 1.4475703324808185, + "grad_norm": 0.33467873672280385, + "learning_rate": 1.931438486399415e-05, + "loss": 1.0817, + "step": 566 + }, + { + "epoch": 1.4501278772378516, + "grad_norm": 0.30569240173237483, + "learning_rate": 1.930973570551735e-05, + "loss": 1.0607, + "step": 567 + }, + { + "epoch": 1.452685421994885, + "grad_norm": 0.298726423982734, + "learning_rate": 1.9305071400682644e-05, + "loss": 1.0914, + "step": 568 + }, + { + "epoch": 1.4552429667519182, + "grad_norm": 0.3038529339878212, + "learning_rate": 1.9300391957078564e-05, + "loss": 1.0834, + "step": 569 + }, + { + "epoch": 1.4578005115089514, + "grad_norm": 0.30563450154931243, + "learning_rate": 1.9295697382318286e-05, + "loss": 1.0733, + "step": 570 + }, + { + "epoch": 1.4603580562659846, + "grad_norm": 0.3808106030288731, + "learning_rate": 1.9290987684039576e-05, + "loss": 1.0955, + "step": 571 + }, + { + "epoch": 1.4629156010230178, + "grad_norm": 0.32964679230942334, + "learning_rate": 1.9286262869904827e-05, + "loss": 1.0977, + "step": 572 + }, + { + "epoch": 1.4654731457800512, + "grad_norm": 0.3576744350781661, + "learning_rate": 1.928152294760101e-05, + "loss": 1.0826, + "step": 573 + }, + { + "epoch": 1.4680306905370843, + "grad_norm": 0.3442477800849191, + "learning_rate": 1.9276767924839687e-05, + "loss": 1.0693, + "step": 574 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.4177409226360097, + "learning_rate": 1.927199780935698e-05, + "loss": 1.1031, + "step": 575 + }, + { + "epoch": 1.473145780051151, + "grad_norm": 0.5022744214347684, + "learning_rate": 1.926721260891357e-05, + "loss": 1.1081, + "step": 576 + }, + { + "epoch": 1.4757033248081841, + "grad_norm": 0.5089458782552098, + "learning_rate": 1.9262412331294677e-05, + "loss": 1.0984, + "step": 577 + }, + { + "epoch": 1.4782608695652173, + "grad_norm": 0.28913442828013464, + "learning_rate": 1.9257596984310055e-05, + "loss": 1.0907, + "step": 578 + }, + { + "epoch": 1.4808184143222507, + "grad_norm": 0.36385701502207274, + "learning_rate": 1.925276657579397e-05, + "loss": 1.0667, + "step": 579 + }, + { + "epoch": 1.4833759590792839, + "grad_norm": 0.39854637256040343, + "learning_rate": 1.9247921113605197e-05, + "loss": 1.0814, + "step": 580 + }, + { + "epoch": 1.485933503836317, + "grad_norm": 0.3421920326108303, + "learning_rate": 1.9243060605626995e-05, + "loss": 1.0984, + "step": 581 + }, + { + "epoch": 1.4884910485933505, + "grad_norm": 0.2806970145004491, + "learning_rate": 1.9238185059767116e-05, + "loss": 1.0903, + "step": 582 + }, + { + "epoch": 1.4910485933503836, + "grad_norm": 0.458875989536999, + "learning_rate": 1.9233294483957758e-05, + "loss": 1.1135, + "step": 583 + }, + { + "epoch": 1.4936061381074168, + "grad_norm": 0.5204446417118193, + "learning_rate": 1.922838888615559e-05, + "loss": 1.1228, + "step": 584 + }, + { + "epoch": 1.49616368286445, + "grad_norm": 0.4574878580551403, + "learning_rate": 1.922346827434171e-05, + "loss": 1.0595, + "step": 585 + }, + { + "epoch": 1.4987212276214834, + "grad_norm": 0.26814443608722427, + "learning_rate": 1.921853265652164e-05, + "loss": 1.0742, + "step": 586 + }, + { + "epoch": 1.5012787723785166, + "grad_norm": 0.4321843380909753, + "learning_rate": 1.9213582040725333e-05, + "loss": 1.0823, + "step": 587 + }, + { + "epoch": 1.50383631713555, + "grad_norm": 0.3998584041466985, + "learning_rate": 1.9208616435007124e-05, + "loss": 1.1113, + "step": 588 + }, + { + "epoch": 1.5063938618925832, + "grad_norm": 0.36340166424292447, + "learning_rate": 1.9203635847445743e-05, + "loss": 1.0495, + "step": 589 + }, + { + "epoch": 1.5089514066496164, + "grad_norm": 0.30341924814307153, + "learning_rate": 1.9198640286144296e-05, + "loss": 1.0778, + "step": 590 + }, + { + "epoch": 1.5115089514066495, + "grad_norm": 0.3549252043532506, + "learning_rate": 1.9193629759230252e-05, + "loss": 1.0526, + "step": 591 + }, + { + "epoch": 1.5140664961636827, + "grad_norm": 0.3706707482911529, + "learning_rate": 1.9188604274855417e-05, + "loss": 1.1082, + "step": 592 + }, + { + "epoch": 1.5166240409207161, + "grad_norm": 0.3221161365565599, + "learning_rate": 1.9183563841195948e-05, + "loss": 1.0358, + "step": 593 + }, + { + "epoch": 1.5191815856777495, + "grad_norm": 0.35561020647213454, + "learning_rate": 1.917850846645231e-05, + "loss": 1.1016, + "step": 594 + }, + { + "epoch": 1.5217391304347827, + "grad_norm": 0.3891453948051964, + "learning_rate": 1.917343815884929e-05, + "loss": 1.0723, + "step": 595 + }, + { + "epoch": 1.5242966751918159, + "grad_norm": 0.293218650160261, + "learning_rate": 1.9168352926635948e-05, + "loss": 1.0842, + "step": 596 + }, + { + "epoch": 1.526854219948849, + "grad_norm": 0.331624086856979, + "learning_rate": 1.9163252778085646e-05, + "loss": 1.0928, + "step": 597 + }, + { + "epoch": 1.5294117647058822, + "grad_norm": 0.36005628746389595, + "learning_rate": 1.9158137721496014e-05, + "loss": 1.0954, + "step": 598 + }, + { + "epoch": 1.5319693094629157, + "grad_norm": 0.25854576697363735, + "learning_rate": 1.9153007765188918e-05, + "loss": 1.0703, + "step": 599 + }, + { + "epoch": 1.5345268542199488, + "grad_norm": 0.3178892680337157, + "learning_rate": 1.914786291751048e-05, + "loss": 1.1178, + "step": 600 + }, + { + "epoch": 1.5370843989769822, + "grad_norm": 0.3276728285320476, + "learning_rate": 1.9142703186831044e-05, + "loss": 1.0711, + "step": 601 + }, + { + "epoch": 1.5396419437340154, + "grad_norm": 0.34402306746609335, + "learning_rate": 1.9137528581545172e-05, + "loss": 1.0669, + "step": 602 + }, + { + "epoch": 1.5421994884910486, + "grad_norm": 0.3658697294408855, + "learning_rate": 1.9132339110071623e-05, + "loss": 1.0738, + "step": 603 + }, + { + "epoch": 1.5447570332480818, + "grad_norm": 0.33272997926321957, + "learning_rate": 1.9127134780853343e-05, + "loss": 1.0891, + "step": 604 + }, + { + "epoch": 1.547314578005115, + "grad_norm": 0.26256059097959605, + "learning_rate": 1.9121915602357447e-05, + "loss": 1.0752, + "step": 605 + }, + { + "epoch": 1.5498721227621484, + "grad_norm": 0.29698212652722755, + "learning_rate": 1.9116681583075215e-05, + "loss": 1.0531, + "step": 606 + }, + { + "epoch": 1.5524296675191815, + "grad_norm": 0.3308461220455405, + "learning_rate": 1.9111432731522067e-05, + "loss": 1.0775, + "step": 607 + }, + { + "epoch": 1.554987212276215, + "grad_norm": 0.28434303668023103, + "learning_rate": 1.910616905623756e-05, + "loss": 1.0989, + "step": 608 + }, + { + "epoch": 1.5575447570332481, + "grad_norm": 0.2949610693246568, + "learning_rate": 1.910089056578536e-05, + "loss": 1.0942, + "step": 609 + }, + { + "epoch": 1.5601023017902813, + "grad_norm": 0.26028511630293355, + "learning_rate": 1.9095597268753243e-05, + "loss": 1.0639, + "step": 610 + }, + { + "epoch": 1.5626598465473145, + "grad_norm": 0.2736816450940113, + "learning_rate": 1.9090289173753077e-05, + "loss": 1.1013, + "step": 611 + }, + { + "epoch": 1.5652173913043477, + "grad_norm": 0.24169212652369965, + "learning_rate": 1.908496628942079e-05, + "loss": 1.0904, + "step": 612 + }, + { + "epoch": 1.567774936061381, + "grad_norm": 0.2790060046832418, + "learning_rate": 1.907962862441639e-05, + "loss": 1.0789, + "step": 613 + }, + { + "epoch": 1.5703324808184145, + "grad_norm": 0.25148763709880523, + "learning_rate": 1.9074276187423925e-05, + "loss": 1.083, + "step": 614 + }, + { + "epoch": 1.5728900255754477, + "grad_norm": 0.260089635225582, + "learning_rate": 1.906890898715147e-05, + "loss": 1.1052, + "step": 615 + }, + { + "epoch": 1.5754475703324808, + "grad_norm": 0.24239290344853867, + "learning_rate": 1.9063527032331128e-05, + "loss": 1.0587, + "step": 616 + }, + { + "epoch": 1.578005115089514, + "grad_norm": 0.31033949728422483, + "learning_rate": 1.9058130331719002e-05, + "loss": 1.0906, + "step": 617 + }, + { + "epoch": 1.5805626598465472, + "grad_norm": 0.29694640873919886, + "learning_rate": 1.9052718894095183e-05, + "loss": 1.0828, + "step": 618 + }, + { + "epoch": 1.5831202046035806, + "grad_norm": 0.268458744450183, + "learning_rate": 1.904729272826375e-05, + "loss": 1.0697, + "step": 619 + }, + { + "epoch": 1.5856777493606138, + "grad_norm": 0.3328538025026265, + "learning_rate": 1.9041851843052727e-05, + "loss": 1.0556, + "step": 620 + }, + { + "epoch": 1.5882352941176472, + "grad_norm": 0.4354576423430095, + "learning_rate": 1.90363962473141e-05, + "loss": 1.0888, + "step": 621 + }, + { + "epoch": 1.5907928388746804, + "grad_norm": 0.4488970201166202, + "learning_rate": 1.9030925949923777e-05, + "loss": 1.0991, + "step": 622 + }, + { + "epoch": 1.5933503836317136, + "grad_norm": 0.30850235477610843, + "learning_rate": 1.9025440959781593e-05, + "loss": 1.0721, + "step": 623 + }, + { + "epoch": 1.5959079283887467, + "grad_norm": 0.24306011770668454, + "learning_rate": 1.9019941285811284e-05, + "loss": 1.1146, + "step": 624 + }, + { + "epoch": 1.59846547314578, + "grad_norm": 0.31927732953474425, + "learning_rate": 1.9014426936960477e-05, + "loss": 1.1386, + "step": 625 + }, + { + "epoch": 1.6010230179028133, + "grad_norm": 0.30395309199867215, + "learning_rate": 1.900889792220067e-05, + "loss": 1.0651, + "step": 626 + }, + { + "epoch": 1.6035805626598465, + "grad_norm": 0.2641664347228699, + "learning_rate": 1.9003354250527225e-05, + "loss": 1.0737, + "step": 627 + }, + { + "epoch": 1.60613810741688, + "grad_norm": 0.2541673904415416, + "learning_rate": 1.899779593095935e-05, + "loss": 1.1093, + "step": 628 + }, + { + "epoch": 1.608695652173913, + "grad_norm": 0.248114384702292, + "learning_rate": 1.8992222972540083e-05, + "loss": 1.0631, + "step": 629 + }, + { + "epoch": 1.6112531969309463, + "grad_norm": 0.27098670487834897, + "learning_rate": 1.8986635384336275e-05, + "loss": 1.0684, + "step": 630 + }, + { + "epoch": 1.6138107416879794, + "grad_norm": 0.2707047290641469, + "learning_rate": 1.8981033175438593e-05, + "loss": 1.0793, + "step": 631 + }, + { + "epoch": 1.6163682864450126, + "grad_norm": 0.2248022175811438, + "learning_rate": 1.897541635496147e-05, + "loss": 1.0741, + "step": 632 + }, + { + "epoch": 1.618925831202046, + "grad_norm": 0.33046089699268805, + "learning_rate": 1.896978493204313e-05, + "loss": 1.0536, + "step": 633 + }, + { + "epoch": 1.6214833759590794, + "grad_norm": 0.2897890506100947, + "learning_rate": 1.896413891584554e-05, + "loss": 1.1041, + "step": 634 + }, + { + "epoch": 1.6240409207161126, + "grad_norm": 0.24423929651462964, + "learning_rate": 1.8958478315554414e-05, + "loss": 1.0554, + "step": 635 + }, + { + "epoch": 1.6265984654731458, + "grad_norm": 0.2824637389915044, + "learning_rate": 1.8952803140379198e-05, + "loss": 1.105, + "step": 636 + }, + { + "epoch": 1.629156010230179, + "grad_norm": 0.34172319194434536, + "learning_rate": 1.894711339955305e-05, + "loss": 1.0966, + "step": 637 + }, + { + "epoch": 1.6317135549872122, + "grad_norm": 0.2986624598202099, + "learning_rate": 1.8941409102332818e-05, + "loss": 1.0801, + "step": 638 + }, + { + "epoch": 1.6342710997442456, + "grad_norm": 0.35330551163337126, + "learning_rate": 1.893569025799904e-05, + "loss": 1.1168, + "step": 639 + }, + { + "epoch": 1.6368286445012787, + "grad_norm": 0.37997527154753075, + "learning_rate": 1.8929956875855913e-05, + "loss": 1.044, + "step": 640 + }, + { + "epoch": 1.6393861892583121, + "grad_norm": 0.3987670557181093, + "learning_rate": 1.89242089652313e-05, + "loss": 1.0678, + "step": 641 + }, + { + "epoch": 1.6419437340153453, + "grad_norm": 0.4164983853962145, + "learning_rate": 1.8918446535476683e-05, + "loss": 1.0713, + "step": 642 + }, + { + "epoch": 1.6445012787723785, + "grad_norm": 0.36634278907361967, + "learning_rate": 1.8912669595967182e-05, + "loss": 1.0845, + "step": 643 + }, + { + "epoch": 1.6470588235294117, + "grad_norm": 0.3377854105852521, + "learning_rate": 1.890687815610151e-05, + "loss": 1.1325, + "step": 644 + }, + { + "epoch": 1.6496163682864449, + "grad_norm": 0.2921364211079459, + "learning_rate": 1.8901072225301983e-05, + "loss": 1.0417, + "step": 645 + }, + { + "epoch": 1.6521739130434783, + "grad_norm": 0.40803324585389733, + "learning_rate": 1.8895251813014486e-05, + "loss": 1.0985, + "step": 646 + }, + { + "epoch": 1.6547314578005117, + "grad_norm": 0.4777584379650545, + "learning_rate": 1.8889416928708465e-05, + "loss": 1.0579, + "step": 647 + }, + { + "epoch": 1.6572890025575449, + "grad_norm": 0.4575863335013247, + "learning_rate": 1.8883567581876913e-05, + "loss": 1.075, + "step": 648 + }, + { + "epoch": 1.659846547314578, + "grad_norm": 0.44868767506108537, + "learning_rate": 1.887770378203635e-05, + "loss": 1.082, + "step": 649 + }, + { + "epoch": 1.6624040920716112, + "grad_norm": 0.3990360823870846, + "learning_rate": 1.8871825538726815e-05, + "loss": 1.0618, + "step": 650 + }, + { + "epoch": 1.6649616368286444, + "grad_norm": 0.384455268117493, + "learning_rate": 1.8865932861511836e-05, + "loss": 1.0883, + "step": 651 + }, + { + "epoch": 1.6675191815856778, + "grad_norm": 0.4308655650983798, + "learning_rate": 1.8860025759978436e-05, + "loss": 1.1136, + "step": 652 + }, + { + "epoch": 1.670076726342711, + "grad_norm": 0.5161027640726775, + "learning_rate": 1.8854104243737096e-05, + "loss": 1.0876, + "step": 653 + }, + { + "epoch": 1.6726342710997444, + "grad_norm": 0.5710337903727111, + "learning_rate": 1.8848168322421756e-05, + "loss": 1.0921, + "step": 654 + }, + { + "epoch": 1.6751918158567776, + "grad_norm": 0.4680011964164238, + "learning_rate": 1.884221800568979e-05, + "loss": 1.0817, + "step": 655 + }, + { + "epoch": 1.6777493606138107, + "grad_norm": 0.273509418810932, + "learning_rate": 1.8836253303221985e-05, + "loss": 1.0676, + "step": 656 + }, + { + "epoch": 1.680306905370844, + "grad_norm": 0.36238937602325755, + "learning_rate": 1.8830274224722544e-05, + "loss": 1.0694, + "step": 657 + }, + { + "epoch": 1.682864450127877, + "grad_norm": 0.4331370312585361, + "learning_rate": 1.8824280779919055e-05, + "loss": 1.0939, + "step": 658 + }, + { + "epoch": 1.6854219948849105, + "grad_norm": 0.42161084086226236, + "learning_rate": 1.8818272978562472e-05, + "loss": 1.0949, + "step": 659 + }, + { + "epoch": 1.6879795396419437, + "grad_norm": 0.42114600096809945, + "learning_rate": 1.8812250830427116e-05, + "loss": 1.1071, + "step": 660 + }, + { + "epoch": 1.690537084398977, + "grad_norm": 0.2580305989521523, + "learning_rate": 1.8806214345310648e-05, + "loss": 1.0884, + "step": 661 + }, + { + "epoch": 1.6930946291560103, + "grad_norm": 0.2790098578226022, + "learning_rate": 1.8800163533034048e-05, + "loss": 1.0786, + "step": 662 + }, + { + "epoch": 1.6956521739130435, + "grad_norm": 0.3952483114126335, + "learning_rate": 1.879409840344161e-05, + "loss": 1.1025, + "step": 663 + }, + { + "epoch": 1.6982097186700766, + "grad_norm": 0.34837002184241345, + "learning_rate": 1.8788018966400923e-05, + "loss": 1.0862, + "step": 664 + }, + { + "epoch": 1.7007672634271098, + "grad_norm": 0.23347425632455518, + "learning_rate": 1.878192523180285e-05, + "loss": 1.0903, + "step": 665 + }, + { + "epoch": 1.7033248081841432, + "grad_norm": 0.258084870513599, + "learning_rate": 1.877581720956151e-05, + "loss": 1.0659, + "step": 666 + }, + { + "epoch": 1.7058823529411766, + "grad_norm": 0.2955310030807304, + "learning_rate": 1.876969490961428e-05, + "loss": 1.0803, + "step": 667 + }, + { + "epoch": 1.7084398976982098, + "grad_norm": 0.34485101895191056, + "learning_rate": 1.8763558341921762e-05, + "loss": 1.0729, + "step": 668 + }, + { + "epoch": 1.710997442455243, + "grad_norm": 0.25932977011662367, + "learning_rate": 1.8757407516467762e-05, + "loss": 1.1017, + "step": 669 + }, + { + "epoch": 1.7135549872122762, + "grad_norm": 0.23771298856204617, + "learning_rate": 1.8751242443259286e-05, + "loss": 1.0771, + "step": 670 + }, + { + "epoch": 1.7161125319693094, + "grad_norm": 0.3403000739473665, + "learning_rate": 1.874506313232653e-05, + "loss": 1.0972, + "step": 671 + }, + { + "epoch": 1.7186700767263428, + "grad_norm": 0.36624614786635146, + "learning_rate": 1.873886959372284e-05, + "loss": 1.0948, + "step": 672 + }, + { + "epoch": 1.721227621483376, + "grad_norm": 0.23241780598609607, + "learning_rate": 1.8732661837524722e-05, + "loss": 1.0726, + "step": 673 + }, + { + "epoch": 1.7237851662404093, + "grad_norm": 0.27573330219222747, + "learning_rate": 1.8726439873831803e-05, + "loss": 1.1154, + "step": 674 + }, + { + "epoch": 1.7263427109974425, + "grad_norm": 0.3289571952505283, + "learning_rate": 1.8720203712766833e-05, + "loss": 1.0855, + "step": 675 + }, + { + "epoch": 1.7289002557544757, + "grad_norm": 0.26315983835648826, + "learning_rate": 1.8713953364475654e-05, + "loss": 1.0561, + "step": 676 + }, + { + "epoch": 1.7314578005115089, + "grad_norm": 0.2933737539222408, + "learning_rate": 1.8707688839127187e-05, + "loss": 1.0717, + "step": 677 + }, + { + "epoch": 1.734015345268542, + "grad_norm": 0.24075336640916348, + "learning_rate": 1.8701410146913427e-05, + "loss": 1.0733, + "step": 678 + }, + { + "epoch": 1.7365728900255755, + "grad_norm": 0.2969635924636881, + "learning_rate": 1.869511729804942e-05, + "loss": 1.0736, + "step": 679 + }, + { + "epoch": 1.7391304347826086, + "grad_norm": 0.2302120367596696, + "learning_rate": 1.8688810302773225e-05, + "loss": 1.0718, + "step": 680 + }, + { + "epoch": 1.741687979539642, + "grad_norm": 0.31123990252305606, + "learning_rate": 1.8682489171345942e-05, + "loss": 1.0633, + "step": 681 + }, + { + "epoch": 1.7442455242966752, + "grad_norm": 0.25671775642481637, + "learning_rate": 1.8676153914051648e-05, + "loss": 1.1055, + "step": 682 + }, + { + "epoch": 1.7468030690537084, + "grad_norm": 0.2731165902037635, + "learning_rate": 1.866980454119741e-05, + "loss": 1.1019, + "step": 683 + }, + { + "epoch": 1.7493606138107416, + "grad_norm": 0.29946202186623655, + "learning_rate": 1.8663441063113266e-05, + "loss": 1.0856, + "step": 684 + }, + { + "epoch": 1.7519181585677748, + "grad_norm": 0.2743108383298565, + "learning_rate": 1.8657063490152193e-05, + "loss": 1.0797, + "step": 685 + }, + { + "epoch": 1.7544757033248082, + "grad_norm": 0.2910690805954212, + "learning_rate": 1.8650671832690106e-05, + "loss": 1.1068, + "step": 686 + }, + { + "epoch": 1.7570332480818416, + "grad_norm": 0.25617556691443527, + "learning_rate": 1.864426610112583e-05, + "loss": 1.0801, + "step": 687 + }, + { + "epoch": 1.7595907928388748, + "grad_norm": 0.2446643852273966, + "learning_rate": 1.8637846305881092e-05, + "loss": 1.0712, + "step": 688 + }, + { + "epoch": 1.762148337595908, + "grad_norm": 0.24853300895824507, + "learning_rate": 1.8631412457400494e-05, + "loss": 1.0518, + "step": 689 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.2250526521940477, + "learning_rate": 1.862496456615151e-05, + "loss": 1.0802, + "step": 690 + }, + { + "epoch": 1.7672634271099743, + "grad_norm": 0.23033386861703295, + "learning_rate": 1.861850264262445e-05, + "loss": 1.0921, + "step": 691 + }, + { + "epoch": 1.7698209718670077, + "grad_norm": 0.22393185289398734, + "learning_rate": 1.8612026697332466e-05, + "loss": 1.0824, + "step": 692 + }, + { + "epoch": 1.772378516624041, + "grad_norm": 0.24371247518659098, + "learning_rate": 1.860553674081151e-05, + "loss": 1.0958, + "step": 693 + }, + { + "epoch": 1.7749360613810743, + "grad_norm": 0.21684995978781324, + "learning_rate": 1.859903278362034e-05, + "loss": 1.0511, + "step": 694 + }, + { + "epoch": 1.7774936061381075, + "grad_norm": 0.24359803588661344, + "learning_rate": 1.8592514836340485e-05, + "loss": 1.064, + "step": 695 + }, + { + "epoch": 1.7800511508951407, + "grad_norm": 0.2806613621237684, + "learning_rate": 1.8585982909576243e-05, + "loss": 1.0974, + "step": 696 + }, + { + "epoch": 1.7826086956521738, + "grad_norm": 0.2951317541501585, + "learning_rate": 1.857943701395464e-05, + "loss": 1.0745, + "step": 697 + }, + { + "epoch": 1.785166240409207, + "grad_norm": 0.2602691127905397, + "learning_rate": 1.857287716012545e-05, + "loss": 1.094, + "step": 698 + }, + { + "epoch": 1.7877237851662404, + "grad_norm": 0.2878865850607815, + "learning_rate": 1.8566303358761134e-05, + "loss": 1.0764, + "step": 699 + }, + { + "epoch": 1.7902813299232738, + "grad_norm": 0.25826524614522556, + "learning_rate": 1.8559715620556865e-05, + "loss": 1.095, + "step": 700 + }, + { + "epoch": 1.792838874680307, + "grad_norm": 0.3113734244197743, + "learning_rate": 1.855311395623048e-05, + "loss": 1.0636, + "step": 701 + }, + { + "epoch": 1.7953964194373402, + "grad_norm": 0.32545837268145317, + "learning_rate": 1.854649837652247e-05, + "loss": 1.0836, + "step": 702 + }, + { + "epoch": 1.7979539641943734, + "grad_norm": 0.285984682125429, + "learning_rate": 1.8539868892195972e-05, + "loss": 1.0848, + "step": 703 + }, + { + "epoch": 1.8005115089514065, + "grad_norm": 0.27758608852953665, + "learning_rate": 1.8533225514036742e-05, + "loss": 1.0663, + "step": 704 + }, + { + "epoch": 1.80306905370844, + "grad_norm": 0.27148772448252917, + "learning_rate": 1.852656825285314e-05, + "loss": 1.094, + "step": 705 + }, + { + "epoch": 1.8056265984654731, + "grad_norm": 0.30810009717755804, + "learning_rate": 1.8519897119476115e-05, + "loss": 1.0455, + "step": 706 + }, + { + "epoch": 1.8081841432225065, + "grad_norm": 0.2763175632842481, + "learning_rate": 1.8513212124759185e-05, + "loss": 1.0525, + "step": 707 + }, + { + "epoch": 1.8107416879795397, + "grad_norm": 0.2555077301269018, + "learning_rate": 1.8506513279578415e-05, + "loss": 1.0708, + "step": 708 + }, + { + "epoch": 1.813299232736573, + "grad_norm": 0.2861828394638753, + "learning_rate": 1.849980059483241e-05, + "loss": 1.0269, + "step": 709 + }, + { + "epoch": 1.815856777493606, + "grad_norm": 0.32694363610851984, + "learning_rate": 1.849307408144229e-05, + "loss": 1.0742, + "step": 710 + }, + { + "epoch": 1.8184143222506393, + "grad_norm": 0.33550420038638934, + "learning_rate": 1.8486333750351668e-05, + "loss": 1.1291, + "step": 711 + }, + { + "epoch": 1.8209718670076727, + "grad_norm": 0.30494475043620173, + "learning_rate": 1.8479579612526642e-05, + "loss": 1.0754, + "step": 712 + }, + { + "epoch": 1.8235294117647058, + "grad_norm": 0.2449819480488345, + "learning_rate": 1.8472811678955773e-05, + "loss": 1.083, + "step": 713 + }, + { + "epoch": 1.8260869565217392, + "grad_norm": 0.26042670531487994, + "learning_rate": 1.8466029960650066e-05, + "loss": 1.0749, + "step": 714 + }, + { + "epoch": 1.8286445012787724, + "grad_norm": 0.3057228350277353, + "learning_rate": 1.845923446864295e-05, + "loss": 1.0549, + "step": 715 + }, + { + "epoch": 1.8312020460358056, + "grad_norm": 0.2500852141764497, + "learning_rate": 1.845242521399027e-05, + "loss": 1.0721, + "step": 716 + }, + { + "epoch": 1.8337595907928388, + "grad_norm": 0.2675252870460311, + "learning_rate": 1.8445602207770254e-05, + "loss": 1.0449, + "step": 717 + }, + { + "epoch": 1.836317135549872, + "grad_norm": 0.2836719734304398, + "learning_rate": 1.8438765461083504e-05, + "loss": 1.0905, + "step": 718 + }, + { + "epoch": 1.8388746803069054, + "grad_norm": 0.34699165997108533, + "learning_rate": 1.843191498505299e-05, + "loss": 1.0901, + "step": 719 + }, + { + "epoch": 1.8414322250639388, + "grad_norm": 0.2722070954863811, + "learning_rate": 1.8425050790823994e-05, + "loss": 1.0964, + "step": 720 + }, + { + "epoch": 1.843989769820972, + "grad_norm": 0.258368289769939, + "learning_rate": 1.8418172889564145e-05, + "loss": 1.0962, + "step": 721 + }, + { + "epoch": 1.8465473145780051, + "grad_norm": 0.25936143701246717, + "learning_rate": 1.8411281292463345e-05, + "loss": 1.0545, + "step": 722 + }, + { + "epoch": 1.8491048593350383, + "grad_norm": 0.3060957581043503, + "learning_rate": 1.8404376010733802e-05, + "loss": 1.0815, + "step": 723 + }, + { + "epoch": 1.8516624040920715, + "grad_norm": 0.2815365945528782, + "learning_rate": 1.8397457055609973e-05, + "loss": 1.0759, + "step": 724 + }, + { + "epoch": 1.854219948849105, + "grad_norm": 0.2745951540225352, + "learning_rate": 1.8390524438348565e-05, + "loss": 1.1021, + "step": 725 + }, + { + "epoch": 1.856777493606138, + "grad_norm": 0.27846031555437806, + "learning_rate": 1.8383578170228514e-05, + "loss": 1.0248, + "step": 726 + }, + { + "epoch": 1.8593350383631715, + "grad_norm": 0.2938959273434096, + "learning_rate": 1.8376618262550966e-05, + "loss": 1.0528, + "step": 727 + }, + { + "epoch": 1.8618925831202047, + "grad_norm": 0.2993316558221603, + "learning_rate": 1.836964472663925e-05, + "loss": 1.058, + "step": 728 + }, + { + "epoch": 1.8644501278772379, + "grad_norm": 0.28817201575308804, + "learning_rate": 1.8362657573838874e-05, + "loss": 1.1157, + "step": 729 + }, + { + "epoch": 1.867007672634271, + "grad_norm": 0.22467467671098768, + "learning_rate": 1.8355656815517505e-05, + "loss": 1.0711, + "step": 730 + }, + { + "epoch": 1.8695652173913042, + "grad_norm": 0.29149108866988305, + "learning_rate": 1.8348642463064937e-05, + "loss": 1.0414, + "step": 731 + }, + { + "epoch": 1.8721227621483376, + "grad_norm": 0.39401431973372464, + "learning_rate": 1.8341614527893077e-05, + "loss": 1.0791, + "step": 732 + }, + { + "epoch": 1.8746803069053708, + "grad_norm": 0.4335182479065654, + "learning_rate": 1.833457302143594e-05, + "loss": 1.0878, + "step": 733 + }, + { + "epoch": 1.8772378516624042, + "grad_norm": 0.43497766670833005, + "learning_rate": 1.832751795514962e-05, + "loss": 1.0484, + "step": 734 + }, + { + "epoch": 1.8797953964194374, + "grad_norm": 0.2997553952148685, + "learning_rate": 1.832044934051226e-05, + "loss": 1.0762, + "step": 735 + }, + { + "epoch": 1.8823529411764706, + "grad_norm": 0.23441660095601177, + "learning_rate": 1.8313367189024065e-05, + "loss": 1.1082, + "step": 736 + }, + { + "epoch": 1.8849104859335037, + "grad_norm": 0.23816717696848114, + "learning_rate": 1.8306271512207242e-05, + "loss": 1.0834, + "step": 737 + }, + { + "epoch": 1.887468030690537, + "grad_norm": 0.29809886717421774, + "learning_rate": 1.829916232160602e-05, + "loss": 1.087, + "step": 738 + }, + { + "epoch": 1.8900255754475703, + "grad_norm": 0.36580006827207345, + "learning_rate": 1.829203962878661e-05, + "loss": 1.0718, + "step": 739 + }, + { + "epoch": 1.8925831202046037, + "grad_norm": 0.36472500474679165, + "learning_rate": 1.8284903445337184e-05, + "loss": 1.0435, + "step": 740 + }, + { + "epoch": 1.895140664961637, + "grad_norm": 0.2569898458683152, + "learning_rate": 1.8277753782867865e-05, + "loss": 1.0569, + "step": 741 + }, + { + "epoch": 1.89769820971867, + "grad_norm": 0.2807015519670205, + "learning_rate": 1.8270590653010706e-05, + "loss": 1.0623, + "step": 742 + }, + { + "epoch": 1.9002557544757033, + "grad_norm": 0.2706420270561887, + "learning_rate": 1.8263414067419676e-05, + "loss": 1.101, + "step": 743 + }, + { + "epoch": 1.9028132992327365, + "grad_norm": 0.28562929161394046, + "learning_rate": 1.8256224037770628e-05, + "loss": 1.0524, + "step": 744 + }, + { + "epoch": 1.9053708439897699, + "grad_norm": 0.2774733347803849, + "learning_rate": 1.824902057576129e-05, + "loss": 1.0511, + "step": 745 + }, + { + "epoch": 1.907928388746803, + "grad_norm": 0.22198709105225659, + "learning_rate": 1.8241803693111245e-05, + "loss": 1.075, + "step": 746 + }, + { + "epoch": 1.9104859335038364, + "grad_norm": 0.287788512970941, + "learning_rate": 1.8234573401561914e-05, + "loss": 1.0665, + "step": 747 + }, + { + "epoch": 1.9130434782608696, + "grad_norm": 0.2909301551397291, + "learning_rate": 1.8227329712876525e-05, + "loss": 1.0802, + "step": 748 + }, + { + "epoch": 1.9156010230179028, + "grad_norm": 0.25392349276614573, + "learning_rate": 1.8220072638840105e-05, + "loss": 1.1035, + "step": 749 + }, + { + "epoch": 1.918158567774936, + "grad_norm": 0.22821936416155694, + "learning_rate": 1.8212802191259465e-05, + "loss": 1.0571, + "step": 750 + }, + { + "epoch": 1.9207161125319692, + "grad_norm": 0.3130516886250542, + "learning_rate": 1.8205518381963165e-05, + "loss": 1.1095, + "step": 751 + }, + { + "epoch": 1.9232736572890026, + "grad_norm": 0.3857586516868388, + "learning_rate": 1.8198221222801506e-05, + "loss": 1.06, + "step": 752 + }, + { + "epoch": 1.9258312020460358, + "grad_norm": 0.315792024279407, + "learning_rate": 1.8190910725646512e-05, + "loss": 1.0772, + "step": 753 + }, + { + "epoch": 1.9283887468030692, + "grad_norm": 0.26686727973038904, + "learning_rate": 1.8183586902391905e-05, + "loss": 1.0708, + "step": 754 + }, + { + "epoch": 1.9309462915601023, + "grad_norm": 0.3669775155609857, + "learning_rate": 1.8176249764953088e-05, + "loss": 1.0393, + "step": 755 + }, + { + "epoch": 1.9335038363171355, + "grad_norm": 0.3411186812565117, + "learning_rate": 1.8168899325267122e-05, + "loss": 1.0777, + "step": 756 + }, + { + "epoch": 1.9360613810741687, + "grad_norm": 0.29525106020949826, + "learning_rate": 1.8161535595292717e-05, + "loss": 1.0738, + "step": 757 + }, + { + "epoch": 1.938618925831202, + "grad_norm": 0.2431416087312154, + "learning_rate": 1.8154158587010195e-05, + "loss": 1.0552, + "step": 758 + }, + { + "epoch": 1.9411764705882353, + "grad_norm": 0.2528824918629993, + "learning_rate": 1.8146768312421495e-05, + "loss": 1.1049, + "step": 759 + }, + { + "epoch": 1.9437340153452687, + "grad_norm": 0.27274199937217425, + "learning_rate": 1.8139364783550128e-05, + "loss": 1.11, + "step": 760 + }, + { + "epoch": 1.9462915601023019, + "grad_norm": 0.27694326525936447, + "learning_rate": 1.813194801244117e-05, + "loss": 1.1085, + "step": 761 + }, + { + "epoch": 1.948849104859335, + "grad_norm": 0.26284036778935943, + "learning_rate": 1.8124518011161246e-05, + "loss": 1.0817, + "step": 762 + }, + { + "epoch": 1.9514066496163682, + "grad_norm": 0.34628694859076536, + "learning_rate": 1.8117074791798503e-05, + "loss": 1.0723, + "step": 763 + }, + { + "epoch": 1.9539641943734014, + "grad_norm": 0.3205449398285809, + "learning_rate": 1.8109618366462597e-05, + "loss": 1.0878, + "step": 764 + }, + { + "epoch": 1.9565217391304348, + "grad_norm": 0.2930907660937919, + "learning_rate": 1.8102148747284662e-05, + "loss": 1.0194, + "step": 765 + }, + { + "epoch": 1.959079283887468, + "grad_norm": 0.3199378305398446, + "learning_rate": 1.8094665946417304e-05, + "loss": 1.0818, + "step": 766 + }, + { + "epoch": 1.9616368286445014, + "grad_norm": 0.3147442131814513, + "learning_rate": 1.8087169976034568e-05, + "loss": 1.0524, + "step": 767 + }, + { + "epoch": 1.9641943734015346, + "grad_norm": 0.29010540377698546, + "learning_rate": 1.807966084833193e-05, + "loss": 1.0804, + "step": 768 + }, + { + "epoch": 1.9667519181585678, + "grad_norm": 0.2830375710975825, + "learning_rate": 1.8072138575526277e-05, + "loss": 1.0876, + "step": 769 + }, + { + "epoch": 1.969309462915601, + "grad_norm": 0.29912181409924526, + "learning_rate": 1.806460316985587e-05, + "loss": 1.0674, + "step": 770 + }, + { + "epoch": 1.9718670076726341, + "grad_norm": 0.280637494020639, + "learning_rate": 1.8057054643580347e-05, + "loss": 1.059, + "step": 771 + }, + { + "epoch": 1.9744245524296675, + "grad_norm": 0.25437147169201857, + "learning_rate": 1.8049493008980685e-05, + "loss": 1.076, + "step": 772 + }, + { + "epoch": 1.976982097186701, + "grad_norm": 0.260015840044801, + "learning_rate": 1.8041918278359194e-05, + "loss": 1.0884, + "step": 773 + }, + { + "epoch": 1.979539641943734, + "grad_norm": 0.23338451398624144, + "learning_rate": 1.8034330464039485e-05, + "loss": 1.0564, + "step": 774 + }, + { + "epoch": 1.9820971867007673, + "grad_norm": 0.27240262637273416, + "learning_rate": 1.8026729578366457e-05, + "loss": 1.0653, + "step": 775 + }, + { + "epoch": 1.9846547314578005, + "grad_norm": 0.2658428330726454, + "learning_rate": 1.801911563370628e-05, + "loss": 1.0847, + "step": 776 + }, + { + "epoch": 1.9872122762148337, + "grad_norm": 0.24259844645380865, + "learning_rate": 1.801148864244636e-05, + "loss": 1.0617, + "step": 777 + }, + { + "epoch": 1.989769820971867, + "grad_norm": 0.274423591955145, + "learning_rate": 1.8003848616995333e-05, + "loss": 1.1046, + "step": 778 + }, + { + "epoch": 1.9923273657289002, + "grad_norm": 0.270074412347766, + "learning_rate": 1.7996195569783053e-05, + "loss": 1.0841, + "step": 779 + }, + { + "epoch": 1.9948849104859336, + "grad_norm": 0.32727342222060607, + "learning_rate": 1.798852951326054e-05, + "loss": 1.064, + "step": 780 + }, + { + "epoch": 1.9974424552429668, + "grad_norm": 0.28041604224998723, + "learning_rate": 1.7980850459899997e-05, + "loss": 1.0748, + "step": 781 + }, + { + "epoch": 2.0, + "grad_norm": 0.230649257113214, + "learning_rate": 1.7973158422194754e-05, + "loss": 1.0504, + "step": 782 + }, + { + "epoch": 2.002557544757033, + "grad_norm": 0.27721442928112094, + "learning_rate": 1.7965453412659284e-05, + "loss": 1.0561, + "step": 783 + }, + { + "epoch": 2.0051150895140664, + "grad_norm": 0.3484629274944669, + "learning_rate": 1.795773544382915e-05, + "loss": 1.0484, + "step": 784 + }, + { + "epoch": 2.0076726342710995, + "grad_norm": 0.35248757109292245, + "learning_rate": 1.795000452826101e-05, + "loss": 1.0494, + "step": 785 + }, + { + "epoch": 2.010230179028133, + "grad_norm": 0.31602726514395096, + "learning_rate": 1.794226067853257e-05, + "loss": 1.1343, + "step": 786 + }, + { + "epoch": 2.0127877237851663, + "grad_norm": 0.30632695925595954, + "learning_rate": 1.79345039072426e-05, + "loss": 1.0648, + "step": 787 + }, + { + "epoch": 2.0153452685421995, + "grad_norm": 0.33328827891250323, + "learning_rate": 1.7926734227010876e-05, + "loss": 1.0801, + "step": 788 + }, + { + "epoch": 2.0179028132992327, + "grad_norm": 0.35618373914463364, + "learning_rate": 1.7918951650478188e-05, + "loss": 1.0613, + "step": 789 + }, + { + "epoch": 2.020460358056266, + "grad_norm": 0.3085542598082131, + "learning_rate": 1.7911156190306296e-05, + "loss": 1.0476, + "step": 790 + }, + { + "epoch": 2.023017902813299, + "grad_norm": 0.22686489493321832, + "learning_rate": 1.7903347859177926e-05, + "loss": 1.0486, + "step": 791 + }, + { + "epoch": 2.0255754475703327, + "grad_norm": 0.2750201664093288, + "learning_rate": 1.7895526669796747e-05, + "loss": 1.0543, + "step": 792 + }, + { + "epoch": 2.028132992327366, + "grad_norm": 0.2998881689120612, + "learning_rate": 1.7887692634887345e-05, + "loss": 1.0434, + "step": 793 + }, + { + "epoch": 2.030690537084399, + "grad_norm": 0.260904922673988, + "learning_rate": 1.7879845767195204e-05, + "loss": 1.0443, + "step": 794 + }, + { + "epoch": 2.0332480818414322, + "grad_norm": 0.2465816351987358, + "learning_rate": 1.787198607948669e-05, + "loss": 1.0516, + "step": 795 + }, + { + "epoch": 2.0358056265984654, + "grad_norm": 0.23239060808440448, + "learning_rate": 1.786411358454902e-05, + "loss": 1.0588, + "step": 796 + }, + { + "epoch": 2.0383631713554986, + "grad_norm": 0.26101630597920855, + "learning_rate": 1.785622829519025e-05, + "loss": 1.0835, + "step": 797 + }, + { + "epoch": 2.040920716112532, + "grad_norm": 0.3040971752066545, + "learning_rate": 1.7848330224239256e-05, + "loss": 1.0563, + "step": 798 + }, + { + "epoch": 2.0434782608695654, + "grad_norm": 0.26487253530894395, + "learning_rate": 1.7840419384545706e-05, + "loss": 1.0579, + "step": 799 + }, + { + "epoch": 2.0460358056265986, + "grad_norm": 0.2689601096947907, + "learning_rate": 1.7832495788980035e-05, + "loss": 1.1015, + "step": 800 + }, + { + "epoch": 2.0485933503836318, + "grad_norm": 0.25525460785840065, + "learning_rate": 1.7824559450433446e-05, + "loss": 1.0537, + "step": 801 + }, + { + "epoch": 2.051150895140665, + "grad_norm": 0.345599384998098, + "learning_rate": 1.7816610381817864e-05, + "loss": 1.0604, + "step": 802 + }, + { + "epoch": 2.053708439897698, + "grad_norm": 0.3359389407416057, + "learning_rate": 1.780864859606592e-05, + "loss": 1.0664, + "step": 803 + }, + { + "epoch": 2.0562659846547313, + "grad_norm": 0.2813553104050823, + "learning_rate": 1.780067410613095e-05, + "loss": 1.0937, + "step": 804 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.2548220560875847, + "learning_rate": 1.7792686924986946e-05, + "loss": 1.0441, + "step": 805 + }, + { + "epoch": 2.061381074168798, + "grad_norm": 0.28792647000401994, + "learning_rate": 1.7784687065628554e-05, + "loss": 1.058, + "step": 806 + }, + { + "epoch": 2.0639386189258313, + "grad_norm": 0.2603601267230107, + "learning_rate": 1.777667454107104e-05, + "loss": 1.0992, + "step": 807 + }, + { + "epoch": 2.0664961636828645, + "grad_norm": 0.2583588654263776, + "learning_rate": 1.776864936435029e-05, + "loss": 1.0735, + "step": 808 + }, + { + "epoch": 2.0690537084398977, + "grad_norm": 0.30719716854376583, + "learning_rate": 1.7760611548522755e-05, + "loss": 1.0498, + "step": 809 + }, + { + "epoch": 2.071611253196931, + "grad_norm": 0.30807492892970295, + "learning_rate": 1.7752561106665463e-05, + "loss": 1.0548, + "step": 810 + }, + { + "epoch": 2.074168797953964, + "grad_norm": 0.3210704099635407, + "learning_rate": 1.7744498051875984e-05, + "loss": 1.077, + "step": 811 + }, + { + "epoch": 2.0767263427109977, + "grad_norm": 0.4282126010865939, + "learning_rate": 1.7736422397272396e-05, + "loss": 1.0494, + "step": 812 + }, + { + "epoch": 2.079283887468031, + "grad_norm": 0.4051125030459934, + "learning_rate": 1.772833415599329e-05, + "loss": 1.0511, + "step": 813 + }, + { + "epoch": 2.081841432225064, + "grad_norm": 0.2991528183767012, + "learning_rate": 1.7720233341197726e-05, + "loss": 1.1121, + "step": 814 + }, + { + "epoch": 2.084398976982097, + "grad_norm": 0.22783217071200507, + "learning_rate": 1.7712119966065225e-05, + "loss": 1.0383, + "step": 815 + }, + { + "epoch": 2.0869565217391304, + "grad_norm": 0.3516616820022178, + "learning_rate": 1.770399404379574e-05, + "loss": 1.0498, + "step": 816 + }, + { + "epoch": 2.0895140664961636, + "grad_norm": 0.2606641623626611, + "learning_rate": 1.7695855587609637e-05, + "loss": 1.0594, + "step": 817 + }, + { + "epoch": 2.0920716112531967, + "grad_norm": 0.269085192714615, + "learning_rate": 1.7687704610747676e-05, + "loss": 1.0419, + "step": 818 + }, + { + "epoch": 2.0946291560102304, + "grad_norm": 0.28768629596697776, + "learning_rate": 1.767954112647099e-05, + "loss": 1.0435, + "step": 819 + }, + { + "epoch": 2.0971867007672635, + "grad_norm": 0.27429737921035624, + "learning_rate": 1.7671365148061053e-05, + "loss": 1.0458, + "step": 820 + }, + { + "epoch": 2.0997442455242967, + "grad_norm": 0.29736519534073375, + "learning_rate": 1.7663176688819673e-05, + "loss": 1.0566, + "step": 821 + }, + { + "epoch": 2.10230179028133, + "grad_norm": 0.26021437570192907, + "learning_rate": 1.765497576206896e-05, + "loss": 1.0422, + "step": 822 + }, + { + "epoch": 2.104859335038363, + "grad_norm": 0.2783440308095714, + "learning_rate": 1.764676238115131e-05, + "loss": 1.0776, + "step": 823 + }, + { + "epoch": 2.1074168797953963, + "grad_norm": 0.3339846285282316, + "learning_rate": 1.763853655942938e-05, + "loss": 1.0674, + "step": 824 + }, + { + "epoch": 2.10997442455243, + "grad_norm": 0.2223362385153581, + "learning_rate": 1.7630298310286065e-05, + "loss": 1.0699, + "step": 825 + }, + { + "epoch": 2.112531969309463, + "grad_norm": 0.33059613735162624, + "learning_rate": 1.7622047647124488e-05, + "loss": 1.0634, + "step": 826 + }, + { + "epoch": 2.1150895140664963, + "grad_norm": 0.3414911305158879, + "learning_rate": 1.761378458336796e-05, + "loss": 1.0548, + "step": 827 + }, + { + "epoch": 2.1176470588235294, + "grad_norm": 0.32041930375116484, + "learning_rate": 1.760550913245996e-05, + "loss": 1.0621, + "step": 828 + }, + { + "epoch": 2.1202046035805626, + "grad_norm": 0.2971788267573472, + "learning_rate": 1.7597221307864142e-05, + "loss": 1.0704, + "step": 829 + }, + { + "epoch": 2.122762148337596, + "grad_norm": 0.27537162097267065, + "learning_rate": 1.7588921123064273e-05, + "loss": 1.0961, + "step": 830 + }, + { + "epoch": 2.125319693094629, + "grad_norm": 0.29232241446373336, + "learning_rate": 1.7580608591564233e-05, + "loss": 1.0916, + "step": 831 + }, + { + "epoch": 2.1278772378516626, + "grad_norm": 0.3815701080685027, + "learning_rate": 1.757228372688799e-05, + "loss": 1.0848, + "step": 832 + }, + { + "epoch": 2.130434782608696, + "grad_norm": 0.33830135607419565, + "learning_rate": 1.7563946542579584e-05, + "loss": 1.0824, + "step": 833 + }, + { + "epoch": 2.132992327365729, + "grad_norm": 0.26436755888688523, + "learning_rate": 1.7555597052203088e-05, + "loss": 1.0424, + "step": 834 + }, + { + "epoch": 2.135549872122762, + "grad_norm": 0.2204259325114956, + "learning_rate": 1.7547235269342602e-05, + "loss": 1.0749, + "step": 835 + }, + { + "epoch": 2.1381074168797953, + "grad_norm": 0.31500508880378464, + "learning_rate": 1.7538861207602225e-05, + "loss": 1.0871, + "step": 836 + }, + { + "epoch": 2.1406649616368285, + "grad_norm": 0.33104625224299034, + "learning_rate": 1.753047488060603e-05, + "loss": 1.0257, + "step": 837 + }, + { + "epoch": 2.1432225063938617, + "grad_norm": 0.2325551980906377, + "learning_rate": 1.7522076301998048e-05, + "loss": 1.0907, + "step": 838 + }, + { + "epoch": 2.1457800511508953, + "grad_norm": 0.2464976826758584, + "learning_rate": 1.7513665485442238e-05, + "loss": 1.067, + "step": 839 + }, + { + "epoch": 2.1483375959079285, + "grad_norm": 0.25290511781194314, + "learning_rate": 1.750524244462248e-05, + "loss": 1.0893, + "step": 840 + }, + { + "epoch": 2.1508951406649617, + "grad_norm": 0.3247901788745791, + "learning_rate": 1.7496807193242528e-05, + "loss": 1.0638, + "step": 841 + }, + { + "epoch": 2.153452685421995, + "grad_norm": 0.34958915516133227, + "learning_rate": 1.748835974502601e-05, + "loss": 1.0825, + "step": 842 + }, + { + "epoch": 2.156010230179028, + "grad_norm": 0.24243104695456325, + "learning_rate": 1.7479900113716398e-05, + "loss": 1.0537, + "step": 843 + }, + { + "epoch": 2.1585677749360612, + "grad_norm": 0.2734369268109971, + "learning_rate": 1.7471428313076984e-05, + "loss": 1.1031, + "step": 844 + }, + { + "epoch": 2.1611253196930944, + "grad_norm": 0.3380184912512867, + "learning_rate": 1.7462944356890853e-05, + "loss": 1.0589, + "step": 845 + }, + { + "epoch": 2.163682864450128, + "grad_norm": 0.3625402818137926, + "learning_rate": 1.7454448258960877e-05, + "loss": 1.0561, + "step": 846 + }, + { + "epoch": 2.166240409207161, + "grad_norm": 0.34638148620089215, + "learning_rate": 1.744594003310967e-05, + "loss": 1.0186, + "step": 847 + }, + { + "epoch": 2.1687979539641944, + "grad_norm": 0.24740728690176142, + "learning_rate": 1.743741969317959e-05, + "loss": 1.1099, + "step": 848 + }, + { + "epoch": 2.1713554987212276, + "grad_norm": 0.287155398140135, + "learning_rate": 1.7428887253032695e-05, + "loss": 1.0691, + "step": 849 + }, + { + "epoch": 2.1739130434782608, + "grad_norm": 0.3566062867329238, + "learning_rate": 1.7420342726550728e-05, + "loss": 1.0701, + "step": 850 + }, + { + "epoch": 2.176470588235294, + "grad_norm": 0.3096727205958978, + "learning_rate": 1.74117861276351e-05, + "loss": 1.0716, + "step": 851 + }, + { + "epoch": 2.1790281329923276, + "grad_norm": 0.25874536932280473, + "learning_rate": 1.740321747020687e-05, + "loss": 1.0893, + "step": 852 + }, + { + "epoch": 2.1815856777493607, + "grad_norm": 0.21538442833683963, + "learning_rate": 1.7394636768206702e-05, + "loss": 1.0266, + "step": 853 + }, + { + "epoch": 2.184143222506394, + "grad_norm": 0.2871943030157397, + "learning_rate": 1.738604403559486e-05, + "loss": 1.0085, + "step": 854 + }, + { + "epoch": 2.186700767263427, + "grad_norm": 0.2851621085345804, + "learning_rate": 1.7377439286351184e-05, + "loss": 1.0622, + "step": 855 + }, + { + "epoch": 2.1892583120204603, + "grad_norm": 0.26228336638762867, + "learning_rate": 1.736882253447506e-05, + "loss": 1.083, + "step": 856 + }, + { + "epoch": 2.1918158567774935, + "grad_norm": 0.26992050889733915, + "learning_rate": 1.736019379398542e-05, + "loss": 1.1006, + "step": 857 + }, + { + "epoch": 2.1943734015345266, + "grad_norm": 0.23555655653113924, + "learning_rate": 1.7351553078920665e-05, + "loss": 1.0914, + "step": 858 + }, + { + "epoch": 2.1969309462915603, + "grad_norm": 0.30209071932451825, + "learning_rate": 1.734290040333871e-05, + "loss": 1.0873, + "step": 859 + }, + { + "epoch": 2.1994884910485935, + "grad_norm": 0.23936877597438264, + "learning_rate": 1.733423578131691e-05, + "loss": 1.0835, + "step": 860 + }, + { + "epoch": 2.2020460358056266, + "grad_norm": 0.3366403647300894, + "learning_rate": 1.732555922695207e-05, + "loss": 1.0743, + "step": 861 + }, + { + "epoch": 2.20460358056266, + "grad_norm": 0.30248308613139313, + "learning_rate": 1.73168707543604e-05, + "loss": 1.0482, + "step": 862 + }, + { + "epoch": 2.207161125319693, + "grad_norm": 0.26759196361130394, + "learning_rate": 1.73081703776775e-05, + "loss": 1.0686, + "step": 863 + }, + { + "epoch": 2.209718670076726, + "grad_norm": 0.2424062745806639, + "learning_rate": 1.7299458111058336e-05, + "loss": 1.0738, + "step": 864 + }, + { + "epoch": 2.21227621483376, + "grad_norm": 0.24086304886593904, + "learning_rate": 1.7290733968677226e-05, + "loss": 1.0313, + "step": 865 + }, + { + "epoch": 2.214833759590793, + "grad_norm": 0.30184358263466177, + "learning_rate": 1.7281997964727803e-05, + "loss": 1.0602, + "step": 866 + }, + { + "epoch": 2.217391304347826, + "grad_norm": 0.2366294082979442, + "learning_rate": 1.7273250113423e-05, + "loss": 1.1046, + "step": 867 + }, + { + "epoch": 2.2199488491048593, + "grad_norm": 0.26905581826310315, + "learning_rate": 1.726449042899502e-05, + "loss": 1.0437, + "step": 868 + }, + { + "epoch": 2.2225063938618925, + "grad_norm": 0.36508543225667806, + "learning_rate": 1.725571892569533e-05, + "loss": 1.0809, + "step": 869 + }, + { + "epoch": 2.2250639386189257, + "grad_norm": 0.30221117179280654, + "learning_rate": 1.7246935617794608e-05, + "loss": 1.0664, + "step": 870 + }, + { + "epoch": 2.227621483375959, + "grad_norm": 0.2269380846996494, + "learning_rate": 1.723814051958275e-05, + "loss": 1.045, + "step": 871 + }, + { + "epoch": 2.2301790281329925, + "grad_norm": 0.3848192034817777, + "learning_rate": 1.7229333645368834e-05, + "loss": 1.0661, + "step": 872 + }, + { + "epoch": 2.2327365728900257, + "grad_norm": 0.4724477310420707, + "learning_rate": 1.722051500948109e-05, + "loss": 1.0846, + "step": 873 + }, + { + "epoch": 2.235294117647059, + "grad_norm": 0.3561338471365552, + "learning_rate": 1.7211684626266887e-05, + "loss": 1.0718, + "step": 874 + }, + { + "epoch": 2.237851662404092, + "grad_norm": 0.24533531015000096, + "learning_rate": 1.7202842510092706e-05, + "loss": 1.0428, + "step": 875 + }, + { + "epoch": 2.2404092071611252, + "grad_norm": 0.2999534454935499, + "learning_rate": 1.7193988675344125e-05, + "loss": 1.0598, + "step": 876 + }, + { + "epoch": 2.2429667519181584, + "grad_norm": 0.3931502655829081, + "learning_rate": 1.7185123136425775e-05, + "loss": 1.0486, + "step": 877 + }, + { + "epoch": 2.2455242966751916, + "grad_norm": 0.4099239641868052, + "learning_rate": 1.7176245907761327e-05, + "loss": 1.0567, + "step": 878 + }, + { + "epoch": 2.2480818414322252, + "grad_norm": 0.2859379832887241, + "learning_rate": 1.7167357003793485e-05, + "loss": 1.0567, + "step": 879 + }, + { + "epoch": 2.2506393861892584, + "grad_norm": 0.29262327466969734, + "learning_rate": 1.7158456438983934e-05, + "loss": 1.0299, + "step": 880 + }, + { + "epoch": 2.2531969309462916, + "grad_norm": 0.43158299248544585, + "learning_rate": 1.7149544227813343e-05, + "loss": 1.05, + "step": 881 + }, + { + "epoch": 2.2557544757033248, + "grad_norm": 0.3011090401640172, + "learning_rate": 1.7140620384781316e-05, + "loss": 1.0166, + "step": 882 + }, + { + "epoch": 2.258312020460358, + "grad_norm": 0.2826762526500697, + "learning_rate": 1.7131684924406392e-05, + "loss": 1.0561, + "step": 883 + }, + { + "epoch": 2.260869565217391, + "grad_norm": 0.40076272547936787, + "learning_rate": 1.7122737861226007e-05, + "loss": 1.0536, + "step": 884 + }, + { + "epoch": 2.2634271099744243, + "grad_norm": 0.3893952639906247, + "learning_rate": 1.711377920979647e-05, + "loss": 1.0717, + "step": 885 + }, + { + "epoch": 2.265984654731458, + "grad_norm": 0.2701415754560129, + "learning_rate": 1.7104808984692946e-05, + "loss": 1.0788, + "step": 886 + }, + { + "epoch": 2.268542199488491, + "grad_norm": 0.3118978955533469, + "learning_rate": 1.7095827200509436e-05, + "loss": 1.0358, + "step": 887 + }, + { + "epoch": 2.2710997442455243, + "grad_norm": 0.4681497183113763, + "learning_rate": 1.7086833871858735e-05, + "loss": 1.0405, + "step": 888 + }, + { + "epoch": 2.2736572890025575, + "grad_norm": 0.44886562710116457, + "learning_rate": 1.707782901337243e-05, + "loss": 1.0635, + "step": 889 + }, + { + "epoch": 2.2762148337595907, + "grad_norm": 0.24326783713209693, + "learning_rate": 1.7068812639700862e-05, + "loss": 1.0995, + "step": 890 + }, + { + "epoch": 2.2787723785166243, + "grad_norm": 0.34628521799460377, + "learning_rate": 1.7059784765513106e-05, + "loss": 1.0772, + "step": 891 + }, + { + "epoch": 2.2813299232736575, + "grad_norm": 0.3903166631143913, + "learning_rate": 1.705074540549695e-05, + "loss": 1.0609, + "step": 892 + }, + { + "epoch": 2.2838874680306906, + "grad_norm": 0.3263912141551758, + "learning_rate": 1.704169457435887e-05, + "loss": 1.0661, + "step": 893 + }, + { + "epoch": 2.286445012787724, + "grad_norm": 0.2566336981081094, + "learning_rate": 1.7032632286823995e-05, + "loss": 1.0853, + "step": 894 + }, + { + "epoch": 2.289002557544757, + "grad_norm": 0.36154048413903833, + "learning_rate": 1.702355855763611e-05, + "loss": 1.0723, + "step": 895 + }, + { + "epoch": 2.29156010230179, + "grad_norm": 0.2971617301340999, + "learning_rate": 1.70144734015576e-05, + "loss": 1.0619, + "step": 896 + }, + { + "epoch": 2.2941176470588234, + "grad_norm": 0.2572103383141402, + "learning_rate": 1.700537683336944e-05, + "loss": 1.0589, + "step": 897 + }, + { + "epoch": 2.296675191815857, + "grad_norm": 0.37750177979394905, + "learning_rate": 1.699626886787119e-05, + "loss": 1.0361, + "step": 898 + }, + { + "epoch": 2.29923273657289, + "grad_norm": 0.35765757522418873, + "learning_rate": 1.698714951988093e-05, + "loss": 1.071, + "step": 899 + }, + { + "epoch": 2.3017902813299234, + "grad_norm": 0.30989044748347006, + "learning_rate": 1.6978018804235278e-05, + "loss": 1.0555, + "step": 900 + }, + { + "epoch": 2.3043478260869565, + "grad_norm": 0.24476809290635856, + "learning_rate": 1.6968876735789326e-05, + "loss": 1.0483, + "step": 901 + }, + { + "epoch": 2.3069053708439897, + "grad_norm": 0.308551372008468, + "learning_rate": 1.695972332941666e-05, + "loss": 1.0551, + "step": 902 + }, + { + "epoch": 2.309462915601023, + "grad_norm": 0.37111491476604536, + "learning_rate": 1.695055860000929e-05, + "loss": 1.0743, + "step": 903 + }, + { + "epoch": 2.312020460358056, + "grad_norm": 0.29147416337800386, + "learning_rate": 1.6941382562477664e-05, + "loss": 1.0003, + "step": 904 + }, + { + "epoch": 2.3145780051150897, + "grad_norm": 0.26326878890729166, + "learning_rate": 1.6932195231750616e-05, + "loss": 1.0351, + "step": 905 + }, + { + "epoch": 2.317135549872123, + "grad_norm": 0.29839767577203885, + "learning_rate": 1.6922996622775363e-05, + "loss": 1.0445, + "step": 906 + }, + { + "epoch": 2.319693094629156, + "grad_norm": 0.23637128109675618, + "learning_rate": 1.691378675051747e-05, + "loss": 1.0519, + "step": 907 + }, + { + "epoch": 2.3222506393861893, + "grad_norm": 0.25442257071130125, + "learning_rate": 1.6904565629960814e-05, + "loss": 1.0902, + "step": 908 + }, + { + "epoch": 2.3248081841432224, + "grad_norm": 0.3303656891744051, + "learning_rate": 1.6895333276107588e-05, + "loss": 1.0265, + "step": 909 + }, + { + "epoch": 2.3273657289002556, + "grad_norm": 0.2612217404110996, + "learning_rate": 1.688608970397825e-05, + "loss": 1.1046, + "step": 910 + }, + { + "epoch": 2.329923273657289, + "grad_norm": 0.271721798226581, + "learning_rate": 1.6876834928611524e-05, + "loss": 1.0784, + "step": 911 + }, + { + "epoch": 2.3324808184143224, + "grad_norm": 0.22229862393309946, + "learning_rate": 1.6867568965064336e-05, + "loss": 1.0364, + "step": 912 + }, + { + "epoch": 2.3350383631713556, + "grad_norm": 0.23741009658476048, + "learning_rate": 1.685829182841184e-05, + "loss": 1.0707, + "step": 913 + }, + { + "epoch": 2.337595907928389, + "grad_norm": 0.28874176637750065, + "learning_rate": 1.684900353374735e-05, + "loss": 1.0702, + "step": 914 + }, + { + "epoch": 2.340153452685422, + "grad_norm": 0.30379227509184065, + "learning_rate": 1.683970409618235e-05, + "loss": 1.0689, + "step": 915 + }, + { + "epoch": 2.342710997442455, + "grad_norm": 0.2726310509927992, + "learning_rate": 1.683039353084644e-05, + "loss": 1.0905, + "step": 916 + }, + { + "epoch": 2.3452685421994883, + "grad_norm": 0.2713331067951481, + "learning_rate": 1.6821071852887322e-05, + "loss": 1.0317, + "step": 917 + }, + { + "epoch": 2.3478260869565215, + "grad_norm": 0.3293005148131402, + "learning_rate": 1.681173907747079e-05, + "loss": 1.0572, + "step": 918 + }, + { + "epoch": 2.350383631713555, + "grad_norm": 0.2660221814623652, + "learning_rate": 1.680239521978068e-05, + "loss": 1.0429, + "step": 919 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.2412158860005583, + "learning_rate": 1.679304029501887e-05, + "loss": 1.0452, + "step": 920 + }, + { + "epoch": 2.3554987212276215, + "grad_norm": 0.33605356950268017, + "learning_rate": 1.6783674318405233e-05, + "loss": 1.0496, + "step": 921 + }, + { + "epoch": 2.3580562659846547, + "grad_norm": 0.29348949393829404, + "learning_rate": 1.677429730517763e-05, + "loss": 1.0471, + "step": 922 + }, + { + "epoch": 2.360613810741688, + "grad_norm": 0.27205789977362044, + "learning_rate": 1.6764909270591875e-05, + "loss": 1.049, + "step": 923 + }, + { + "epoch": 2.363171355498721, + "grad_norm": 0.24380065073942686, + "learning_rate": 1.6755510229921713e-05, + "loss": 1.0568, + "step": 924 + }, + { + "epoch": 2.3657289002557547, + "grad_norm": 0.2607905003163443, + "learning_rate": 1.6746100198458795e-05, + "loss": 1.0447, + "step": 925 + }, + { + "epoch": 2.368286445012788, + "grad_norm": 0.25646849705097663, + "learning_rate": 1.673667919151266e-05, + "loss": 1.0213, + "step": 926 + }, + { + "epoch": 2.370843989769821, + "grad_norm": 0.24557852833345492, + "learning_rate": 1.6727247224410686e-05, + "loss": 1.079, + "step": 927 + }, + { + "epoch": 2.373401534526854, + "grad_norm": 0.2536896072712956, + "learning_rate": 1.67178043124981e-05, + "loss": 1.0864, + "step": 928 + }, + { + "epoch": 2.3759590792838874, + "grad_norm": 0.2921088303385537, + "learning_rate": 1.6708350471137927e-05, + "loss": 1.0564, + "step": 929 + }, + { + "epoch": 2.3785166240409206, + "grad_norm": 0.20366681064359315, + "learning_rate": 1.669888571571098e-05, + "loss": 1.0815, + "step": 930 + }, + { + "epoch": 2.381074168797954, + "grad_norm": 0.2708885776774786, + "learning_rate": 1.6689410061615823e-05, + "loss": 1.0453, + "step": 931 + }, + { + "epoch": 2.3836317135549874, + "grad_norm": 0.26422900568518476, + "learning_rate": 1.6679923524268748e-05, + "loss": 1.0691, + "step": 932 + }, + { + "epoch": 2.3861892583120206, + "grad_norm": 0.24062139672551194, + "learning_rate": 1.6670426119103762e-05, + "loss": 1.0527, + "step": 933 + }, + { + "epoch": 2.3887468030690537, + "grad_norm": 0.2440568759213169, + "learning_rate": 1.666091786157255e-05, + "loss": 1.039, + "step": 934 + }, + { + "epoch": 2.391304347826087, + "grad_norm": 0.24192631220648755, + "learning_rate": 1.6651398767144454e-05, + "loss": 1.0368, + "step": 935 + }, + { + "epoch": 2.39386189258312, + "grad_norm": 0.3094662604619502, + "learning_rate": 1.664186885130644e-05, + "loss": 1.0612, + "step": 936 + }, + { + "epoch": 2.3964194373401533, + "grad_norm": 0.22698815376801923, + "learning_rate": 1.6632328129563088e-05, + "loss": 1.0573, + "step": 937 + }, + { + "epoch": 2.398976982097187, + "grad_norm": 0.25713439762667506, + "learning_rate": 1.6622776617436556e-05, + "loss": 1.0689, + "step": 938 + }, + { + "epoch": 2.40153452685422, + "grad_norm": 0.21070288001877646, + "learning_rate": 1.6613214330466557e-05, + "loss": 1.0514, + "step": 939 + }, + { + "epoch": 2.4040920716112533, + "grad_norm": 0.2650104302111488, + "learning_rate": 1.6603641284210335e-05, + "loss": 1.0607, + "step": 940 + }, + { + "epoch": 2.4066496163682864, + "grad_norm": 0.24280091189228237, + "learning_rate": 1.6594057494242634e-05, + "loss": 1.0526, + "step": 941 + }, + { + "epoch": 2.4092071611253196, + "grad_norm": 0.2255724092281544, + "learning_rate": 1.6584462976155683e-05, + "loss": 1.0584, + "step": 942 + }, + { + "epoch": 2.411764705882353, + "grad_norm": 0.2704536970024839, + "learning_rate": 1.6574857745559168e-05, + "loss": 1.0621, + "step": 943 + }, + { + "epoch": 2.414322250639386, + "grad_norm": 0.29272610932834264, + "learning_rate": 1.656524181808019e-05, + "loss": 1.0625, + "step": 944 + }, + { + "epoch": 2.4168797953964196, + "grad_norm": 0.28911787491946217, + "learning_rate": 1.655561520936327e-05, + "loss": 1.0165, + "step": 945 + }, + { + "epoch": 2.419437340153453, + "grad_norm": 0.2532789709507061, + "learning_rate": 1.6545977935070293e-05, + "loss": 1.036, + "step": 946 + }, + { + "epoch": 2.421994884910486, + "grad_norm": 0.2522741919476773, + "learning_rate": 1.6536330010880502e-05, + "loss": 1.0879, + "step": 947 + }, + { + "epoch": 2.424552429667519, + "grad_norm": 0.2902148618078098, + "learning_rate": 1.652667145249047e-05, + "loss": 1.0447, + "step": 948 + }, + { + "epoch": 2.4271099744245523, + "grad_norm": 0.2266116217612757, + "learning_rate": 1.6517002275614062e-05, + "loss": 1.0603, + "step": 949 + }, + { + "epoch": 2.4296675191815855, + "grad_norm": 0.2855681782290051, + "learning_rate": 1.6507322495982433e-05, + "loss": 1.0415, + "step": 950 + }, + { + "epoch": 2.4322250639386187, + "grad_norm": 0.2666978671553076, + "learning_rate": 1.6497632129343964e-05, + "loss": 1.057, + "step": 951 + }, + { + "epoch": 2.4347826086956523, + "grad_norm": 0.25398223147396237, + "learning_rate": 1.6487931191464293e-05, + "loss": 1.0225, + "step": 952 + }, + { + "epoch": 2.4373401534526855, + "grad_norm": 0.27478774153195795, + "learning_rate": 1.647821969812623e-05, + "loss": 1.0743, + "step": 953 + }, + { + "epoch": 2.4398976982097187, + "grad_norm": 0.2548269730970245, + "learning_rate": 1.6468497665129767e-05, + "loss": 1.0753, + "step": 954 + }, + { + "epoch": 2.442455242966752, + "grad_norm": 0.2531646552603803, + "learning_rate": 1.645876510829205e-05, + "loss": 1.0502, + "step": 955 + }, + { + "epoch": 2.445012787723785, + "grad_norm": 0.2716259730414166, + "learning_rate": 1.6449022043447333e-05, + "loss": 1.0604, + "step": 956 + }, + { + "epoch": 2.4475703324808182, + "grad_norm": 0.2759652629992187, + "learning_rate": 1.6439268486446982e-05, + "loss": 1.0307, + "step": 957 + }, + { + "epoch": 2.4501278772378514, + "grad_norm": 0.284229730108131, + "learning_rate": 1.642950445315941e-05, + "loss": 1.0244, + "step": 958 + }, + { + "epoch": 2.452685421994885, + "grad_norm": 0.2857191939202473, + "learning_rate": 1.6419729959470107e-05, + "loss": 1.0475, + "step": 959 + }, + { + "epoch": 2.455242966751918, + "grad_norm": 0.24411876551827455, + "learning_rate": 1.6409945021281547e-05, + "loss": 1.0205, + "step": 960 + }, + { + "epoch": 2.4578005115089514, + "grad_norm": 0.2839219346381256, + "learning_rate": 1.6400149654513224e-05, + "loss": 1.0902, + "step": 961 + }, + { + "epoch": 2.4603580562659846, + "grad_norm": 0.290894600450773, + "learning_rate": 1.6390343875101582e-05, + "loss": 1.0655, + "step": 962 + }, + { + "epoch": 2.4629156010230178, + "grad_norm": 0.25018640254339125, + "learning_rate": 1.6380527699000012e-05, + "loss": 1.075, + "step": 963 + }, + { + "epoch": 2.4654731457800514, + "grad_norm": 0.314947984707885, + "learning_rate": 1.6370701142178815e-05, + "loss": 1.0802, + "step": 964 + }, + { + "epoch": 2.4680306905370846, + "grad_norm": 0.23513441288297676, + "learning_rate": 1.636086422062519e-05, + "loss": 1.0315, + "step": 965 + }, + { + "epoch": 2.4705882352941178, + "grad_norm": 0.26967522371119773, + "learning_rate": 1.635101695034319e-05, + "loss": 1.0454, + "step": 966 + }, + { + "epoch": 2.473145780051151, + "grad_norm": 0.2673917447835626, + "learning_rate": 1.6341159347353714e-05, + "loss": 1.0577, + "step": 967 + }, + { + "epoch": 2.475703324808184, + "grad_norm": 0.24623838061921519, + "learning_rate": 1.633129142769446e-05, + "loss": 1.0607, + "step": 968 + }, + { + "epoch": 2.4782608695652173, + "grad_norm": 0.5975989314807109, + "learning_rate": 1.6321413207419915e-05, + "loss": 1.0624, + "step": 969 + }, + { + "epoch": 2.4808184143222505, + "grad_norm": 0.2783985268403012, + "learning_rate": 1.6311524702601328e-05, + "loss": 1.0277, + "step": 970 + }, + { + "epoch": 2.483375959079284, + "grad_norm": 0.2948227168148377, + "learning_rate": 1.6301625929326682e-05, + "loss": 1.0509, + "step": 971 + }, + { + "epoch": 2.4859335038363173, + "grad_norm": 0.25464495418366273, + "learning_rate": 1.6291716903700657e-05, + "loss": 1.0743, + "step": 972 + }, + { + "epoch": 2.4884910485933505, + "grad_norm": 0.32267891042610297, + "learning_rate": 1.6281797641844615e-05, + "loss": 1.0528, + "step": 973 + }, + { + "epoch": 2.4910485933503836, + "grad_norm": 0.24461174022768228, + "learning_rate": 1.6271868159896583e-05, + "loss": 1.0536, + "step": 974 + }, + { + "epoch": 2.493606138107417, + "grad_norm": 0.3184259095166065, + "learning_rate": 1.6261928474011205e-05, + "loss": 1.0295, + "step": 975 + }, + { + "epoch": 2.49616368286445, + "grad_norm": 0.31223168542424856, + "learning_rate": 1.6251978600359727e-05, + "loss": 1.0611, + "step": 976 + }, + { + "epoch": 2.498721227621483, + "grad_norm": 0.24470883821957645, + "learning_rate": 1.6242018555129968e-05, + "loss": 1.0501, + "step": 977 + }, + { + "epoch": 2.501278772378517, + "grad_norm": 0.263841680832215, + "learning_rate": 1.6232048354526305e-05, + "loss": 1.0632, + "step": 978 + }, + { + "epoch": 2.50383631713555, + "grad_norm": 0.2799350053468126, + "learning_rate": 1.6222068014769626e-05, + "loss": 1.0669, + "step": 979 + }, + { + "epoch": 2.506393861892583, + "grad_norm": 0.23708656285849256, + "learning_rate": 1.6212077552097326e-05, + "loss": 1.0242, + "step": 980 + }, + { + "epoch": 2.5089514066496164, + "grad_norm": 0.32106303705514144, + "learning_rate": 1.6202076982763258e-05, + "loss": 1.038, + "step": 981 + }, + { + "epoch": 2.5115089514066495, + "grad_norm": 0.32641459248285415, + "learning_rate": 1.6192066323037723e-05, + "loss": 1.0192, + "step": 982 + }, + { + "epoch": 2.5140664961636827, + "grad_norm": 0.2374782294678397, + "learning_rate": 1.618204558920744e-05, + "loss": 1.0317, + "step": 983 + }, + { + "epoch": 2.516624040920716, + "grad_norm": 0.2669950742681541, + "learning_rate": 1.6172014797575512e-05, + "loss": 1.0604, + "step": 984 + }, + { + "epoch": 2.5191815856777495, + "grad_norm": 0.3289018657957539, + "learning_rate": 1.616197396446142e-05, + "loss": 1.0558, + "step": 985 + }, + { + "epoch": 2.5217391304347827, + "grad_norm": 0.30014120894320534, + "learning_rate": 1.6151923106200964e-05, + "loss": 1.0282, + "step": 986 + }, + { + "epoch": 2.524296675191816, + "grad_norm": 0.22934126760741957, + "learning_rate": 1.6141862239146263e-05, + "loss": 1.0442, + "step": 987 + }, + { + "epoch": 2.526854219948849, + "grad_norm": 0.3082443169061738, + "learning_rate": 1.613179137966572e-05, + "loss": 1.0671, + "step": 988 + }, + { + "epoch": 2.5294117647058822, + "grad_norm": 0.34264852115767747, + "learning_rate": 1.612171054414399e-05, + "loss": 1.0659, + "step": 989 + }, + { + "epoch": 2.531969309462916, + "grad_norm": 0.28840855857878017, + "learning_rate": 1.6111619748981967e-05, + "loss": 1.0757, + "step": 990 + }, + { + "epoch": 2.5345268542199486, + "grad_norm": 0.29679625325903564, + "learning_rate": 1.610151901059674e-05, + "loss": 1.0574, + "step": 991 + }, + { + "epoch": 2.5370843989769822, + "grad_norm": 0.2701305485919972, + "learning_rate": 1.6091408345421583e-05, + "loss": 1.076, + "step": 992 + }, + { + "epoch": 2.5396419437340154, + "grad_norm": 0.27772319714999755, + "learning_rate": 1.6081287769905914e-05, + "loss": 1.0557, + "step": 993 + }, + { + "epoch": 2.5421994884910486, + "grad_norm": 0.2575298835482317, + "learning_rate": 1.6071157300515274e-05, + "loss": 1.0371, + "step": 994 + }, + { + "epoch": 2.544757033248082, + "grad_norm": 0.2434229348885953, + "learning_rate": 1.6061016953731307e-05, + "loss": 1.0293, + "step": 995 + }, + { + "epoch": 2.547314578005115, + "grad_norm": 0.24931228820010734, + "learning_rate": 1.6050866746051722e-05, + "loss": 1.0497, + "step": 996 + }, + { + "epoch": 2.5498721227621486, + "grad_norm": 0.24970615225374868, + "learning_rate": 1.6040706693990272e-05, + "loss": 1.0507, + "step": 997 + }, + { + "epoch": 2.5524296675191813, + "grad_norm": 0.2705848075384666, + "learning_rate": 1.6030536814076722e-05, + "loss": 1.051, + "step": 998 + }, + { + "epoch": 2.554987212276215, + "grad_norm": 0.2645976951028759, + "learning_rate": 1.602035712285684e-05, + "loss": 1.044, + "step": 999 + }, + { + "epoch": 2.557544757033248, + "grad_norm": 0.25280588284501737, + "learning_rate": 1.6010167636892338e-05, + "loss": 1.0466, + "step": 1000 + }, + { + "epoch": 2.5601023017902813, + "grad_norm": 0.23309975174376094, + "learning_rate": 1.5999968372760882e-05, + "loss": 1.0503, + "step": 1001 + }, + { + "epoch": 2.5626598465473145, + "grad_norm": 0.24003131974818753, + "learning_rate": 1.5989759347056028e-05, + "loss": 1.0428, + "step": 1002 + }, + { + "epoch": 2.5652173913043477, + "grad_norm": 0.22803670250684518, + "learning_rate": 1.5979540576387226e-05, + "loss": 1.067, + "step": 1003 + }, + { + "epoch": 2.5677749360613813, + "grad_norm": 0.23366692767216873, + "learning_rate": 1.596931207737978e-05, + "loss": 1.0735, + "step": 1004 + }, + { + "epoch": 2.5703324808184145, + "grad_norm": 0.2514628572179653, + "learning_rate": 1.5959073866674812e-05, + "loss": 1.0683, + "step": 1005 + }, + { + "epoch": 2.5728900255754477, + "grad_norm": 0.2647695835957155, + "learning_rate": 1.594882596092926e-05, + "loss": 1.006, + "step": 1006 + }, + { + "epoch": 2.575447570332481, + "grad_norm": 0.2705206567562451, + "learning_rate": 1.5938568376815816e-05, + "loss": 1.0815, + "step": 1007 + }, + { + "epoch": 2.578005115089514, + "grad_norm": 0.26218100830771535, + "learning_rate": 1.5928301131022933e-05, + "loss": 1.0712, + "step": 1008 + }, + { + "epoch": 2.580562659846547, + "grad_norm": 0.24704018764157912, + "learning_rate": 1.5918024240254778e-05, + "loss": 1.069, + "step": 1009 + }, + { + "epoch": 2.5831202046035804, + "grad_norm": 0.3099818232532923, + "learning_rate": 1.5907737721231205e-05, + "loss": 1.0485, + "step": 1010 + }, + { + "epoch": 2.585677749360614, + "grad_norm": 0.2976698121714401, + "learning_rate": 1.5897441590687747e-05, + "loss": 1.0577, + "step": 1011 + }, + { + "epoch": 2.588235294117647, + "grad_norm": 0.25285713641828206, + "learning_rate": 1.5887135865375552e-05, + "loss": 1.0603, + "step": 1012 + }, + { + "epoch": 2.5907928388746804, + "grad_norm": 0.2526446484384057, + "learning_rate": 1.5876820562061402e-05, + "loss": 1.0433, + "step": 1013 + }, + { + "epoch": 2.5933503836317136, + "grad_norm": 0.29067294932967996, + "learning_rate": 1.586649569752765e-05, + "loss": 1.0616, + "step": 1014 + }, + { + "epoch": 2.5959079283887467, + "grad_norm": 0.282910218177146, + "learning_rate": 1.5856161288572195e-05, + "loss": 1.0413, + "step": 1015 + }, + { + "epoch": 2.59846547314578, + "grad_norm": 0.2268843181296163, + "learning_rate": 1.5845817352008485e-05, + "loss": 1.0407, + "step": 1016 + }, + { + "epoch": 2.601023017902813, + "grad_norm": 0.22762472803069236, + "learning_rate": 1.583546390466545e-05, + "loss": 1.0536, + "step": 1017 + }, + { + "epoch": 2.6035805626598467, + "grad_norm": 0.23603794648210832, + "learning_rate": 1.58251009633875e-05, + "loss": 1.0571, + "step": 1018 + }, + { + "epoch": 2.60613810741688, + "grad_norm": 0.2676423332930833, + "learning_rate": 1.5814728545034503e-05, + "loss": 1.0297, + "step": 1019 + }, + { + "epoch": 2.608695652173913, + "grad_norm": 0.25371119273646303, + "learning_rate": 1.5804346666481728e-05, + "loss": 1.037, + "step": 1020 + }, + { + "epoch": 2.6112531969309463, + "grad_norm": 0.23765073500378178, + "learning_rate": 1.5793955344619846e-05, + "loss": 1.0493, + "step": 1021 + }, + { + "epoch": 2.6138107416879794, + "grad_norm": 0.28479895070770733, + "learning_rate": 1.5783554596354885e-05, + "loss": 1.0428, + "step": 1022 + }, + { + "epoch": 2.6163682864450126, + "grad_norm": 0.2610596840924324, + "learning_rate": 1.577314443860821e-05, + "loss": 1.0659, + "step": 1023 + }, + { + "epoch": 2.618925831202046, + "grad_norm": 0.24670717715351206, + "learning_rate": 1.57627248883165e-05, + "loss": 1.0434, + "step": 1024 + }, + { + "epoch": 2.6214833759590794, + "grad_norm": 0.22640840073229135, + "learning_rate": 1.575229596243171e-05, + "loss": 1.043, + "step": 1025 + }, + { + "epoch": 2.6240409207161126, + "grad_norm": 0.25314200985521523, + "learning_rate": 1.574185767792106e-05, + "loss": 1.0494, + "step": 1026 + }, + { + "epoch": 2.626598465473146, + "grad_norm": 0.21470094174624627, + "learning_rate": 1.573141005176697e-05, + "loss": 1.0568, + "step": 1027 + }, + { + "epoch": 2.629156010230179, + "grad_norm": 0.23151889692704267, + "learning_rate": 1.5720953100967085e-05, + "loss": 1.0648, + "step": 1028 + }, + { + "epoch": 2.631713554987212, + "grad_norm": 0.21397184877158426, + "learning_rate": 1.5710486842534206e-05, + "loss": 1.0663, + "step": 1029 + }, + { + "epoch": 2.634271099744246, + "grad_norm": 0.22192997813660584, + "learning_rate": 1.5700011293496285e-05, + "loss": 1.0534, + "step": 1030 + }, + { + "epoch": 2.6368286445012785, + "grad_norm": 0.21407356154899657, + "learning_rate": 1.568952647089638e-05, + "loss": 1.059, + "step": 1031 + }, + { + "epoch": 2.639386189258312, + "grad_norm": 0.21832618515669033, + "learning_rate": 1.5679032391792648e-05, + "loss": 1.0221, + "step": 1032 + }, + { + "epoch": 2.6419437340153453, + "grad_norm": 0.24431871394272658, + "learning_rate": 1.5668529073258298e-05, + "loss": 1.0858, + "step": 1033 + }, + { + "epoch": 2.6445012787723785, + "grad_norm": 0.31234951434869057, + "learning_rate": 1.5658016532381565e-05, + "loss": 1.06, + "step": 1034 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 0.2080542192295102, + "learning_rate": 1.5647494786265705e-05, + "loss": 1.0651, + "step": 1035 + }, + { + "epoch": 2.649616368286445, + "grad_norm": 0.24670278561413833, + "learning_rate": 1.5636963852028936e-05, + "loss": 1.0373, + "step": 1036 + }, + { + "epoch": 2.6521739130434785, + "grad_norm": 0.23750220801463004, + "learning_rate": 1.5626423746804433e-05, + "loss": 1.0426, + "step": 1037 + }, + { + "epoch": 2.6547314578005117, + "grad_norm": 0.24041568140574793, + "learning_rate": 1.5615874487740287e-05, + "loss": 1.0504, + "step": 1038 + }, + { + "epoch": 2.657289002557545, + "grad_norm": 0.2389633958150457, + "learning_rate": 1.560531609199948e-05, + "loss": 1.0572, + "step": 1039 + }, + { + "epoch": 2.659846547314578, + "grad_norm": 0.2770548151196396, + "learning_rate": 1.559474857675986e-05, + "loss": 1.068, + "step": 1040 + }, + { + "epoch": 2.662404092071611, + "grad_norm": 0.266725154908083, + "learning_rate": 1.5584171959214126e-05, + "loss": 1.0449, + "step": 1041 + }, + { + "epoch": 2.6649616368286444, + "grad_norm": 0.25482885945652345, + "learning_rate": 1.557358625656976e-05, + "loss": 1.0784, + "step": 1042 + }, + { + "epoch": 2.6675191815856776, + "grad_norm": 0.264472394184579, + "learning_rate": 1.5562991486049045e-05, + "loss": 1.0118, + "step": 1043 + }, + { + "epoch": 2.670076726342711, + "grad_norm": 0.2848797989882817, + "learning_rate": 1.555238766488901e-05, + "loss": 1.0555, + "step": 1044 + }, + { + "epoch": 2.6726342710997444, + "grad_norm": 0.24695033243914596, + "learning_rate": 1.5541774810341404e-05, + "loss": 1.0402, + "step": 1045 + }, + { + "epoch": 2.6751918158567776, + "grad_norm": 0.20315866222350132, + "learning_rate": 1.5531152939672683e-05, + "loss": 1.0251, + "step": 1046 + }, + { + "epoch": 2.6777493606138107, + "grad_norm": 0.2608581931242649, + "learning_rate": 1.5520522070163962e-05, + "loss": 1.0549, + "step": 1047 + }, + { + "epoch": 2.680306905370844, + "grad_norm": 0.3085807293166213, + "learning_rate": 1.550988221911101e-05, + "loss": 1.0586, + "step": 1048 + }, + { + "epoch": 2.682864450127877, + "grad_norm": 0.22686082652143869, + "learning_rate": 1.549923340382419e-05, + "loss": 1.0315, + "step": 1049 + }, + { + "epoch": 2.6854219948849103, + "grad_norm": 0.23840859030860576, + "learning_rate": 1.548857564162846e-05, + "loss": 1.0542, + "step": 1050 + }, + { + "epoch": 2.687979539641944, + "grad_norm": 0.2828144148836396, + "learning_rate": 1.5477908949863335e-05, + "loss": 1.0546, + "step": 1051 + }, + { + "epoch": 2.690537084398977, + "grad_norm": 0.24462451577997144, + "learning_rate": 1.5467233345882858e-05, + "loss": 1.05, + "step": 1052 + }, + { + "epoch": 2.6930946291560103, + "grad_norm": 0.2608389325913873, + "learning_rate": 1.5456548847055565e-05, + "loss": 1.0582, + "step": 1053 + }, + { + "epoch": 2.6956521739130435, + "grad_norm": 0.2341653521141245, + "learning_rate": 1.5445855470764467e-05, + "loss": 1.0227, + "step": 1054 + }, + { + "epoch": 2.6982097186700766, + "grad_norm": 0.2001748409496552, + "learning_rate": 1.5435153234407023e-05, + "loss": 1.0361, + "step": 1055 + }, + { + "epoch": 2.70076726342711, + "grad_norm": 0.24778418959062198, + "learning_rate": 1.5424442155395095e-05, + "loss": 1.0556, + "step": 1056 + }, + { + "epoch": 2.703324808184143, + "grad_norm": 0.23891064433631373, + "learning_rate": 1.5413722251154947e-05, + "loss": 1.0583, + "step": 1057 + }, + { + "epoch": 2.7058823529411766, + "grad_norm": 0.18730639273619554, + "learning_rate": 1.540299353912719e-05, + "loss": 1.0461, + "step": 1058 + }, + { + "epoch": 2.70843989769821, + "grad_norm": 0.22764007423409213, + "learning_rate": 1.5392256036766767e-05, + "loss": 1.0723, + "step": 1059 + }, + { + "epoch": 2.710997442455243, + "grad_norm": 0.2161337514937876, + "learning_rate": 1.5381509761542925e-05, + "loss": 1.0303, + "step": 1060 + }, + { + "epoch": 2.713554987212276, + "grad_norm": 0.23665490844389125, + "learning_rate": 1.537075473093918e-05, + "loss": 1.072, + "step": 1061 + }, + { + "epoch": 2.7161125319693094, + "grad_norm": 0.2171745194472315, + "learning_rate": 1.535999096245329e-05, + "loss": 1.0609, + "step": 1062 + }, + { + "epoch": 2.718670076726343, + "grad_norm": 0.27479490086390757, + "learning_rate": 1.5349218473597244e-05, + "loss": 1.0976, + "step": 1063 + }, + { + "epoch": 2.7212276214833757, + "grad_norm": 0.23802159891837593, + "learning_rate": 1.5338437281897196e-05, + "loss": 1.0561, + "step": 1064 + }, + { + "epoch": 2.7237851662404093, + "grad_norm": 0.23413108216980624, + "learning_rate": 1.532764740489348e-05, + "loss": 1.0249, + "step": 1065 + }, + { + "epoch": 2.7263427109974425, + "grad_norm": 0.23839123328370654, + "learning_rate": 1.5316848860140545e-05, + "loss": 1.0448, + "step": 1066 + }, + { + "epoch": 2.7289002557544757, + "grad_norm": 0.26889749126936374, + "learning_rate": 1.530604166520695e-05, + "loss": 1.0538, + "step": 1067 + }, + { + "epoch": 2.731457800511509, + "grad_norm": 0.23104275616772496, + "learning_rate": 1.529522583767533e-05, + "loss": 1.0709, + "step": 1068 + }, + { + "epoch": 2.734015345268542, + "grad_norm": 0.26947945752974595, + "learning_rate": 1.5284401395142356e-05, + "loss": 1.0476, + "step": 1069 + }, + { + "epoch": 2.7365728900255757, + "grad_norm": 0.2650970504236315, + "learning_rate": 1.5273568355218714e-05, + "loss": 1.0906, + "step": 1070 + }, + { + "epoch": 2.7391304347826084, + "grad_norm": 0.2426600100365933, + "learning_rate": 1.5262726735529096e-05, + "loss": 1.0421, + "step": 1071 + }, + { + "epoch": 2.741687979539642, + "grad_norm": 0.2565653498953779, + "learning_rate": 1.5251876553712129e-05, + "loss": 1.0714, + "step": 1072 + }, + { + "epoch": 2.7442455242966752, + "grad_norm": 0.2590844357725753, + "learning_rate": 1.5241017827420379e-05, + "loss": 1.0529, + "step": 1073 + }, + { + "epoch": 2.7468030690537084, + "grad_norm": 0.2661157616076656, + "learning_rate": 1.523015057432032e-05, + "loss": 1.0413, + "step": 1074 + }, + { + "epoch": 2.7493606138107416, + "grad_norm": 0.2316877382855349, + "learning_rate": 1.5219274812092297e-05, + "loss": 1.0965, + "step": 1075 + }, + { + "epoch": 2.7519181585677748, + "grad_norm": 0.281689753856549, + "learning_rate": 1.5208390558430486e-05, + "loss": 1.0506, + "step": 1076 + }, + { + "epoch": 2.7544757033248084, + "grad_norm": 0.25889609476509934, + "learning_rate": 1.5197497831042891e-05, + "loss": 1.0701, + "step": 1077 + }, + { + "epoch": 2.7570332480818416, + "grad_norm": 0.25370938447354224, + "learning_rate": 1.5186596647651299e-05, + "loss": 1.0344, + "step": 1078 + }, + { + "epoch": 2.7595907928388748, + "grad_norm": 0.21590996086487077, + "learning_rate": 1.5175687025991254e-05, + "loss": 1.0111, + "step": 1079 + }, + { + "epoch": 2.762148337595908, + "grad_norm": 0.25136209115240976, + "learning_rate": 1.5164768983812031e-05, + "loss": 1.0594, + "step": 1080 + }, + { + "epoch": 2.764705882352941, + "grad_norm": 0.2296309073317973, + "learning_rate": 1.5153842538876595e-05, + "loss": 1.0195, + "step": 1081 + }, + { + "epoch": 2.7672634271099743, + "grad_norm": 0.2188880236827278, + "learning_rate": 1.5142907708961594e-05, + "loss": 1.0563, + "step": 1082 + }, + { + "epoch": 2.7698209718670075, + "grad_norm": 0.29043124524993463, + "learning_rate": 1.5131964511857307e-05, + "loss": 1.0579, + "step": 1083 + }, + { + "epoch": 2.772378516624041, + "grad_norm": 0.23042976434473456, + "learning_rate": 1.512101296536764e-05, + "loss": 1.0594, + "step": 1084 + }, + { + "epoch": 2.7749360613810743, + "grad_norm": 0.3064542379695439, + "learning_rate": 1.5110053087310067e-05, + "loss": 1.0347, + "step": 1085 + }, + { + "epoch": 2.7774936061381075, + "grad_norm": 0.2990911954190306, + "learning_rate": 1.5099084895515633e-05, + "loss": 1.0872, + "step": 1086 + }, + { + "epoch": 2.7800511508951407, + "grad_norm": 0.30238830537129957, + "learning_rate": 1.5088108407828887e-05, + "loss": 1.0102, + "step": 1087 + }, + { + "epoch": 2.782608695652174, + "grad_norm": 0.22800852447745912, + "learning_rate": 1.5077123642107901e-05, + "loss": 1.0373, + "step": 1088 + }, + { + "epoch": 2.785166240409207, + "grad_norm": 0.26466118290058793, + "learning_rate": 1.5066130616224194e-05, + "loss": 1.0601, + "step": 1089 + }, + { + "epoch": 2.78772378516624, + "grad_norm": 0.3134236905423725, + "learning_rate": 1.5055129348062733e-05, + "loss": 1.0282, + "step": 1090 + }, + { + "epoch": 2.790281329923274, + "grad_norm": 0.30040919493276264, + "learning_rate": 1.5044119855521899e-05, + "loss": 1.0028, + "step": 1091 + }, + { + "epoch": 2.792838874680307, + "grad_norm": 0.3018437088485077, + "learning_rate": 1.5033102156513442e-05, + "loss": 1.0642, + "step": 1092 + }, + { + "epoch": 2.79539641943734, + "grad_norm": 0.2594288455529522, + "learning_rate": 1.5022076268962474e-05, + "loss": 1.0651, + "step": 1093 + }, + { + "epoch": 2.7979539641943734, + "grad_norm": 0.2427672329241251, + "learning_rate": 1.5011042210807416e-05, + "loss": 1.0499, + "step": 1094 + }, + { + "epoch": 2.8005115089514065, + "grad_norm": 0.2753688016374087, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.0441, + "step": 1095 + }, + { + "epoch": 2.80306905370844, + "grad_norm": 0.333646004575826, + "learning_rate": 1.4988949654505212e-05, + "loss": 1.0954, + "step": 1096 + }, + { + "epoch": 2.805626598465473, + "grad_norm": 0.24884374092942535, + "learning_rate": 1.4977891192301266e-05, + "loss": 1.0616, + "step": 1097 + }, + { + "epoch": 2.8081841432225065, + "grad_norm": 0.25576802318021363, + "learning_rate": 1.4966824631379595e-05, + "loss": 1.0767, + "step": 1098 + }, + { + "epoch": 2.8107416879795397, + "grad_norm": 0.2726811004318987, + "learning_rate": 1.49557499897448e-05, + "loss": 1.0629, + "step": 1099 + }, + { + "epoch": 2.813299232736573, + "grad_norm": 0.2490020562964201, + "learning_rate": 1.4944667285414629e-05, + "loss": 1.0401, + "step": 1100 + }, + { + "epoch": 2.815856777493606, + "grad_norm": 0.230153454763048, + "learning_rate": 1.4933576536419951e-05, + "loss": 1.0681, + "step": 1101 + }, + { + "epoch": 2.8184143222506393, + "grad_norm": 0.29290021173573333, + "learning_rate": 1.492247776080472e-05, + "loss": 1.0478, + "step": 1102 + }, + { + "epoch": 2.820971867007673, + "grad_norm": 0.22373455728798555, + "learning_rate": 1.4911370976625951e-05, + "loss": 1.0646, + "step": 1103 + }, + { + "epoch": 2.8235294117647056, + "grad_norm": 0.2867670697761132, + "learning_rate": 1.4900256201953686e-05, + "loss": 1.0395, + "step": 1104 + }, + { + "epoch": 2.8260869565217392, + "grad_norm": 0.2580511336465639, + "learning_rate": 1.488913345487097e-05, + "loss": 1.0299, + "step": 1105 + }, + { + "epoch": 2.8286445012787724, + "grad_norm": 0.30823901300584283, + "learning_rate": 1.4878002753473814e-05, + "loss": 1.0588, + "step": 1106 + }, + { + "epoch": 2.8312020460358056, + "grad_norm": 0.26061529857491966, + "learning_rate": 1.486686411587118e-05, + "loss": 1.0544, + "step": 1107 + }, + { + "epoch": 2.833759590792839, + "grad_norm": 0.3411340236384177, + "learning_rate": 1.4855717560184925e-05, + "loss": 1.0673, + "step": 1108 + }, + { + "epoch": 2.836317135549872, + "grad_norm": 0.3112034427743734, + "learning_rate": 1.4844563104549808e-05, + "loss": 1.0702, + "step": 1109 + }, + { + "epoch": 2.8388746803069056, + "grad_norm": 0.26159448325094614, + "learning_rate": 1.4833400767113425e-05, + "loss": 1.0518, + "step": 1110 + }, + { + "epoch": 2.8414322250639388, + "grad_norm": 0.24843885045239295, + "learning_rate": 1.48222305660362e-05, + "loss": 1.0519, + "step": 1111 + }, + { + "epoch": 2.843989769820972, + "grad_norm": 0.34052436576940476, + "learning_rate": 1.4811052519491358e-05, + "loss": 1.0621, + "step": 1112 + }, + { + "epoch": 2.846547314578005, + "grad_norm": 0.25035667041534276, + "learning_rate": 1.4799866645664875e-05, + "loss": 1.0495, + "step": 1113 + }, + { + "epoch": 2.8491048593350383, + "grad_norm": 0.23950107492766087, + "learning_rate": 1.4788672962755474e-05, + "loss": 1.0474, + "step": 1114 + }, + { + "epoch": 2.8516624040920715, + "grad_norm": 0.2228748439468561, + "learning_rate": 1.4777471488974573e-05, + "loss": 1.056, + "step": 1115 + }, + { + "epoch": 2.8542199488491047, + "grad_norm": 0.21686894636285, + "learning_rate": 1.476626224254627e-05, + "loss": 1.0473, + "step": 1116 + }, + { + "epoch": 2.8567774936061383, + "grad_norm": 0.21336673271033718, + "learning_rate": 1.475504524170731e-05, + "loss": 1.0327, + "step": 1117 + }, + { + "epoch": 2.8593350383631715, + "grad_norm": 0.2412247096897979, + "learning_rate": 1.4743820504707054e-05, + "loss": 1.0603, + "step": 1118 + }, + { + "epoch": 2.8618925831202047, + "grad_norm": 0.20338495510222906, + "learning_rate": 1.4732588049807442e-05, + "loss": 1.0345, + "step": 1119 + }, + { + "epoch": 2.864450127877238, + "grad_norm": 0.2224056939046196, + "learning_rate": 1.4721347895282977e-05, + "loss": 1.0932, + "step": 1120 + }, + { + "epoch": 2.867007672634271, + "grad_norm": 0.21219190570803861, + "learning_rate": 1.4710100059420693e-05, + "loss": 1.0577, + "step": 1121 + }, + { + "epoch": 2.869565217391304, + "grad_norm": 0.23417177032958478, + "learning_rate": 1.4698844560520107e-05, + "loss": 1.04, + "step": 1122 + }, + { + "epoch": 2.8721227621483374, + "grad_norm": 0.21756710346483277, + "learning_rate": 1.4687581416893218e-05, + "loss": 1.0115, + "step": 1123 + }, + { + "epoch": 2.874680306905371, + "grad_norm": 0.27116811809019226, + "learning_rate": 1.4676310646864455e-05, + "loss": 1.0925, + "step": 1124 + }, + { + "epoch": 2.877237851662404, + "grad_norm": 0.20359779513752466, + "learning_rate": 1.4665032268770656e-05, + "loss": 1.0662, + "step": 1125 + }, + { + "epoch": 2.8797953964194374, + "grad_norm": 0.25086860996163834, + "learning_rate": 1.4653746300961037e-05, + "loss": 1.0615, + "step": 1126 + }, + { + "epoch": 2.8823529411764706, + "grad_norm": 0.21619154701357268, + "learning_rate": 1.4642452761797166e-05, + "loss": 1.028, + "step": 1127 + }, + { + "epoch": 2.8849104859335037, + "grad_norm": 0.23657771626030477, + "learning_rate": 1.4631151669652917e-05, + "loss": 1.0339, + "step": 1128 + }, + { + "epoch": 2.887468030690537, + "grad_norm": 0.25435410320469787, + "learning_rate": 1.4619843042914466e-05, + "loss": 1.0382, + "step": 1129 + }, + { + "epoch": 2.89002557544757, + "grad_norm": 0.3165858987447032, + "learning_rate": 1.4608526899980238e-05, + "loss": 1.0631, + "step": 1130 + }, + { + "epoch": 2.8925831202046037, + "grad_norm": 0.3059530735276844, + "learning_rate": 1.4597203259260893e-05, + "loss": 1.0742, + "step": 1131 + }, + { + "epoch": 2.895140664961637, + "grad_norm": 0.23231123365328338, + "learning_rate": 1.4585872139179284e-05, + "loss": 1.0108, + "step": 1132 + }, + { + "epoch": 2.89769820971867, + "grad_norm": 0.32159788413714113, + "learning_rate": 1.457453355817044e-05, + "loss": 1.0343, + "step": 1133 + }, + { + "epoch": 2.9002557544757033, + "grad_norm": 0.2624561212579556, + "learning_rate": 1.456318753468152e-05, + "loss": 1.0344, + "step": 1134 + }, + { + "epoch": 2.9028132992327365, + "grad_norm": 0.21340797781295, + "learning_rate": 1.455183408717179e-05, + "loss": 1.0582, + "step": 1135 + }, + { + "epoch": 2.90537084398977, + "grad_norm": 0.27498982896150626, + "learning_rate": 1.4540473234112607e-05, + "loss": 1.0319, + "step": 1136 + }, + { + "epoch": 2.907928388746803, + "grad_norm": 0.26787413886350847, + "learning_rate": 1.4529104993987364e-05, + "loss": 1.094, + "step": 1137 + }, + { + "epoch": 2.9104859335038364, + "grad_norm": 0.22411507204789752, + "learning_rate": 1.4517729385291479e-05, + "loss": 1.0289, + "step": 1138 + }, + { + "epoch": 2.9130434782608696, + "grad_norm": 0.3186727715150146, + "learning_rate": 1.4506346426532356e-05, + "loss": 1.0474, + "step": 1139 + }, + { + "epoch": 2.915601023017903, + "grad_norm": 0.23017658335190225, + "learning_rate": 1.4494956136229356e-05, + "loss": 1.0406, + "step": 1140 + }, + { + "epoch": 2.918158567774936, + "grad_norm": 0.2469732487522561, + "learning_rate": 1.448355853291377e-05, + "loss": 1.0545, + "step": 1141 + }, + { + "epoch": 2.920716112531969, + "grad_norm": 0.34257461951959434, + "learning_rate": 1.4472153635128787e-05, + "loss": 1.0649, + "step": 1142 + }, + { + "epoch": 2.923273657289003, + "grad_norm": 0.26582238607210484, + "learning_rate": 1.4460741461429457e-05, + "loss": 1.0643, + "step": 1143 + }, + { + "epoch": 2.9258312020460355, + "grad_norm": 0.238713886041743, + "learning_rate": 1.4449322030382681e-05, + "loss": 1.0375, + "step": 1144 + }, + { + "epoch": 2.928388746803069, + "grad_norm": 0.28544164960503227, + "learning_rate": 1.4437895360567156e-05, + "loss": 1.0459, + "step": 1145 + }, + { + "epoch": 2.9309462915601023, + "grad_norm": 0.30617216188801405, + "learning_rate": 1.4426461470573358e-05, + "loss": 1.0352, + "step": 1146 + }, + { + "epoch": 2.9335038363171355, + "grad_norm": 0.23250706835607923, + "learning_rate": 1.4415020379003513e-05, + "loss": 1.0547, + "step": 1147 + }, + { + "epoch": 2.9360613810741687, + "grad_norm": 0.23449213816934886, + "learning_rate": 1.4403572104471559e-05, + "loss": 1.0506, + "step": 1148 + }, + { + "epoch": 2.938618925831202, + "grad_norm": 0.26285727807721854, + "learning_rate": 1.4392116665603123e-05, + "loss": 1.067, + "step": 1149 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.25864228967500363, + "learning_rate": 1.4380654081035492e-05, + "loss": 1.0566, + "step": 1150 + }, + { + "epoch": 2.9437340153452687, + "grad_norm": 0.2197313587417355, + "learning_rate": 1.4369184369417573e-05, + "loss": 1.069, + "step": 1151 + }, + { + "epoch": 2.946291560102302, + "grad_norm": 0.22175625255078796, + "learning_rate": 1.4357707549409865e-05, + "loss": 1.0393, + "step": 1152 + }, + { + "epoch": 2.948849104859335, + "grad_norm": 0.20734806987916835, + "learning_rate": 1.4346223639684445e-05, + "loss": 1.0629, + "step": 1153 + }, + { + "epoch": 2.9514066496163682, + "grad_norm": 0.20844980678105798, + "learning_rate": 1.4334732658924906e-05, + "loss": 1.0683, + "step": 1154 + }, + { + "epoch": 2.9539641943734014, + "grad_norm": 0.1986457605182691, + "learning_rate": 1.4323234625826363e-05, + "loss": 1.082, + "step": 1155 + }, + { + "epoch": 2.9565217391304346, + "grad_norm": 0.2173974733436024, + "learning_rate": 1.4311729559095391e-05, + "loss": 1.0579, + "step": 1156 + }, + { + "epoch": 2.959079283887468, + "grad_norm": 0.23569051033252647, + "learning_rate": 1.430021747745002e-05, + "loss": 1.0501, + "step": 1157 + }, + { + "epoch": 2.9616368286445014, + "grad_norm": 0.1958953354096487, + "learning_rate": 1.4288698399619682e-05, + "loss": 1.0423, + "step": 1158 + }, + { + "epoch": 2.9641943734015346, + "grad_norm": 0.24550680925330018, + "learning_rate": 1.4277172344345203e-05, + "loss": 1.0429, + "step": 1159 + }, + { + "epoch": 2.9667519181585678, + "grad_norm": 0.22335624269922177, + "learning_rate": 1.4265639330378751e-05, + "loss": 1.0637, + "step": 1160 + }, + { + "epoch": 2.969309462915601, + "grad_norm": 0.19207777433952558, + "learning_rate": 1.4254099376483814e-05, + "loss": 1.032, + "step": 1161 + }, + { + "epoch": 2.971867007672634, + "grad_norm": 0.21933228277599973, + "learning_rate": 1.424255250143518e-05, + "loss": 1.0399, + "step": 1162 + }, + { + "epoch": 2.9744245524296673, + "grad_norm": 0.2042696972237095, + "learning_rate": 1.423099872401889e-05, + "loss": 1.082, + "step": 1163 + }, + { + "epoch": 2.976982097186701, + "grad_norm": 0.23521017440946976, + "learning_rate": 1.4219438063032223e-05, + "loss": 1.0337, + "step": 1164 + }, + { + "epoch": 2.979539641943734, + "grad_norm": 0.23773407464153606, + "learning_rate": 1.4207870537283645e-05, + "loss": 1.0464, + "step": 1165 + }, + { + "epoch": 2.9820971867007673, + "grad_norm": 0.19999456866670134, + "learning_rate": 1.4196296165592804e-05, + "loss": 1.0738, + "step": 1166 + }, + { + "epoch": 2.9846547314578005, + "grad_norm": 0.24196149952728568, + "learning_rate": 1.4184714966790472e-05, + "loss": 1.0515, + "step": 1167 + }, + { + "epoch": 2.9872122762148337, + "grad_norm": 0.2078635385282362, + "learning_rate": 1.4173126959718542e-05, + "loss": 1.0685, + "step": 1168 + }, + { + "epoch": 2.9897698209718673, + "grad_norm": 0.22519888128468324, + "learning_rate": 1.416153216322997e-05, + "loss": 1.0406, + "step": 1169 + }, + { + "epoch": 2.9923273657289, + "grad_norm": 0.23526057180385235, + "learning_rate": 1.4149930596188768e-05, + "loss": 1.0388, + "step": 1170 + }, + { + "epoch": 2.9948849104859336, + "grad_norm": 0.23228687433861023, + "learning_rate": 1.4138322277469962e-05, + "loss": 1.035, + "step": 1171 + }, + { + "epoch": 2.997442455242967, + "grad_norm": 0.23799687340205392, + "learning_rate": 1.412670722595956e-05, + "loss": 1.0798, + "step": 1172 + }, + { + "epoch": 3.0, + "grad_norm": 0.22605319189413042, + "learning_rate": 1.4115085460554524e-05, + "loss": 1.0724, + "step": 1173 + }, + { + "epoch": 3.002557544757033, + "grad_norm": 0.22583372556086656, + "learning_rate": 1.410345700016274e-05, + "loss": 1.0653, + "step": 1174 + }, + { + "epoch": 3.0051150895140664, + "grad_norm": 0.20810235633737204, + "learning_rate": 1.4091821863702983e-05, + "loss": 1.0641, + "step": 1175 + }, + { + "epoch": 3.0076726342710995, + "grad_norm": 0.20645828983892262, + "learning_rate": 1.4080180070104897e-05, + "loss": 1.0426, + "step": 1176 + }, + { + "epoch": 3.010230179028133, + "grad_norm": 0.20345366792505884, + "learning_rate": 1.406853163830895e-05, + "loss": 1.0849, + "step": 1177 + }, + { + "epoch": 3.0127877237851663, + "grad_norm": 0.21212291453565033, + "learning_rate": 1.4056876587266413e-05, + "loss": 1.0687, + "step": 1178 + }, + { + "epoch": 3.0153452685421995, + "grad_norm": 0.19908450369242628, + "learning_rate": 1.4045214935939323e-05, + "loss": 1.0193, + "step": 1179 + }, + { + "epoch": 3.0179028132992327, + "grad_norm": 0.22127953869549283, + "learning_rate": 1.4033546703300465e-05, + "loss": 1.027, + "step": 1180 + }, + { + "epoch": 3.020460358056266, + "grad_norm": 0.2284795334598278, + "learning_rate": 1.402187190833331e-05, + "loss": 1.041, + "step": 1181 + }, + { + "epoch": 3.023017902813299, + "grad_norm": 0.2062329065326131, + "learning_rate": 1.4010190570032034e-05, + "loss": 1.0371, + "step": 1182 + }, + { + "epoch": 3.0255754475703327, + "grad_norm": 0.19478100964489237, + "learning_rate": 1.3998502707401437e-05, + "loss": 1.0578, + "step": 1183 + }, + { + "epoch": 3.028132992327366, + "grad_norm": 0.22168971452412287, + "learning_rate": 1.398680833945694e-05, + "loss": 1.023, + "step": 1184 + }, + { + "epoch": 3.030690537084399, + "grad_norm": 0.2040809628837293, + "learning_rate": 1.3975107485224552e-05, + "loss": 1.0382, + "step": 1185 + }, + { + "epoch": 3.0332480818414322, + "grad_norm": 0.2051983553640489, + "learning_rate": 1.3963400163740828e-05, + "loss": 1.0186, + "step": 1186 + }, + { + "epoch": 3.0358056265984654, + "grad_norm": 0.2350671015231016, + "learning_rate": 1.395168639405285e-05, + "loss": 1.0455, + "step": 1187 + }, + { + "epoch": 3.0383631713554986, + "grad_norm": 0.22621448501355076, + "learning_rate": 1.3939966195218188e-05, + "loss": 1.0074, + "step": 1188 + }, + { + "epoch": 3.040920716112532, + "grad_norm": 0.23737640534776971, + "learning_rate": 1.3928239586304873e-05, + "loss": 1.0437, + "step": 1189 + }, + { + "epoch": 3.0434782608695654, + "grad_norm": 0.2323257168547048, + "learning_rate": 1.3916506586391364e-05, + "loss": 1.0327, + "step": 1190 + }, + { + "epoch": 3.0460358056265986, + "grad_norm": 0.22305161499533654, + "learning_rate": 1.390476721456652e-05, + "loss": 1.0099, + "step": 1191 + }, + { + "epoch": 3.0485933503836318, + "grad_norm": 0.23535858097990897, + "learning_rate": 1.3893021489929564e-05, + "loss": 1.051, + "step": 1192 + }, + { + "epoch": 3.051150895140665, + "grad_norm": 0.20326385048979087, + "learning_rate": 1.3881269431590052e-05, + "loss": 1.057, + "step": 1193 + }, + { + "epoch": 3.053708439897698, + "grad_norm": 0.21150204467244554, + "learning_rate": 1.3869511058667855e-05, + "loss": 1.0296, + "step": 1194 + }, + { + "epoch": 3.0562659846547313, + "grad_norm": 0.2227085234968232, + "learning_rate": 1.3857746390293106e-05, + "loss": 1.0342, + "step": 1195 + }, + { + "epoch": 3.0588235294117645, + "grad_norm": 0.24189335016155378, + "learning_rate": 1.3845975445606184e-05, + "loss": 1.0491, + "step": 1196 + }, + { + "epoch": 3.061381074168798, + "grad_norm": 0.21700679291777608, + "learning_rate": 1.383419824375768e-05, + "loss": 1.0458, + "step": 1197 + }, + { + "epoch": 3.0639386189258313, + "grad_norm": 0.2325789506958363, + "learning_rate": 1.382241480390837e-05, + "loss": 1.0451, + "step": 1198 + }, + { + "epoch": 3.0664961636828645, + "grad_norm": 0.21783084381710976, + "learning_rate": 1.3810625145229174e-05, + "loss": 1.0621, + "step": 1199 + }, + { + "epoch": 3.0690537084398977, + "grad_norm": 0.259978348441225, + "learning_rate": 1.3798829286901122e-05, + "loss": 1.0216, + "step": 1200 + }, + { + "epoch": 3.071611253196931, + "grad_norm": 0.2531231166315013, + "learning_rate": 1.3787027248115341e-05, + "loss": 1.0344, + "step": 1201 + }, + { + "epoch": 3.074168797953964, + "grad_norm": 0.25693037958499804, + "learning_rate": 1.3775219048073011e-05, + "loss": 1.0571, + "step": 1202 + }, + { + "epoch": 3.0767263427109977, + "grad_norm": 0.22329447917802453, + "learning_rate": 1.376340470598534e-05, + "loss": 1.0621, + "step": 1203 + }, + { + "epoch": 3.079283887468031, + "grad_norm": 0.24363305905238922, + "learning_rate": 1.3751584241073517e-05, + "loss": 1.0627, + "step": 1204 + }, + { + "epoch": 3.081841432225064, + "grad_norm": 0.252245006887946, + "learning_rate": 1.3739757672568703e-05, + "loss": 1.0619, + "step": 1205 + }, + { + "epoch": 3.084398976982097, + "grad_norm": 0.24187527332738293, + "learning_rate": 1.3727925019711981e-05, + "loss": 1.0324, + "step": 1206 + }, + { + "epoch": 3.0869565217391304, + "grad_norm": 0.2140650570738505, + "learning_rate": 1.3716086301754343e-05, + "loss": 1.0538, + "step": 1207 + }, + { + "epoch": 3.0895140664961636, + "grad_norm": 0.26828049735013604, + "learning_rate": 1.3704241537956643e-05, + "loss": 1.0806, + "step": 1208 + }, + { + "epoch": 3.0920716112531967, + "grad_norm": 0.20662196910585112, + "learning_rate": 1.3692390747589564e-05, + "loss": 1.0272, + "step": 1209 + }, + { + "epoch": 3.0946291560102304, + "grad_norm": 0.23564415225665816, + "learning_rate": 1.3680533949933607e-05, + "loss": 1.0499, + "step": 1210 + }, + { + "epoch": 3.0971867007672635, + "grad_norm": 0.20991526952221617, + "learning_rate": 1.3668671164279039e-05, + "loss": 1.0514, + "step": 1211 + }, + { + "epoch": 3.0997442455242967, + "grad_norm": 0.22870151484413298, + "learning_rate": 1.3656802409925874e-05, + "loss": 1.0134, + "step": 1212 + }, + { + "epoch": 3.10230179028133, + "grad_norm": 0.21877781759998727, + "learning_rate": 1.3644927706183824e-05, + "loss": 1.0851, + "step": 1213 + }, + { + "epoch": 3.104859335038363, + "grad_norm": 0.2327125173805525, + "learning_rate": 1.3633047072372301e-05, + "loss": 1.0311, + "step": 1214 + }, + { + "epoch": 3.1074168797953963, + "grad_norm": 0.22202571636713042, + "learning_rate": 1.3621160527820343e-05, + "loss": 1.0737, + "step": 1215 + }, + { + "epoch": 3.10997442455243, + "grad_norm": 0.2154525697553689, + "learning_rate": 1.3609268091866621e-05, + "loss": 1.0298, + "step": 1216 + }, + { + "epoch": 3.112531969309463, + "grad_norm": 0.24440602961960542, + "learning_rate": 1.3597369783859385e-05, + "loss": 1.0637, + "step": 1217 + }, + { + "epoch": 3.1150895140664963, + "grad_norm": 0.22947504540372743, + "learning_rate": 1.3585465623156434e-05, + "loss": 1.0358, + "step": 1218 + }, + { + "epoch": 3.1176470588235294, + "grad_norm": 0.20546693748205078, + "learning_rate": 1.3573555629125097e-05, + "loss": 1.0531, + "step": 1219 + }, + { + "epoch": 3.1202046035805626, + "grad_norm": 0.2376207772257609, + "learning_rate": 1.3561639821142187e-05, + "loss": 1.0422, + "step": 1220 + }, + { + "epoch": 3.122762148337596, + "grad_norm": 0.2075906124157621, + "learning_rate": 1.3549718218593982e-05, + "loss": 1.0373, + "step": 1221 + }, + { + "epoch": 3.125319693094629, + "grad_norm": 0.2710877805734423, + "learning_rate": 1.3537790840876179e-05, + "loss": 0.9867, + "step": 1222 + }, + { + "epoch": 3.1278772378516626, + "grad_norm": 0.21873389694947254, + "learning_rate": 1.3525857707393878e-05, + "loss": 1.0493, + "step": 1223 + }, + { + "epoch": 3.130434782608696, + "grad_norm": 0.23140954420047274, + "learning_rate": 1.3513918837561544e-05, + "loss": 1.0192, + "step": 1224 + }, + { + "epoch": 3.132992327365729, + "grad_norm": 0.21413826548960174, + "learning_rate": 1.3501974250802967e-05, + "loss": 1.0233, + "step": 1225 + }, + { + "epoch": 3.135549872122762, + "grad_norm": 0.2211593381832046, + "learning_rate": 1.3490023966551249e-05, + "loss": 1.0415, + "step": 1226 + }, + { + "epoch": 3.1381074168797953, + "grad_norm": 0.23108631631913867, + "learning_rate": 1.3478068004248747e-05, + "loss": 1.0399, + "step": 1227 + }, + { + "epoch": 3.1406649616368285, + "grad_norm": 0.22275279756167513, + "learning_rate": 1.346610638334707e-05, + "loss": 1.0596, + "step": 1228 + }, + { + "epoch": 3.1432225063938617, + "grad_norm": 0.2524231602837744, + "learning_rate": 1.3454139123307023e-05, + "loss": 1.065, + "step": 1229 + }, + { + "epoch": 3.1457800511508953, + "grad_norm": 0.2196098109454718, + "learning_rate": 1.3442166243598598e-05, + "loss": 1.0497, + "step": 1230 + }, + { + "epoch": 3.1483375959079285, + "grad_norm": 0.2392235318659055, + "learning_rate": 1.3430187763700914e-05, + "loss": 1.0579, + "step": 1231 + }, + { + "epoch": 3.1508951406649617, + "grad_norm": 0.2252882411678263, + "learning_rate": 1.341820370310221e-05, + "loss": 1.037, + "step": 1232 + }, + { + "epoch": 3.153452685421995, + "grad_norm": 0.21957606499611643, + "learning_rate": 1.3406214081299807e-05, + "loss": 1.077, + "step": 1233 + }, + { + "epoch": 3.156010230179028, + "grad_norm": 0.2158883136158835, + "learning_rate": 1.3394218917800064e-05, + "loss": 1.0576, + "step": 1234 + }, + { + "epoch": 3.1585677749360612, + "grad_norm": 0.23206630107006462, + "learning_rate": 1.3382218232118367e-05, + "loss": 1.046, + "step": 1235 + }, + { + "epoch": 3.1611253196930944, + "grad_norm": 0.22650165934718894, + "learning_rate": 1.3370212043779078e-05, + "loss": 1.0513, + "step": 1236 + }, + { + "epoch": 3.163682864450128, + "grad_norm": 0.2146494581025888, + "learning_rate": 1.335820037231552e-05, + "loss": 1.0418, + "step": 1237 + }, + { + "epoch": 3.166240409207161, + "grad_norm": 0.22693672785502703, + "learning_rate": 1.3346183237269925e-05, + "loss": 1.044, + "step": 1238 + }, + { + "epoch": 3.1687979539641944, + "grad_norm": 0.24944388113412067, + "learning_rate": 1.3334160658193425e-05, + "loss": 1.0085, + "step": 1239 + }, + { + "epoch": 3.1713554987212276, + "grad_norm": 0.2323240702756201, + "learning_rate": 1.3322132654646003e-05, + "loss": 1.0348, + "step": 1240 + }, + { + "epoch": 3.1739130434782608, + "grad_norm": 0.23314120380593967, + "learning_rate": 1.3310099246196466e-05, + "loss": 1.0255, + "step": 1241 + }, + { + "epoch": 3.176470588235294, + "grad_norm": 0.22959022702139156, + "learning_rate": 1.3298060452422421e-05, + "loss": 1.0303, + "step": 1242 + }, + { + "epoch": 3.1790281329923276, + "grad_norm": 0.1945764817333214, + "learning_rate": 1.3286016292910229e-05, + "loss": 1.0366, + "step": 1243 + }, + { + "epoch": 3.1815856777493607, + "grad_norm": 0.2049881448552149, + "learning_rate": 1.327396678725499e-05, + "loss": 1.0224, + "step": 1244 + }, + { + "epoch": 3.184143222506394, + "grad_norm": 0.245199876694944, + "learning_rate": 1.3261911955060493e-05, + "loss": 0.9968, + "step": 1245 + }, + { + "epoch": 3.186700767263427, + "grad_norm": 0.19541276884697034, + "learning_rate": 1.3249851815939197e-05, + "loss": 1.0502, + "step": 1246 + }, + { + "epoch": 3.1892583120204603, + "grad_norm": 0.22313066289223873, + "learning_rate": 1.3237786389512191e-05, + "loss": 1.0577, + "step": 1247 + }, + { + "epoch": 3.1918158567774935, + "grad_norm": 0.23691814508572034, + "learning_rate": 1.3225715695409171e-05, + "loss": 1.0407, + "step": 1248 + }, + { + "epoch": 3.1943734015345266, + "grad_norm": 0.19364764369376442, + "learning_rate": 1.3213639753268406e-05, + "loss": 1.0289, + "step": 1249 + }, + { + "epoch": 3.1969309462915603, + "grad_norm": 0.19636310287160377, + "learning_rate": 1.3201558582736693e-05, + "loss": 1.0389, + "step": 1250 + }, + { + "epoch": 3.1994884910485935, + "grad_norm": 0.1876664287484004, + "learning_rate": 1.3189472203469347e-05, + "loss": 1.0167, + "step": 1251 + }, + { + "epoch": 3.2020460358056266, + "grad_norm": 0.19365316134612506, + "learning_rate": 1.3177380635130144e-05, + "loss": 1.0522, + "step": 1252 + }, + { + "epoch": 3.20460358056266, + "grad_norm": 0.17412371216897868, + "learning_rate": 1.3165283897391315e-05, + "loss": 1.0125, + "step": 1253 + }, + { + "epoch": 3.207161125319693, + "grad_norm": 0.21377597350657065, + "learning_rate": 1.3153182009933495e-05, + "loss": 1.035, + "step": 1254 + }, + { + "epoch": 3.209718670076726, + "grad_norm": 0.18072951551049465, + "learning_rate": 1.3141074992445695e-05, + "loss": 1.0354, + "step": 1255 + }, + { + "epoch": 3.21227621483376, + "grad_norm": 0.21819804516231073, + "learning_rate": 1.3128962864625281e-05, + "loss": 1.0288, + "step": 1256 + }, + { + "epoch": 3.214833759590793, + "grad_norm": 0.22829327535687294, + "learning_rate": 1.3116845646177923e-05, + "loss": 1.0329, + "step": 1257 + }, + { + "epoch": 3.217391304347826, + "grad_norm": 0.22096551556124827, + "learning_rate": 1.3104723356817582e-05, + "loss": 1.0272, + "step": 1258 + }, + { + "epoch": 3.2199488491048593, + "grad_norm": 0.19427368545567542, + "learning_rate": 1.309259601626646e-05, + "loss": 1.0757, + "step": 1259 + }, + { + "epoch": 3.2225063938618925, + "grad_norm": 0.2517142880283656, + "learning_rate": 1.3080463644254986e-05, + "loss": 1.0449, + "step": 1260 + }, + { + "epoch": 3.2250639386189257, + "grad_norm": 0.21438511450639225, + "learning_rate": 1.3068326260521769e-05, + "loss": 1.0253, + "step": 1261 + }, + { + "epoch": 3.227621483375959, + "grad_norm": 0.23939604240119217, + "learning_rate": 1.3056183884813568e-05, + "loss": 1.0055, + "step": 1262 + }, + { + "epoch": 3.2301790281329925, + "grad_norm": 0.24913816729402657, + "learning_rate": 1.3044036536885284e-05, + "loss": 1.0305, + "step": 1263 + }, + { + "epoch": 3.2327365728900257, + "grad_norm": 0.22985968452270927, + "learning_rate": 1.3031884236499877e-05, + "loss": 1.0356, + "step": 1264 + }, + { + "epoch": 3.235294117647059, + "grad_norm": 0.2432127136491896, + "learning_rate": 1.3019727003428387e-05, + "loss": 1.0327, + "step": 1265 + }, + { + "epoch": 3.237851662404092, + "grad_norm": 0.21511626506563813, + "learning_rate": 1.300756485744987e-05, + "loss": 1.0351, + "step": 1266 + }, + { + "epoch": 3.2404092071611252, + "grad_norm": 0.21620331140589194, + "learning_rate": 1.2995397818351381e-05, + "loss": 1.0272, + "step": 1267 + }, + { + "epoch": 3.2429667519181584, + "grad_norm": 0.24918797088173247, + "learning_rate": 1.2983225905927924e-05, + "loss": 0.9923, + "step": 1268 + }, + { + "epoch": 3.2455242966751916, + "grad_norm": 0.2033868759774891, + "learning_rate": 1.2971049139982448e-05, + "loss": 1.0526, + "step": 1269 + }, + { + "epoch": 3.2480818414322252, + "grad_norm": 0.24065409839804014, + "learning_rate": 1.2958867540325785e-05, + "loss": 1.0283, + "step": 1270 + }, + { + "epoch": 3.2506393861892584, + "grad_norm": 0.23975735377063542, + "learning_rate": 1.294668112677664e-05, + "loss": 1.0467, + "step": 1271 + }, + { + "epoch": 3.2531969309462916, + "grad_norm": 0.20321738007355677, + "learning_rate": 1.2934489919161541e-05, + "loss": 1.0292, + "step": 1272 + }, + { + "epoch": 3.2557544757033248, + "grad_norm": 0.22563988593724132, + "learning_rate": 1.292229393731482e-05, + "loss": 1.0273, + "step": 1273 + }, + { + "epoch": 3.258312020460358, + "grad_norm": 0.2108784426288754, + "learning_rate": 1.2910093201078584e-05, + "loss": 1.041, + "step": 1274 + }, + { + "epoch": 3.260869565217391, + "grad_norm": 0.25182826531670705, + "learning_rate": 1.289788773030266e-05, + "loss": 1.0507, + "step": 1275 + }, + { + "epoch": 3.2634271099744243, + "grad_norm": 0.23260866121986465, + "learning_rate": 1.2885677544844592e-05, + "loss": 1.0073, + "step": 1276 + }, + { + "epoch": 3.265984654731458, + "grad_norm": 0.20778832907058722, + "learning_rate": 1.2873462664569583e-05, + "loss": 1.063, + "step": 1277 + }, + { + "epoch": 3.268542199488491, + "grad_norm": 0.24704017386773852, + "learning_rate": 1.2861243109350485e-05, + "loss": 1.0275, + "step": 1278 + }, + { + "epoch": 3.2710997442455243, + "grad_norm": 0.20143011397018976, + "learning_rate": 1.2849018899067746e-05, + "loss": 1.0786, + "step": 1279 + }, + { + "epoch": 3.2736572890025575, + "grad_norm": 0.19780957370773475, + "learning_rate": 1.2836790053609396e-05, + "loss": 1.0475, + "step": 1280 + }, + { + "epoch": 3.2762148337595907, + "grad_norm": 0.21001290371983408, + "learning_rate": 1.2824556592870993e-05, + "loss": 1.0544, + "step": 1281 + }, + { + "epoch": 3.2787723785166243, + "grad_norm": 0.2314545925289747, + "learning_rate": 1.2812318536755624e-05, + "loss": 1.0432, + "step": 1282 + }, + { + "epoch": 3.2813299232736575, + "grad_norm": 0.21988256589877733, + "learning_rate": 1.2800075905173834e-05, + "loss": 1.0432, + "step": 1283 + }, + { + "epoch": 3.2838874680306906, + "grad_norm": 0.26832633674704665, + "learning_rate": 1.2787828718043622e-05, + "loss": 1.0379, + "step": 1284 + }, + { + "epoch": 3.286445012787724, + "grad_norm": 0.2234222589374059, + "learning_rate": 1.2775576995290397e-05, + "loss": 1.0421, + "step": 1285 + }, + { + "epoch": 3.289002557544757, + "grad_norm": 0.20516563803916263, + "learning_rate": 1.276332075684694e-05, + "loss": 1.0392, + "step": 1286 + }, + { + "epoch": 3.29156010230179, + "grad_norm": 0.2404590656925125, + "learning_rate": 1.2751060022653393e-05, + "loss": 1.0283, + "step": 1287 + }, + { + "epoch": 3.2941176470588234, + "grad_norm": 0.19864113603292302, + "learning_rate": 1.2738794812657194e-05, + "loss": 1.0144, + "step": 1288 + }, + { + "epoch": 3.296675191815857, + "grad_norm": 0.2323436030300969, + "learning_rate": 1.2726525146813078e-05, + "loss": 1.0151, + "step": 1289 + }, + { + "epoch": 3.29923273657289, + "grad_norm": 0.24929371156784427, + "learning_rate": 1.2714251045083028e-05, + "loss": 1.0137, + "step": 1290 + }, + { + "epoch": 3.3017902813299234, + "grad_norm": 0.20413376158858587, + "learning_rate": 1.2701972527436235e-05, + "loss": 1.0233, + "step": 1291 + }, + { + "epoch": 3.3043478260869565, + "grad_norm": 0.21637513281635873, + "learning_rate": 1.2689689613849083e-05, + "loss": 1.0586, + "step": 1292 + }, + { + "epoch": 3.3069053708439897, + "grad_norm": 0.18194714637573692, + "learning_rate": 1.2677402324305099e-05, + "loss": 0.994, + "step": 1293 + }, + { + "epoch": 3.309462915601023, + "grad_norm": 0.19606411156722506, + "learning_rate": 1.266511067879494e-05, + "loss": 1.0283, + "step": 1294 + }, + { + "epoch": 3.312020460358056, + "grad_norm": 0.19517256802808283, + "learning_rate": 1.265281469731634e-05, + "loss": 1.0373, + "step": 1295 + }, + { + "epoch": 3.3145780051150897, + "grad_norm": 0.17867307264513901, + "learning_rate": 1.2640514399874095e-05, + "loss": 1.0517, + "step": 1296 + }, + { + "epoch": 3.317135549872123, + "grad_norm": 0.19814474828943063, + "learning_rate": 1.2628209806480024e-05, + "loss": 1.0068, + "step": 1297 + }, + { + "epoch": 3.319693094629156, + "grad_norm": 0.21270750338094424, + "learning_rate": 1.2615900937152923e-05, + "loss": 1.0236, + "step": 1298 + }, + { + "epoch": 3.3222506393861893, + "grad_norm": 0.21625825452151415, + "learning_rate": 1.2603587811918558e-05, + "loss": 1.0495, + "step": 1299 + }, + { + "epoch": 3.3248081841432224, + "grad_norm": 0.23776899893360745, + "learning_rate": 1.2591270450809612e-05, + "loss": 1.0741, + "step": 1300 + }, + { + "epoch": 3.3273657289002556, + "grad_norm": 0.22428186293001376, + "learning_rate": 1.2578948873865662e-05, + "loss": 1.0132, + "step": 1301 + }, + { + "epoch": 3.329923273657289, + "grad_norm": 0.20864902455184137, + "learning_rate": 1.2566623101133144e-05, + "loss": 1.0464, + "step": 1302 + }, + { + "epoch": 3.3324808184143224, + "grad_norm": 0.2685355350833958, + "learning_rate": 1.2554293152665316e-05, + "loss": 1.0247, + "step": 1303 + }, + { + "epoch": 3.3350383631713556, + "grad_norm": 0.2527986356697781, + "learning_rate": 1.2541959048522239e-05, + "loss": 1.0399, + "step": 1304 + }, + { + "epoch": 3.337595907928389, + "grad_norm": 0.22197339925214596, + "learning_rate": 1.2529620808770723e-05, + "loss": 1.0157, + "step": 1305 + }, + { + "epoch": 3.340153452685422, + "grad_norm": 0.3107261506811511, + "learning_rate": 1.251727845348432e-05, + "loss": 1.0495, + "step": 1306 + }, + { + "epoch": 3.342710997442455, + "grad_norm": 0.2643689123746537, + "learning_rate": 1.2504932002743262e-05, + "loss": 1.001, + "step": 1307 + }, + { + "epoch": 3.3452685421994883, + "grad_norm": 0.2364739279711792, + "learning_rate": 1.2492581476634458e-05, + "loss": 1.045, + "step": 1308 + }, + { + "epoch": 3.3478260869565215, + "grad_norm": 0.28136518049730547, + "learning_rate": 1.2480226895251439e-05, + "loss": 1.0285, + "step": 1309 + }, + { + "epoch": 3.350383631713555, + "grad_norm": 0.2523350080360508, + "learning_rate": 1.2467868278694342e-05, + "loss": 1.0658, + "step": 1310 + }, + { + "epoch": 3.3529411764705883, + "grad_norm": 0.20529584681597104, + "learning_rate": 1.245550564706986e-05, + "loss": 1.0372, + "step": 1311 + }, + { + "epoch": 3.3554987212276215, + "grad_norm": 0.26187724014211844, + "learning_rate": 1.2443139020491216e-05, + "loss": 1.0295, + "step": 1312 + }, + { + "epoch": 3.3580562659846547, + "grad_norm": 0.2759180573007528, + "learning_rate": 1.2430768419078143e-05, + "loss": 1.0312, + "step": 1313 + }, + { + "epoch": 3.360613810741688, + "grad_norm": 0.2020495956799633, + "learning_rate": 1.2418393862956837e-05, + "loss": 1.0419, + "step": 1314 + }, + { + "epoch": 3.363171355498721, + "grad_norm": 0.2369272520944126, + "learning_rate": 1.2406015372259925e-05, + "loss": 1.0122, + "step": 1315 + }, + { + "epoch": 3.3657289002557547, + "grad_norm": 0.2184979100214276, + "learning_rate": 1.2393632967126441e-05, + "loss": 1.0327, + "step": 1316 + }, + { + "epoch": 3.368286445012788, + "grad_norm": 0.23858603204557072, + "learning_rate": 1.2381246667701781e-05, + "loss": 1.0475, + "step": 1317 + }, + { + "epoch": 3.370843989769821, + "grad_norm": 0.26756479784593945, + "learning_rate": 1.236885649413768e-05, + "loss": 1.0426, + "step": 1318 + }, + { + "epoch": 3.373401534526854, + "grad_norm": 0.1892302039091279, + "learning_rate": 1.2356462466592177e-05, + "loss": 1.0412, + "step": 1319 + }, + { + "epoch": 3.3759590792838874, + "grad_norm": 0.29335988888765785, + "learning_rate": 1.2344064605229577e-05, + "loss": 1.0175, + "step": 1320 + }, + { + "epoch": 3.3785166240409206, + "grad_norm": 0.21447038773497848, + "learning_rate": 1.2331662930220424e-05, + "loss": 1.018, + "step": 1321 + }, + { + "epoch": 3.381074168797954, + "grad_norm": 0.24164773212365756, + "learning_rate": 1.2319257461741478e-05, + "loss": 1.029, + "step": 1322 + }, + { + "epoch": 3.3836317135549874, + "grad_norm": 0.23724415736018667, + "learning_rate": 1.2306848219975649e-05, + "loss": 1.017, + "step": 1323 + }, + { + "epoch": 3.3861892583120206, + "grad_norm": 0.2146728306264026, + "learning_rate": 1.2294435225112005e-05, + "loss": 1.0301, + "step": 1324 + }, + { + "epoch": 3.3887468030690537, + "grad_norm": 0.18212095256468025, + "learning_rate": 1.2282018497345705e-05, + "loss": 1.0361, + "step": 1325 + }, + { + "epoch": 3.391304347826087, + "grad_norm": 0.23148682510609303, + "learning_rate": 1.2269598056877996e-05, + "loss": 1.0385, + "step": 1326 + }, + { + "epoch": 3.39386189258312, + "grad_norm": 0.20473257376707585, + "learning_rate": 1.2257173923916154e-05, + "loss": 1.0208, + "step": 1327 + }, + { + "epoch": 3.3964194373401533, + "grad_norm": 0.20995062344103757, + "learning_rate": 1.2244746118673467e-05, + "loss": 1.0116, + "step": 1328 + }, + { + "epoch": 3.398976982097187, + "grad_norm": 0.23774156769953378, + "learning_rate": 1.22323146613692e-05, + "loss": 1.0742, + "step": 1329 + }, + { + "epoch": 3.40153452685422, + "grad_norm": 0.20830692559875352, + "learning_rate": 1.2219879572228555e-05, + "loss": 1.0565, + "step": 1330 + }, + { + "epoch": 3.4040920716112533, + "grad_norm": 0.2147028468697588, + "learning_rate": 1.2207440871482644e-05, + "loss": 1.0294, + "step": 1331 + }, + { + "epoch": 3.4066496163682864, + "grad_norm": 0.24756067918436106, + "learning_rate": 1.2194998579368451e-05, + "loss": 1.0479, + "step": 1332 + }, + { + "epoch": 3.4092071611253196, + "grad_norm": 0.2056045421373826, + "learning_rate": 1.2182552716128818e-05, + "loss": 1.0236, + "step": 1333 + }, + { + "epoch": 3.411764705882353, + "grad_norm": 0.2079215269898909, + "learning_rate": 1.2170103302012374e-05, + "loss": 1.0513, + "step": 1334 + }, + { + "epoch": 3.414322250639386, + "grad_norm": 0.19554068307435188, + "learning_rate": 1.2157650357273547e-05, + "loss": 1.0389, + "step": 1335 + }, + { + "epoch": 3.4168797953964196, + "grad_norm": 0.20840944979090947, + "learning_rate": 1.2145193902172496e-05, + "loss": 1.0355, + "step": 1336 + }, + { + "epoch": 3.419437340153453, + "grad_norm": 0.21130712097196197, + "learning_rate": 1.2132733956975093e-05, + "loss": 1.0322, + "step": 1337 + }, + { + "epoch": 3.421994884910486, + "grad_norm": 0.17958150894777242, + "learning_rate": 1.2120270541952892e-05, + "loss": 1.0227, + "step": 1338 + }, + { + "epoch": 3.424552429667519, + "grad_norm": 0.2225571229441682, + "learning_rate": 1.210780367738309e-05, + "loss": 1.0285, + "step": 1339 + }, + { + "epoch": 3.4271099744245523, + "grad_norm": 0.1885954682977986, + "learning_rate": 1.2095333383548495e-05, + "loss": 1.0812, + "step": 1340 + }, + { + "epoch": 3.4296675191815855, + "grad_norm": 0.2099948092443905, + "learning_rate": 1.2082859680737495e-05, + "loss": 1.0716, + "step": 1341 + }, + { + "epoch": 3.4322250639386187, + "grad_norm": 0.2256939428442792, + "learning_rate": 1.2070382589244026e-05, + "loss": 1.0311, + "step": 1342 + }, + { + "epoch": 3.4347826086956523, + "grad_norm": 0.23072791297771425, + "learning_rate": 1.2057902129367536e-05, + "loss": 1.0467, + "step": 1343 + }, + { + "epoch": 3.4373401534526855, + "grad_norm": 0.2057602125391487, + "learning_rate": 1.204541832141295e-05, + "loss": 1.028, + "step": 1344 + }, + { + "epoch": 3.4398976982097187, + "grad_norm": 0.2520074046407619, + "learning_rate": 1.2032931185690646e-05, + "loss": 1.0163, + "step": 1345 + }, + { + "epoch": 3.442455242966752, + "grad_norm": 0.2421964192866277, + "learning_rate": 1.202044074251641e-05, + "loss": 1.063, + "step": 1346 + }, + { + "epoch": 3.445012787723785, + "grad_norm": 0.20429551187516548, + "learning_rate": 1.2007947012211419e-05, + "loss": 1.0361, + "step": 1347 + }, + { + "epoch": 3.4475703324808182, + "grad_norm": 0.2520787216839294, + "learning_rate": 1.199545001510218e-05, + "loss": 1.054, + "step": 1348 + }, + { + "epoch": 3.4501278772378514, + "grad_norm": 0.24681543428956615, + "learning_rate": 1.1982949771520535e-05, + "loss": 1.0605, + "step": 1349 + }, + { + "epoch": 3.452685421994885, + "grad_norm": 0.20282034999970464, + "learning_rate": 1.1970446301803598e-05, + "loss": 1.0461, + "step": 1350 + }, + { + "epoch": 3.455242966751918, + "grad_norm": 0.22677677047988842, + "learning_rate": 1.1957939626293726e-05, + "loss": 1.0459, + "step": 1351 + }, + { + "epoch": 3.4578005115089514, + "grad_norm": 0.23929950706752162, + "learning_rate": 1.1945429765338507e-05, + "loss": 1.0531, + "step": 1352 + }, + { + "epoch": 3.4603580562659846, + "grad_norm": 0.2096490071983182, + "learning_rate": 1.1932916739290694e-05, + "loss": 1.0148, + "step": 1353 + }, + { + "epoch": 3.4629156010230178, + "grad_norm": 0.20618185619438542, + "learning_rate": 1.1920400568508201e-05, + "loss": 1.0375, + "step": 1354 + }, + { + "epoch": 3.4654731457800514, + "grad_norm": 0.23186283780985562, + "learning_rate": 1.1907881273354059e-05, + "loss": 1.0276, + "step": 1355 + }, + { + "epoch": 3.4680306905370846, + "grad_norm": 0.21691929515578598, + "learning_rate": 1.1895358874196377e-05, + "loss": 1.0368, + "step": 1356 + }, + { + "epoch": 3.4705882352941178, + "grad_norm": 0.20410519325755752, + "learning_rate": 1.188283339140831e-05, + "loss": 1.038, + "step": 1357 + }, + { + "epoch": 3.473145780051151, + "grad_norm": 0.22863334112386996, + "learning_rate": 1.1870304845368043e-05, + "loss": 1.0433, + "step": 1358 + }, + { + "epoch": 3.475703324808184, + "grad_norm": 0.2126661663430652, + "learning_rate": 1.1857773256458732e-05, + "loss": 1.0605, + "step": 1359 + }, + { + "epoch": 3.4782608695652173, + "grad_norm": 0.24272298207990836, + "learning_rate": 1.184523864506849e-05, + "loss": 1.0476, + "step": 1360 + }, + { + "epoch": 3.4808184143222505, + "grad_norm": 0.20098243757734405, + "learning_rate": 1.1832701031590345e-05, + "loss": 1.032, + "step": 1361 + }, + { + "epoch": 3.483375959079284, + "grad_norm": 0.2516527217412891, + "learning_rate": 1.1820160436422213e-05, + "loss": 1.0392, + "step": 1362 + }, + { + "epoch": 3.4859335038363173, + "grad_norm": 0.22312520765078486, + "learning_rate": 1.1807616879966856e-05, + "loss": 1.0549, + "step": 1363 + }, + { + "epoch": 3.4884910485933505, + "grad_norm": 0.23508194911007732, + "learning_rate": 1.1795070382631856e-05, + "loss": 1.0257, + "step": 1364 + }, + { + "epoch": 3.4910485933503836, + "grad_norm": 0.2056219883277526, + "learning_rate": 1.1782520964829583e-05, + "loss": 1.0616, + "step": 1365 + }, + { + "epoch": 3.493606138107417, + "grad_norm": 0.22297849379676427, + "learning_rate": 1.1769968646977148e-05, + "loss": 1.08, + "step": 1366 + }, + { + "epoch": 3.49616368286445, + "grad_norm": 0.1917605236627194, + "learning_rate": 1.1757413449496393e-05, + "loss": 1.0582, + "step": 1367 + }, + { + "epoch": 3.498721227621483, + "grad_norm": 0.22264832355995012, + "learning_rate": 1.174485539281384e-05, + "loss": 0.9999, + "step": 1368 + }, + { + "epoch": 3.501278772378517, + "grad_norm": 0.18053830121135175, + "learning_rate": 1.1732294497360658e-05, + "loss": 1.0481, + "step": 1369 + }, + { + "epoch": 3.50383631713555, + "grad_norm": 0.25413658020729973, + "learning_rate": 1.1719730783572645e-05, + "loss": 1.0526, + "step": 1370 + }, + { + "epoch": 3.506393861892583, + "grad_norm": 0.20438148687464178, + "learning_rate": 1.1707164271890168e-05, + "loss": 1.0465, + "step": 1371 + }, + { + "epoch": 3.5089514066496164, + "grad_norm": 0.27411869672391553, + "learning_rate": 1.1694594982758164e-05, + "loss": 1.0672, + "step": 1372 + }, + { + "epoch": 3.5115089514066495, + "grad_norm": 0.27020394951486204, + "learning_rate": 1.1682022936626076e-05, + "loss": 1.0249, + "step": 1373 + }, + { + "epoch": 3.5140664961636827, + "grad_norm": 0.20542313494356507, + "learning_rate": 1.166944815394784e-05, + "loss": 1.0444, + "step": 1374 + }, + { + "epoch": 3.516624040920716, + "grad_norm": 0.2696771035530231, + "learning_rate": 1.165687065518184e-05, + "loss": 1.0164, + "step": 1375 + }, + { + "epoch": 3.5191815856777495, + "grad_norm": 0.21834933315057503, + "learning_rate": 1.1644290460790879e-05, + "loss": 1.0231, + "step": 1376 + }, + { + "epoch": 3.5217391304347827, + "grad_norm": 0.25602165129241816, + "learning_rate": 1.163170759124215e-05, + "loss": 1.0499, + "step": 1377 + }, + { + "epoch": 3.524296675191816, + "grad_norm": 0.2466307590095287, + "learning_rate": 1.161912206700719e-05, + "loss": 1.0179, + "step": 1378 + }, + { + "epoch": 3.526854219948849, + "grad_norm": 0.1990877095514582, + "learning_rate": 1.1606533908561866e-05, + "loss": 1.0825, + "step": 1379 + }, + { + "epoch": 3.5294117647058822, + "grad_norm": 0.2262880860449741, + "learning_rate": 1.1593943136386316e-05, + "loss": 1.0239, + "step": 1380 + }, + { + "epoch": 3.531969309462916, + "grad_norm": 0.23639713675723853, + "learning_rate": 1.1581349770964946e-05, + "loss": 1.0797, + "step": 1381 + }, + { + "epoch": 3.5345268542199486, + "grad_norm": 0.19143592758217978, + "learning_rate": 1.1568753832786376e-05, + "loss": 1.0482, + "step": 1382 + }, + { + "epoch": 3.5370843989769822, + "grad_norm": 0.21395077968188803, + "learning_rate": 1.1556155342343405e-05, + "loss": 1.0341, + "step": 1383 + }, + { + "epoch": 3.5396419437340154, + "grad_norm": 0.20517427967195068, + "learning_rate": 1.154355432013299e-05, + "loss": 1.0657, + "step": 1384 + }, + { + "epoch": 3.5421994884910486, + "grad_norm": 0.19022344547536582, + "learning_rate": 1.1530950786656205e-05, + "loss": 1.0428, + "step": 1385 + }, + { + "epoch": 3.544757033248082, + "grad_norm": 0.24857892965208156, + "learning_rate": 1.1518344762418216e-05, + "loss": 1.0614, + "step": 1386 + }, + { + "epoch": 3.547314578005115, + "grad_norm": 0.17434032950673256, + "learning_rate": 1.150573626792823e-05, + "loss": 1.0119, + "step": 1387 + }, + { + "epoch": 3.5498721227621486, + "grad_norm": 0.221669736437551, + "learning_rate": 1.1493125323699486e-05, + "loss": 1.0325, + "step": 1388 + }, + { + "epoch": 3.5524296675191813, + "grad_norm": 0.19550877444868983, + "learning_rate": 1.1480511950249195e-05, + "loss": 1.0621, + "step": 1389 + }, + { + "epoch": 3.554987212276215, + "grad_norm": 0.20320983764425946, + "learning_rate": 1.1467896168098533e-05, + "loss": 1.0688, + "step": 1390 + }, + { + "epoch": 3.557544757033248, + "grad_norm": 0.21236236447911172, + "learning_rate": 1.1455277997772585e-05, + "loss": 0.9992, + "step": 1391 + }, + { + "epoch": 3.5601023017902813, + "grad_norm": 0.1946876189282923, + "learning_rate": 1.1442657459800323e-05, + "loss": 1.0298, + "step": 1392 + }, + { + "epoch": 3.5626598465473145, + "grad_norm": 0.20833695509734265, + "learning_rate": 1.143003457471458e-05, + "loss": 1.0481, + "step": 1393 + }, + { + "epoch": 3.5652173913043477, + "grad_norm": 0.19849397670530705, + "learning_rate": 1.1417409363051992e-05, + "loss": 1.0508, + "step": 1394 + }, + { + "epoch": 3.5677749360613813, + "grad_norm": 0.1862173592034928, + "learning_rate": 1.1404781845352999e-05, + "loss": 1.0586, + "step": 1395 + }, + { + "epoch": 3.5703324808184145, + "grad_norm": 0.20151362231655162, + "learning_rate": 1.1392152042161774e-05, + "loss": 1.0319, + "step": 1396 + }, + { + "epoch": 3.5728900255754477, + "grad_norm": 0.23404342439834142, + "learning_rate": 1.1379519974026226e-05, + "loss": 1.0151, + "step": 1397 + }, + { + "epoch": 3.575447570332481, + "grad_norm": 0.18584316354206787, + "learning_rate": 1.136688566149793e-05, + "loss": 1.0516, + "step": 1398 + }, + { + "epoch": 3.578005115089514, + "grad_norm": 0.2357364264338847, + "learning_rate": 1.1354249125132131e-05, + "loss": 1.0558, + "step": 1399 + }, + { + "epoch": 3.580562659846547, + "grad_norm": 0.255370311471337, + "learning_rate": 1.1341610385487677e-05, + "loss": 1.0159, + "step": 1400 + }, + { + "epoch": 3.5831202046035804, + "grad_norm": 0.2015566724373594, + "learning_rate": 1.1328969463127009e-05, + "loss": 1.0256, + "step": 1401 + }, + { + "epoch": 3.585677749360614, + "grad_norm": 0.2717588011458947, + "learning_rate": 1.1316326378616121e-05, + "loss": 1.0452, + "step": 1402 + }, + { + "epoch": 3.588235294117647, + "grad_norm": 0.226800697503035, + "learning_rate": 1.1303681152524514e-05, + "loss": 1.0417, + "step": 1403 + }, + { + "epoch": 3.5907928388746804, + "grad_norm": 0.20628829171202948, + "learning_rate": 1.129103380542519e-05, + "loss": 1.0483, + "step": 1404 + }, + { + "epoch": 3.5933503836317136, + "grad_norm": 0.2260665953032841, + "learning_rate": 1.1278384357894585e-05, + "loss": 1.0407, + "step": 1405 + }, + { + "epoch": 3.5959079283887467, + "grad_norm": 0.20513785218039995, + "learning_rate": 1.1265732830512561e-05, + "loss": 1.0391, + "step": 1406 + }, + { + "epoch": 3.59846547314578, + "grad_norm": 0.21444285296757887, + "learning_rate": 1.125307924386236e-05, + "loss": 1.0456, + "step": 1407 + }, + { + "epoch": 3.601023017902813, + "grad_norm": 0.2652819565444848, + "learning_rate": 1.1240423618530578e-05, + "loss": 1.0501, + "step": 1408 + }, + { + "epoch": 3.6035805626598467, + "grad_norm": 0.23632809050025924, + "learning_rate": 1.122776597510713e-05, + "loss": 1.0294, + "step": 1409 + }, + { + "epoch": 3.60613810741688, + "grad_norm": 0.2185806876530497, + "learning_rate": 1.1215106334185201e-05, + "loss": 1.0024, + "step": 1410 + }, + { + "epoch": 3.608695652173913, + "grad_norm": 0.24854116957417377, + "learning_rate": 1.1202444716361247e-05, + "loss": 1.0451, + "step": 1411 + }, + { + "epoch": 3.6112531969309463, + "grad_norm": 0.2045525689869136, + "learning_rate": 1.1189781142234917e-05, + "loss": 1.0635, + "step": 1412 + }, + { + "epoch": 3.6138107416879794, + "grad_norm": 0.2399433598230184, + "learning_rate": 1.1177115632409064e-05, + "loss": 1.0177, + "step": 1413 + }, + { + "epoch": 3.6163682864450126, + "grad_norm": 0.2415017313404832, + "learning_rate": 1.1164448207489673e-05, + "loss": 1.0379, + "step": 1414 + }, + { + "epoch": 3.618925831202046, + "grad_norm": 0.21319360249943278, + "learning_rate": 1.1151778888085856e-05, + "loss": 1.0179, + "step": 1415 + }, + { + "epoch": 3.6214833759590794, + "grad_norm": 0.24881166658392342, + "learning_rate": 1.1139107694809806e-05, + "loss": 1.0392, + "step": 1416 + }, + { + "epoch": 3.6240409207161126, + "grad_norm": 0.19415985264760977, + "learning_rate": 1.1126434648276756e-05, + "loss": 1.0124, + "step": 1417 + }, + { + "epoch": 3.626598465473146, + "grad_norm": 0.25642703103922565, + "learning_rate": 1.1113759769104965e-05, + "loss": 1.0496, + "step": 1418 + }, + { + "epoch": 3.629156010230179, + "grad_norm": 0.2492878689877881, + "learning_rate": 1.1101083077915667e-05, + "loss": 1.043, + "step": 1419 + }, + { + "epoch": 3.631713554987212, + "grad_norm": 0.1983125579481505, + "learning_rate": 1.1088404595333046e-05, + "loss": 1.0449, + "step": 1420 + }, + { + "epoch": 3.634271099744246, + "grad_norm": 0.21827713474511093, + "learning_rate": 1.1075724341984201e-05, + "loss": 1.0622, + "step": 1421 + }, + { + "epoch": 3.6368286445012785, + "grad_norm": 0.23619084555258635, + "learning_rate": 1.1063042338499113e-05, + "loss": 1.015, + "step": 1422 + }, + { + "epoch": 3.639386189258312, + "grad_norm": 0.20336660531825468, + "learning_rate": 1.1050358605510606e-05, + "loss": 1.0413, + "step": 1423 + }, + { + "epoch": 3.6419437340153453, + "grad_norm": 0.2421386235557971, + "learning_rate": 1.1037673163654321e-05, + "loss": 1.0307, + "step": 1424 + }, + { + "epoch": 3.6445012787723785, + "grad_norm": 0.22360499286457716, + "learning_rate": 1.1024986033568683e-05, + "loss": 1.0605, + "step": 1425 + }, + { + "epoch": 3.6470588235294117, + "grad_norm": 0.2378376933825962, + "learning_rate": 1.101229723589485e-05, + "loss": 1.0192, + "step": 1426 + }, + { + "epoch": 3.649616368286445, + "grad_norm": 0.22968460013912853, + "learning_rate": 1.099960679127671e-05, + "loss": 1.0349, + "step": 1427 + }, + { + "epoch": 3.6521739130434785, + "grad_norm": 0.23158540102865127, + "learning_rate": 1.0986914720360821e-05, + "loss": 1.0253, + "step": 1428 + }, + { + "epoch": 3.6547314578005117, + "grad_norm": 0.22013393117978197, + "learning_rate": 1.097422104379639e-05, + "loss": 1.018, + "step": 1429 + }, + { + "epoch": 3.657289002557545, + "grad_norm": 0.22220097208242998, + "learning_rate": 1.0961525782235233e-05, + "loss": 1.0473, + "step": 1430 + }, + { + "epoch": 3.659846547314578, + "grad_norm": 0.22194116899976712, + "learning_rate": 1.0948828956331752e-05, + "loss": 1.0424, + "step": 1431 + }, + { + "epoch": 3.662404092071611, + "grad_norm": 0.1983453396349903, + "learning_rate": 1.0936130586742881e-05, + "loss": 1.0453, + "step": 1432 + }, + { + "epoch": 3.6649616368286444, + "grad_norm": 0.2327743943604014, + "learning_rate": 1.0923430694128074e-05, + "loss": 1.0193, + "step": 1433 + }, + { + "epoch": 3.6675191815856776, + "grad_norm": 0.21867884439727386, + "learning_rate": 1.091072929914927e-05, + "loss": 1.0256, + "step": 1434 + }, + { + "epoch": 3.670076726342711, + "grad_norm": 0.23080732244405422, + "learning_rate": 1.0898026422470838e-05, + "loss": 1.0232, + "step": 1435 + }, + { + "epoch": 3.6726342710997444, + "grad_norm": 0.22857566907679472, + "learning_rate": 1.0885322084759566e-05, + "loss": 1.0536, + "step": 1436 + }, + { + "epoch": 3.6751918158567776, + "grad_norm": 0.2520804757587095, + "learning_rate": 1.0872616306684616e-05, + "loss": 1.0287, + "step": 1437 + }, + { + "epoch": 3.6777493606138107, + "grad_norm": 0.2469698171523125, + "learning_rate": 1.0859909108917497e-05, + "loss": 1.0909, + "step": 1438 + }, + { + "epoch": 3.680306905370844, + "grad_norm": 0.2327692634720372, + "learning_rate": 1.084720051213202e-05, + "loss": 1.0193, + "step": 1439 + }, + { + "epoch": 3.682864450127877, + "grad_norm": 0.23658961049768784, + "learning_rate": 1.0834490537004286e-05, + "loss": 1.0212, + "step": 1440 + }, + { + "epoch": 3.6854219948849103, + "grad_norm": 0.20942394628132058, + "learning_rate": 1.0821779204212623e-05, + "loss": 1.0249, + "step": 1441 + }, + { + "epoch": 3.687979539641944, + "grad_norm": 0.23145657493822064, + "learning_rate": 1.0809066534437576e-05, + "loss": 1.0179, + "step": 1442 + }, + { + "epoch": 3.690537084398977, + "grad_norm": 0.1999453161376075, + "learning_rate": 1.0796352548361863e-05, + "loss": 1.0026, + "step": 1443 + }, + { + "epoch": 3.6930946291560103, + "grad_norm": 0.22035660036843002, + "learning_rate": 1.0783637266670348e-05, + "loss": 1.0287, + "step": 1444 + }, + { + "epoch": 3.6956521739130435, + "grad_norm": 0.19317194516834582, + "learning_rate": 1.0770920710049997e-05, + "loss": 1.0507, + "step": 1445 + }, + { + "epoch": 3.6982097186700766, + "grad_norm": 0.2457010945328612, + "learning_rate": 1.0758202899189852e-05, + "loss": 1.0135, + "step": 1446 + }, + { + "epoch": 3.70076726342711, + "grad_norm": 0.18287871278152357, + "learning_rate": 1.0745483854780996e-05, + "loss": 1.0408, + "step": 1447 + }, + { + "epoch": 3.703324808184143, + "grad_norm": 0.23748668263508885, + "learning_rate": 1.073276359751652e-05, + "loss": 1.0642, + "step": 1448 + }, + { + "epoch": 3.7058823529411766, + "grad_norm": 0.22123508756316554, + "learning_rate": 1.0720042148091487e-05, + "loss": 1.0136, + "step": 1449 + }, + { + "epoch": 3.70843989769821, + "grad_norm": 0.23936061656812962, + "learning_rate": 1.0707319527202902e-05, + "loss": 1.0297, + "step": 1450 + }, + { + "epoch": 3.710997442455243, + "grad_norm": 0.27579723622779695, + "learning_rate": 1.0694595755549668e-05, + "loss": 1.0088, + "step": 1451 + }, + { + "epoch": 3.713554987212276, + "grad_norm": 0.2295449569053256, + "learning_rate": 1.0681870853832572e-05, + "loss": 1.0411, + "step": 1452 + }, + { + "epoch": 3.7161125319693094, + "grad_norm": 0.21165912842223478, + "learning_rate": 1.066914484275423e-05, + "loss": 1.0237, + "step": 1453 + }, + { + "epoch": 3.718670076726343, + "grad_norm": 0.22373624538155187, + "learning_rate": 1.0656417743019065e-05, + "loss": 1.0661, + "step": 1454 + }, + { + "epoch": 3.7212276214833757, + "grad_norm": 0.18604305862261736, + "learning_rate": 1.0643689575333276e-05, + "loss": 1.0205, + "step": 1455 + }, + { + "epoch": 3.7237851662404093, + "grad_norm": 0.22160309843387682, + "learning_rate": 1.0630960360404793e-05, + "loss": 1.0179, + "step": 1456 + }, + { + "epoch": 3.7263427109974425, + "grad_norm": 0.1910813020463846, + "learning_rate": 1.061823011894326e-05, + "loss": 1.0622, + "step": 1457 + }, + { + "epoch": 3.7289002557544757, + "grad_norm": 0.22862715748972842, + "learning_rate": 1.0605498871659974e-05, + "loss": 1.0185, + "step": 1458 + }, + { + "epoch": 3.731457800511509, + "grad_norm": 0.20341936295394042, + "learning_rate": 1.0592766639267885e-05, + "loss": 1.0534, + "step": 1459 + }, + { + "epoch": 3.734015345268542, + "grad_norm": 0.2403253522185079, + "learning_rate": 1.0580033442481532e-05, + "loss": 1.0384, + "step": 1460 + }, + { + "epoch": 3.7365728900255757, + "grad_norm": 0.22338961464147264, + "learning_rate": 1.0567299302017038e-05, + "loss": 1.0143, + "step": 1461 + }, + { + "epoch": 3.7391304347826084, + "grad_norm": 0.2117212049005623, + "learning_rate": 1.0554564238592051e-05, + "loss": 1.021, + "step": 1462 + }, + { + "epoch": 3.741687979539642, + "grad_norm": 0.2254372260082909, + "learning_rate": 1.0541828272925721e-05, + "loss": 1.0292, + "step": 1463 + }, + { + "epoch": 3.7442455242966752, + "grad_norm": 0.1922734992717323, + "learning_rate": 1.0529091425738669e-05, + "loss": 1.0489, + "step": 1464 + }, + { + "epoch": 3.7468030690537084, + "grad_norm": 0.21486062627786348, + "learning_rate": 1.0516353717752947e-05, + "loss": 1.0359, + "step": 1465 + }, + { + "epoch": 3.7493606138107416, + "grad_norm": 0.19407217948842267, + "learning_rate": 1.0503615169692012e-05, + "loss": 1.0342, + "step": 1466 + }, + { + "epoch": 3.7519181585677748, + "grad_norm": 0.1785805281257786, + "learning_rate": 1.0490875802280685e-05, + "loss": 1.0353, + "step": 1467 + }, + { + "epoch": 3.7544757033248084, + "grad_norm": 0.20291577459751503, + "learning_rate": 1.0478135636245122e-05, + "loss": 1.0306, + "step": 1468 + }, + { + "epoch": 3.7570332480818416, + "grad_norm": 0.1982096205595046, + "learning_rate": 1.046539469231277e-05, + "loss": 1.0548, + "step": 1469 + }, + { + "epoch": 3.7595907928388748, + "grad_norm": 0.20930042720158404, + "learning_rate": 1.0452652991212357e-05, + "loss": 1.0094, + "step": 1470 + }, + { + "epoch": 3.762148337595908, + "grad_norm": 0.19919273397375814, + "learning_rate": 1.0439910553673829e-05, + "loss": 1.0439, + "step": 1471 + }, + { + "epoch": 3.764705882352941, + "grad_norm": 0.22254826567261315, + "learning_rate": 1.0427167400428331e-05, + "loss": 1.0373, + "step": 1472 + }, + { + "epoch": 3.7672634271099743, + "grad_norm": 0.22854611711688827, + "learning_rate": 1.0414423552208184e-05, + "loss": 1.0199, + "step": 1473 + }, + { + "epoch": 3.7698209718670075, + "grad_norm": 0.3654589035727414, + "learning_rate": 1.0401679029746828e-05, + "loss": 1.0311, + "step": 1474 + }, + { + "epoch": 3.772378516624041, + "grad_norm": 0.19477682817923897, + "learning_rate": 1.038893385377881e-05, + "loss": 1.0445, + "step": 1475 + }, + { + "epoch": 3.7749360613810743, + "grad_norm": 0.2035068833502665, + "learning_rate": 1.0376188045039723e-05, + "loss": 1.035, + "step": 1476 + }, + { + "epoch": 3.7774936061381075, + "grad_norm": 0.20207740056727894, + "learning_rate": 1.0363441624266213e-05, + "loss": 1.0054, + "step": 1477 + }, + { + "epoch": 3.7800511508951407, + "grad_norm": 0.23108316839210677, + "learning_rate": 1.0350694612195905e-05, + "loss": 1.0299, + "step": 1478 + }, + { + "epoch": 3.782608695652174, + "grad_norm": 0.19921910618488686, + "learning_rate": 1.0337947029567388e-05, + "loss": 1.013, + "step": 1479 + }, + { + "epoch": 3.785166240409207, + "grad_norm": 0.19609376442655463, + "learning_rate": 1.0325198897120183e-05, + "loss": 1.0239, + "step": 1480 + }, + { + "epoch": 3.78772378516624, + "grad_norm": 0.2039103534692172, + "learning_rate": 1.0312450235594706e-05, + "loss": 1.0262, + "step": 1481 + }, + { + "epoch": 3.790281329923274, + "grad_norm": 0.19686683259289736, + "learning_rate": 1.0299701065732235e-05, + "loss": 1.0444, + "step": 1482 + }, + { + "epoch": 3.792838874680307, + "grad_norm": 0.2031103792356114, + "learning_rate": 1.0286951408274865e-05, + "loss": 1.0993, + "step": 1483 + }, + { + "epoch": 3.79539641943734, + "grad_norm": 0.2263801739639009, + "learning_rate": 1.0274201283965497e-05, + "loss": 1.0409, + "step": 1484 + }, + { + "epoch": 3.7979539641943734, + "grad_norm": 0.17572315424279408, + "learning_rate": 1.0261450713547785e-05, + "loss": 1.075, + "step": 1485 + }, + { + "epoch": 3.8005115089514065, + "grad_norm": 0.27023491274755906, + "learning_rate": 1.0248699717766107e-05, + "loss": 1.0679, + "step": 1486 + }, + { + "epoch": 3.80306905370844, + "grad_norm": 0.1713633148592625, + "learning_rate": 1.023594831736554e-05, + "loss": 1.0484, + "step": 1487 + }, + { + "epoch": 3.805626598465473, + "grad_norm": 0.2367623046752298, + "learning_rate": 1.0223196533091813e-05, + "loss": 1.0287, + "step": 1488 + }, + { + "epoch": 3.8081841432225065, + "grad_norm": 0.1984118987646221, + "learning_rate": 1.0210444385691282e-05, + "loss": 1.0373, + "step": 1489 + }, + { + "epoch": 3.8107416879795397, + "grad_norm": 0.19013291547902408, + "learning_rate": 1.0197691895910895e-05, + "loss": 1.0396, + "step": 1490 + }, + { + "epoch": 3.813299232736573, + "grad_norm": 0.2262690201508357, + "learning_rate": 1.0184939084498153e-05, + "loss": 1.0383, + "step": 1491 + }, + { + "epoch": 3.815856777493606, + "grad_norm": 0.21345095926753077, + "learning_rate": 1.0172185972201082e-05, + "loss": 1.0341, + "step": 1492 + }, + { + "epoch": 3.8184143222506393, + "grad_norm": 0.18180827453898485, + "learning_rate": 1.01594325797682e-05, + "loss": 1.0419, + "step": 1493 + }, + { + "epoch": 3.820971867007673, + "grad_norm": 0.23760325057681905, + "learning_rate": 1.0146678927948484e-05, + "loss": 1.0178, + "step": 1494 + }, + { + "epoch": 3.8235294117647056, + "grad_norm": 0.18084043730292876, + "learning_rate": 1.013392503749132e-05, + "loss": 1.0701, + "step": 1495 + }, + { + "epoch": 3.8260869565217392, + "grad_norm": 0.18619103410121773, + "learning_rate": 1.0121170929146493e-05, + "loss": 1.0359, + "step": 1496 + }, + { + "epoch": 3.8286445012787724, + "grad_norm": 0.1814058213229099, + "learning_rate": 1.0108416623664142e-05, + "loss": 1.0483, + "step": 1497 + }, + { + "epoch": 3.8312020460358056, + "grad_norm": 0.17659823284048892, + "learning_rate": 1.0095662141794725e-05, + "loss": 1.0167, + "step": 1498 + }, + { + "epoch": 3.833759590792839, + "grad_norm": 0.18093838446366517, + "learning_rate": 1.0082907504288977e-05, + "loss": 1.0271, + "step": 1499 + }, + { + "epoch": 3.836317135549872, + "grad_norm": 0.19401662423230362, + "learning_rate": 1.0070152731897911e-05, + "loss": 1.0525, + "step": 1500 + }, + { + "epoch": 3.8388746803069056, + "grad_norm": 0.17897896363370017, + "learning_rate": 1.0057397845372734e-05, + "loss": 1.0354, + "step": 1501 + }, + { + "epoch": 3.8414322250639388, + "grad_norm": 0.18581636595029996, + "learning_rate": 1.004464286546485e-05, + "loss": 1.0439, + "step": 1502 + }, + { + "epoch": 3.843989769820972, + "grad_norm": 0.17458922536736418, + "learning_rate": 1.0031887812925818e-05, + "loss": 1.0073, + "step": 1503 + }, + { + "epoch": 3.846547314578005, + "grad_norm": 0.18401279215992355, + "learning_rate": 1.0019132708507307e-05, + "loss": 1.0549, + "step": 1504 + }, + { + "epoch": 3.8491048593350383, + "grad_norm": 0.17886260918603583, + "learning_rate": 1.0006377572961075e-05, + "loss": 1.056, + "step": 1505 + }, + { + "epoch": 3.8516624040920715, + "grad_norm": 0.18640944420175584, + "learning_rate": 9.99362242703893e-06, + "loss": 1.0317, + "step": 1506 + }, + { + "epoch": 3.8542199488491047, + "grad_norm": 0.1724777242125077, + "learning_rate": 9.980867291492697e-06, + "loss": 1.0496, + "step": 1507 + }, + { + "epoch": 3.8567774936061383, + "grad_norm": 0.17736614296923925, + "learning_rate": 9.968112187074187e-06, + "loss": 1.0321, + "step": 1508 + }, + { + "epoch": 3.8593350383631715, + "grad_norm": 0.18919776197181185, + "learning_rate": 9.955357134535153e-06, + "loss": 1.0612, + "step": 1509 + }, + { + "epoch": 3.8618925831202047, + "grad_norm": 0.17013450287572257, + "learning_rate": 9.94260215462727e-06, + "loss": 1.0371, + "step": 1510 + }, + { + "epoch": 3.864450127877238, + "grad_norm": 0.1795391930284376, + "learning_rate": 9.929847268102092e-06, + "loss": 1.0116, + "step": 1511 + }, + { + "epoch": 3.867007672634271, + "grad_norm": 0.18010343872623125, + "learning_rate": 9.917092495711023e-06, + "loss": 0.9975, + "step": 1512 + }, + { + "epoch": 3.869565217391304, + "grad_norm": 0.2018143041172149, + "learning_rate": 9.904337858205282e-06, + "loss": 1.0261, + "step": 1513 + }, + { + "epoch": 3.8721227621483374, + "grad_norm": 0.20189193249637963, + "learning_rate": 9.891583376335861e-06, + "loss": 1.036, + "step": 1514 + }, + { + "epoch": 3.874680306905371, + "grad_norm": 0.18604316403857601, + "learning_rate": 9.87882907085351e-06, + "loss": 1.0353, + "step": 1515 + }, + { + "epoch": 3.877237851662404, + "grad_norm": 0.1764086076077849, + "learning_rate": 9.866074962508684e-06, + "loss": 1.048, + "step": 1516 + }, + { + "epoch": 3.8797953964194374, + "grad_norm": 0.18861859299069214, + "learning_rate": 9.85332107205152e-06, + "loss": 1.0719, + "step": 1517 + }, + { + "epoch": 3.8823529411764706, + "grad_norm": 0.1729886347071538, + "learning_rate": 9.840567420231802e-06, + "loss": 1.0436, + "step": 1518 + }, + { + "epoch": 3.8849104859335037, + "grad_norm": 0.20230041478663247, + "learning_rate": 9.82781402779892e-06, + "loss": 1.0611, + "step": 1519 + }, + { + "epoch": 3.887468030690537, + "grad_norm": 0.19599063188718716, + "learning_rate": 9.815060915501852e-06, + "loss": 1.0517, + "step": 1520 + }, + { + "epoch": 3.89002557544757, + "grad_norm": 0.20556197980895194, + "learning_rate": 9.802308104089109e-06, + "loss": 1.0249, + "step": 1521 + }, + { + "epoch": 3.8925831202046037, + "grad_norm": 0.21413593644142717, + "learning_rate": 9.789555614308721e-06, + "loss": 0.9947, + "step": 1522 + }, + { + "epoch": 3.895140664961637, + "grad_norm": 0.20287758208508144, + "learning_rate": 9.77680346690819e-06, + "loss": 1.0352, + "step": 1523 + }, + { + "epoch": 3.89769820971867, + "grad_norm": 0.19248950316327032, + "learning_rate": 9.764051682634462e-06, + "loss": 1.0275, + "step": 1524 + }, + { + "epoch": 3.9002557544757033, + "grad_norm": 0.22258046212032104, + "learning_rate": 9.751300282233895e-06, + "loss": 1.0534, + "step": 1525 + }, + { + "epoch": 3.9028132992327365, + "grad_norm": 0.21347571901775975, + "learning_rate": 9.738549286452218e-06, + "loss": 1.038, + "step": 1526 + }, + { + "epoch": 3.90537084398977, + "grad_norm": 0.2280185995042673, + "learning_rate": 9.725798716034507e-06, + "loss": 1.0286, + "step": 1527 + }, + { + "epoch": 3.907928388746803, + "grad_norm": 0.20202933779134605, + "learning_rate": 9.713048591725138e-06, + "loss": 1.0448, + "step": 1528 + }, + { + "epoch": 3.9104859335038364, + "grad_norm": 0.20920944736139577, + "learning_rate": 9.700298934267766e-06, + "loss": 1.0069, + "step": 1529 + }, + { + "epoch": 3.9130434782608696, + "grad_norm": 0.19240200507914293, + "learning_rate": 9.687549764405296e-06, + "loss": 1.0376, + "step": 1530 + }, + { + "epoch": 3.915601023017903, + "grad_norm": 0.20292905124684749, + "learning_rate": 9.674801102879817e-06, + "loss": 1.0274, + "step": 1531 + }, + { + "epoch": 3.918158567774936, + "grad_norm": 0.19062905855598355, + "learning_rate": 9.662052970432617e-06, + "loss": 1.0407, + "step": 1532 + }, + { + "epoch": 3.920716112531969, + "grad_norm": 0.21406493946615143, + "learning_rate": 9.6493053878041e-06, + "loss": 1.0401, + "step": 1533 + }, + { + "epoch": 3.923273657289003, + "grad_norm": 0.19190236583371453, + "learning_rate": 9.63655837573379e-06, + "loss": 1.0521, + "step": 1534 + }, + { + "epoch": 3.9258312020460355, + "grad_norm": 0.22868484745745557, + "learning_rate": 9.623811954960279e-06, + "loss": 1.0396, + "step": 1535 + }, + { + "epoch": 3.928388746803069, + "grad_norm": 0.1896213962401851, + "learning_rate": 9.611066146221192e-06, + "loss": 1.0272, + "step": 1536 + }, + { + "epoch": 3.9309462915601023, + "grad_norm": 0.208558000446644, + "learning_rate": 9.598320970253175e-06, + "loss": 1.0263, + "step": 1537 + }, + { + "epoch": 3.9335038363171355, + "grad_norm": 0.18215621037833685, + "learning_rate": 9.585576447791817e-06, + "loss": 1.044, + "step": 1538 + }, + { + "epoch": 3.9360613810741687, + "grad_norm": 0.17351304593560926, + "learning_rate": 9.572832599571674e-06, + "loss": 1.0268, + "step": 1539 + }, + { + "epoch": 3.938618925831202, + "grad_norm": 0.22389061474679745, + "learning_rate": 9.560089446326175e-06, + "loss": 1.0313, + "step": 1540 + }, + { + "epoch": 3.9411764705882355, + "grad_norm": 0.17547633776625562, + "learning_rate": 9.547347008787648e-06, + "loss": 1.0321, + "step": 1541 + }, + { + "epoch": 3.9437340153452687, + "grad_norm": 0.21231411571444475, + "learning_rate": 9.534605307687233e-06, + "loss": 1.0027, + "step": 1542 + }, + { + "epoch": 3.946291560102302, + "grad_norm": 0.1792239552721382, + "learning_rate": 9.52186436375488e-06, + "loss": 1.0272, + "step": 1543 + }, + { + "epoch": 3.948849104859335, + "grad_norm": 0.21595336710565813, + "learning_rate": 9.509124197719317e-06, + "loss": 1.0074, + "step": 1544 + }, + { + "epoch": 3.9514066496163682, + "grad_norm": 0.20310879984969743, + "learning_rate": 9.496384830307988e-06, + "loss": 1.0481, + "step": 1545 + }, + { + "epoch": 3.9539641943734014, + "grad_norm": 0.20949639165674833, + "learning_rate": 9.483646282247056e-06, + "loss": 1.0167, + "step": 1546 + }, + { + "epoch": 3.9565217391304346, + "grad_norm": 0.23427285497954728, + "learning_rate": 9.470908574261333e-06, + "loss": 1.0478, + "step": 1547 + }, + { + "epoch": 3.959079283887468, + "grad_norm": 0.1881836520862583, + "learning_rate": 9.458171727074284e-06, + "loss": 1.0257, + "step": 1548 + }, + { + "epoch": 3.9616368286445014, + "grad_norm": 0.22079043196824938, + "learning_rate": 9.44543576140795e-06, + "loss": 1.0904, + "step": 1549 + }, + { + "epoch": 3.9641943734015346, + "grad_norm": 0.18959168411837335, + "learning_rate": 9.432700697982962e-06, + "loss": 1.0562, + "step": 1550 + }, + { + "epoch": 3.9667519181585678, + "grad_norm": 0.1881932409897208, + "learning_rate": 9.419966557518472e-06, + "loss": 1.048, + "step": 1551 + }, + { + "epoch": 3.969309462915601, + "grad_norm": 0.20694575807793056, + "learning_rate": 9.407233360732119e-06, + "loss": 1.0453, + "step": 1552 + }, + { + "epoch": 3.971867007672634, + "grad_norm": 0.21141511803194477, + "learning_rate": 9.39450112834003e-06, + "loss": 1.0416, + "step": 1553 + }, + { + "epoch": 3.9744245524296673, + "grad_norm": 0.19924380600743072, + "learning_rate": 9.381769881056744e-06, + "loss": 1.0302, + "step": 1554 + }, + { + "epoch": 3.976982097186701, + "grad_norm": 0.18443702573710982, + "learning_rate": 9.36903963959521e-06, + "loss": 1.0509, + "step": 1555 + }, + { + "epoch": 3.979539641943734, + "grad_norm": 0.2130900807101153, + "learning_rate": 9.356310424666725e-06, + "loss": 1.0674, + "step": 1556 + }, + { + "epoch": 3.9820971867007673, + "grad_norm": 0.18076464736813797, + "learning_rate": 9.343582256980937e-06, + "loss": 1.0327, + "step": 1557 + }, + { + "epoch": 3.9846547314578005, + "grad_norm": 0.19770573119978005, + "learning_rate": 9.330855157245776e-06, + "loss": 1.049, + "step": 1558 + }, + { + "epoch": 3.9872122762148337, + "grad_norm": 0.18941088064084555, + "learning_rate": 9.318129146167432e-06, + "loss": 1.0285, + "step": 1559 + }, + { + "epoch": 3.9897698209718673, + "grad_norm": 0.21949442372495884, + "learning_rate": 9.305404244450337e-06, + "loss": 1.0447, + "step": 1560 + }, + { + "epoch": 3.9923273657289, + "grad_norm": 0.19665403880426255, + "learning_rate": 9.292680472797101e-06, + "loss": 1.0411, + "step": 1561 + }, + { + "epoch": 3.9948849104859336, + "grad_norm": 0.19058036356127872, + "learning_rate": 9.279957851908513e-06, + "loss": 1.0535, + "step": 1562 + }, + { + "epoch": 3.997442455242967, + "grad_norm": 0.18814319318672243, + "learning_rate": 9.267236402483482e-06, + "loss": 1.036, + "step": 1563 + }, + { + "epoch": 4.0, + "grad_norm": 0.1865356816625339, + "learning_rate": 9.254516145219006e-06, + "loss": 1.0435, + "step": 1564 + }, + { + "epoch": 4.002557544757034, + "grad_norm": 0.19230450271770366, + "learning_rate": 9.241797100810152e-06, + "loss": 1.0143, + "step": 1565 + }, + { + "epoch": 4.005115089514066, + "grad_norm": 0.19899721133072965, + "learning_rate": 9.229079289950005e-06, + "loss": 1.0249, + "step": 1566 + }, + { + "epoch": 4.0076726342711, + "grad_norm": 0.21185878359559354, + "learning_rate": 9.216362733329657e-06, + "loss": 0.9987, + "step": 1567 + }, + { + "epoch": 4.010230179028133, + "grad_norm": 0.1985629222033457, + "learning_rate": 9.203647451638138e-06, + "loss": 1.0198, + "step": 1568 + }, + { + "epoch": 4.012787723785166, + "grad_norm": 0.1930121039553769, + "learning_rate": 9.190933465562426e-06, + "loss": 1.0328, + "step": 1569 + }, + { + "epoch": 4.015345268542199, + "grad_norm": 0.2189356848452908, + "learning_rate": 9.17822079578738e-06, + "loss": 1.0358, + "step": 1570 + }, + { + "epoch": 4.017902813299233, + "grad_norm": 0.18197666560197398, + "learning_rate": 9.165509462995716e-06, + "loss": 1.0312, + "step": 1571 + }, + { + "epoch": 4.020460358056266, + "grad_norm": 0.22141370700870244, + "learning_rate": 9.152799487867981e-06, + "loss": 1.0167, + "step": 1572 + }, + { + "epoch": 4.023017902813299, + "grad_norm": 0.2061928144217363, + "learning_rate": 9.140090891082506e-06, + "loss": 1.0173, + "step": 1573 + }, + { + "epoch": 4.025575447570333, + "grad_norm": 0.1855420730525284, + "learning_rate": 9.127383693315387e-06, + "loss": 1.0122, + "step": 1574 + }, + { + "epoch": 4.028132992327365, + "grad_norm": 0.19054702381827276, + "learning_rate": 9.114677915240436e-06, + "loss": 1.0207, + "step": 1575 + }, + { + "epoch": 4.030690537084399, + "grad_norm": 0.17786433578081798, + "learning_rate": 9.101973577529164e-06, + "loss": 1.0339, + "step": 1576 + }, + { + "epoch": 4.033248081841432, + "grad_norm": 0.18910562787321678, + "learning_rate": 9.089270700850733e-06, + "loss": 1.0007, + "step": 1577 + }, + { + "epoch": 4.035805626598465, + "grad_norm": 0.18519350419636166, + "learning_rate": 9.076569305871926e-06, + "loss": 1.0314, + "step": 1578 + }, + { + "epoch": 4.038363171355499, + "grad_norm": 0.21754655747857035, + "learning_rate": 9.063869413257124e-06, + "loss": 1.0302, + "step": 1579 + }, + { + "epoch": 4.040920716112532, + "grad_norm": 0.18004679417947927, + "learning_rate": 9.051171043668251e-06, + "loss": 1.0476, + "step": 1580 + }, + { + "epoch": 4.043478260869565, + "grad_norm": 0.2168920363400877, + "learning_rate": 9.038474217764768e-06, + "loss": 1.025, + "step": 1581 + }, + { + "epoch": 4.046035805626598, + "grad_norm": 0.19274796431055907, + "learning_rate": 9.025778956203611e-06, + "loss": 1.0098, + "step": 1582 + }, + { + "epoch": 4.048593350383632, + "grad_norm": 0.19201028214018007, + "learning_rate": 9.013085279639178e-06, + "loss": 1.0017, + "step": 1583 + }, + { + "epoch": 4.051150895140665, + "grad_norm": 0.19629486524205142, + "learning_rate": 9.000393208723291e-06, + "loss": 1.0219, + "step": 1584 + }, + { + "epoch": 4.053708439897698, + "grad_norm": 0.19752451256428386, + "learning_rate": 8.987702764105151e-06, + "loss": 1.0177, + "step": 1585 + }, + { + "epoch": 4.056265984654732, + "grad_norm": 0.20166118830323768, + "learning_rate": 8.975013966431323e-06, + "loss": 1.0601, + "step": 1586 + }, + { + "epoch": 4.0588235294117645, + "grad_norm": 0.17326861120237855, + "learning_rate": 8.96232683634568e-06, + "loss": 0.9847, + "step": 1587 + }, + { + "epoch": 4.061381074168798, + "grad_norm": 0.1898245941021511, + "learning_rate": 8.949641394489399e-06, + "loss": 1.0099, + "step": 1588 + }, + { + "epoch": 4.063938618925831, + "grad_norm": 0.1700392821316134, + "learning_rate": 8.93695766150089e-06, + "loss": 1.0538, + "step": 1589 + }, + { + "epoch": 4.0664961636828645, + "grad_norm": 0.1682061615806585, + "learning_rate": 8.9242756580158e-06, + "loss": 1.0172, + "step": 1590 + }, + { + "epoch": 4.069053708439898, + "grad_norm": 0.19303997092308417, + "learning_rate": 8.911595404666957e-06, + "loss": 1.0546, + "step": 1591 + }, + { + "epoch": 4.071611253196931, + "grad_norm": 0.1654939906619837, + "learning_rate": 8.898916922084336e-06, + "loss": 1.0464, + "step": 1592 + }, + { + "epoch": 4.0741687979539645, + "grad_norm": 0.18143405806846177, + "learning_rate": 8.88624023089504e-06, + "loss": 1.0545, + "step": 1593 + }, + { + "epoch": 4.076726342710997, + "grad_norm": 0.20747010533584376, + "learning_rate": 8.873565351723249e-06, + "loss": 1.0589, + "step": 1594 + }, + { + "epoch": 4.079283887468031, + "grad_norm": 0.15953653305890375, + "learning_rate": 8.8608923051902e-06, + "loss": 1.0179, + "step": 1595 + }, + { + "epoch": 4.081841432225064, + "grad_norm": 0.2035902582767619, + "learning_rate": 8.848221111914147e-06, + "loss": 1.0447, + "step": 1596 + }, + { + "epoch": 4.084398976982097, + "grad_norm": 0.15347759439362155, + "learning_rate": 8.835551792510329e-06, + "loss": 1.0307, + "step": 1597 + }, + { + "epoch": 4.086956521739131, + "grad_norm": 0.20574769500088766, + "learning_rate": 8.822884367590941e-06, + "loss": 0.9952, + "step": 1598 + }, + { + "epoch": 4.089514066496164, + "grad_norm": 0.1835496415175651, + "learning_rate": 8.810218857765085e-06, + "loss": 1.0005, + "step": 1599 + }, + { + "epoch": 4.092071611253197, + "grad_norm": 0.20530099186755948, + "learning_rate": 8.79755528363876e-06, + "loss": 1.0361, + "step": 1600 + }, + { + "epoch": 4.09462915601023, + "grad_norm": 0.2026938929869877, + "learning_rate": 8.7848936658148e-06, + "loss": 1.0328, + "step": 1601 + }, + { + "epoch": 4.0971867007672635, + "grad_norm": 0.1907662170906002, + "learning_rate": 8.772234024892872e-06, + "loss": 1.0133, + "step": 1602 + }, + { + "epoch": 4.099744245524296, + "grad_norm": 0.19617684565754476, + "learning_rate": 8.759576381469425e-06, + "loss": 1.0027, + "step": 1603 + }, + { + "epoch": 4.10230179028133, + "grad_norm": 0.17534476994793663, + "learning_rate": 8.746920756137642e-06, + "loss": 1.0437, + "step": 1604 + }, + { + "epoch": 4.1048593350383635, + "grad_norm": 0.20521166727954332, + "learning_rate": 8.734267169487444e-06, + "loss": 1.0265, + "step": 1605 + }, + { + "epoch": 4.107416879795396, + "grad_norm": 0.17225400361630142, + "learning_rate": 8.721615642105417e-06, + "loss": 1.0338, + "step": 1606 + }, + { + "epoch": 4.10997442455243, + "grad_norm": 0.21382338032724127, + "learning_rate": 8.708966194574814e-06, + "loss": 1.0083, + "step": 1607 + }, + { + "epoch": 4.112531969309463, + "grad_norm": 0.16180422908572098, + "learning_rate": 8.696318847475487e-06, + "loss": 1.0169, + "step": 1608 + }, + { + "epoch": 4.115089514066496, + "grad_norm": 0.23650182130816144, + "learning_rate": 8.68367362138388e-06, + "loss": 1.0323, + "step": 1609 + }, + { + "epoch": 4.117647058823529, + "grad_norm": 0.18535588146645351, + "learning_rate": 8.671030536872995e-06, + "loss": 1.0299, + "step": 1610 + }, + { + "epoch": 4.120204603580563, + "grad_norm": 0.17955290128121904, + "learning_rate": 8.658389614512325e-06, + "loss": 1.0189, + "step": 1611 + }, + { + "epoch": 4.122762148337596, + "grad_norm": 0.1782288851096717, + "learning_rate": 8.645750874867876e-06, + "loss": 1.0134, + "step": 1612 + }, + { + "epoch": 4.125319693094629, + "grad_norm": 0.18693604034380645, + "learning_rate": 8.633114338502073e-06, + "loss": 1.0403, + "step": 1613 + }, + { + "epoch": 4.127877237851663, + "grad_norm": 0.18248123513699424, + "learning_rate": 8.62048002597378e-06, + "loss": 1.0288, + "step": 1614 + }, + { + "epoch": 4.130434782608695, + "grad_norm": 0.18165634630490243, + "learning_rate": 8.607847957838227e-06, + "loss": 1.0301, + "step": 1615 + }, + { + "epoch": 4.132992327365729, + "grad_norm": 0.1803487141905229, + "learning_rate": 8.595218154647001e-06, + "loss": 1.0301, + "step": 1616 + }, + { + "epoch": 4.135549872122763, + "grad_norm": 0.18173901474688528, + "learning_rate": 8.58259063694801e-06, + "loss": 1.0222, + "step": 1617 + }, + { + "epoch": 4.138107416879795, + "grad_norm": 0.18078862560079437, + "learning_rate": 8.56996542528542e-06, + "loss": 1.0235, + "step": 1618 + }, + { + "epoch": 4.140664961636829, + "grad_norm": 0.1803693056043885, + "learning_rate": 8.55734254019968e-06, + "loss": 0.9988, + "step": 1619 + }, + { + "epoch": 4.143222506393862, + "grad_norm": 0.1865048325076587, + "learning_rate": 8.544722002227417e-06, + "loss": 1.0538, + "step": 1620 + }, + { + "epoch": 4.145780051150895, + "grad_norm": 0.17978097814336544, + "learning_rate": 8.532103831901472e-06, + "loss": 1.035, + "step": 1621 + }, + { + "epoch": 4.148337595907928, + "grad_norm": 0.23624978152806544, + "learning_rate": 8.519488049750808e-06, + "loss": 1.0298, + "step": 1622 + }, + { + "epoch": 4.150895140664962, + "grad_norm": 0.16381055698474817, + "learning_rate": 8.506874676300514e-06, + "loss": 1.0485, + "step": 1623 + }, + { + "epoch": 4.153452685421995, + "grad_norm": 0.19963138199162672, + "learning_rate": 8.494263732071772e-06, + "loss": 1.0092, + "step": 1624 + }, + { + "epoch": 4.156010230179028, + "grad_norm": 0.19251260911612733, + "learning_rate": 8.481655237581785e-06, + "loss": 1.0209, + "step": 1625 + }, + { + "epoch": 4.158567774936062, + "grad_norm": 0.17091450724555518, + "learning_rate": 8.469049213343798e-06, + "loss": 1.0358, + "step": 1626 + }, + { + "epoch": 4.161125319693094, + "grad_norm": 0.18111441891291247, + "learning_rate": 8.456445679867013e-06, + "loss": 1.0235, + "step": 1627 + }, + { + "epoch": 4.163682864450128, + "grad_norm": 0.1742001195215167, + "learning_rate": 8.443844657656596e-06, + "loss": 1.0436, + "step": 1628 + }, + { + "epoch": 4.166240409207161, + "grad_norm": 0.17755175605855264, + "learning_rate": 8.431246167213627e-06, + "loss": 1.0444, + "step": 1629 + }, + { + "epoch": 4.168797953964194, + "grad_norm": 0.17719860198513576, + "learning_rate": 8.418650229035054e-06, + "loss": 1.0321, + "step": 1630 + }, + { + "epoch": 4.171355498721228, + "grad_norm": 0.1606826181735471, + "learning_rate": 8.406056863613689e-06, + "loss": 1.0539, + "step": 1631 + }, + { + "epoch": 4.173913043478261, + "grad_norm": 0.1739885726513299, + "learning_rate": 8.393466091438139e-06, + "loss": 1.0282, + "step": 1632 + }, + { + "epoch": 4.176470588235294, + "grad_norm": 0.18218865497775108, + "learning_rate": 8.380877932992815e-06, + "loss": 1.0239, + "step": 1633 + }, + { + "epoch": 4.179028132992327, + "grad_norm": 0.16523774532642985, + "learning_rate": 8.368292408757853e-06, + "loss": 1.02, + "step": 1634 + }, + { + "epoch": 4.181585677749361, + "grad_norm": 0.17345180693087728, + "learning_rate": 8.355709539209121e-06, + "loss": 1.0392, + "step": 1635 + }, + { + "epoch": 4.1841432225063935, + "grad_norm": 0.17255097246631376, + "learning_rate": 8.343129344818162e-06, + "loss": 1.0714, + "step": 1636 + }, + { + "epoch": 4.186700767263427, + "grad_norm": 0.1814224170983909, + "learning_rate": 8.33055184605216e-06, + "loss": 1.0217, + "step": 1637 + }, + { + "epoch": 4.189258312020461, + "grad_norm": 0.1748560906889792, + "learning_rate": 8.317977063373925e-06, + "loss": 1.0391, + "step": 1638 + }, + { + "epoch": 4.1918158567774935, + "grad_norm": 0.18435771096605524, + "learning_rate": 8.305405017241837e-06, + "loss": 1.0215, + "step": 1639 + }, + { + "epoch": 4.194373401534527, + "grad_norm": 0.16909940397166726, + "learning_rate": 8.292835728109835e-06, + "loss": 1.0141, + "step": 1640 + }, + { + "epoch": 4.19693094629156, + "grad_norm": 0.16864611479976394, + "learning_rate": 8.28026921642736e-06, + "loss": 0.995, + "step": 1641 + }, + { + "epoch": 4.1994884910485935, + "grad_norm": 0.1832641724885349, + "learning_rate": 8.267705502639342e-06, + "loss": 1.0443, + "step": 1642 + }, + { + "epoch": 4.202046035805626, + "grad_norm": 0.15678971891456242, + "learning_rate": 8.255144607186161e-06, + "loss": 0.9988, + "step": 1643 + }, + { + "epoch": 4.20460358056266, + "grad_norm": 0.17026684913571113, + "learning_rate": 8.242586550503607e-06, + "loss": 1.0413, + "step": 1644 + }, + { + "epoch": 4.207161125319693, + "grad_norm": 0.17089179054567286, + "learning_rate": 8.230031353022855e-06, + "loss": 1.0305, + "step": 1645 + }, + { + "epoch": 4.209718670076726, + "grad_norm": 0.17613488393658056, + "learning_rate": 8.217479035170422e-06, + "loss": 1.0075, + "step": 1646 + }, + { + "epoch": 4.21227621483376, + "grad_norm": 0.15804554349273428, + "learning_rate": 8.204929617368147e-06, + "loss": 1.0119, + "step": 1647 + }, + { + "epoch": 4.2148337595907925, + "grad_norm": 0.20718638597658195, + "learning_rate": 8.192383120033147e-06, + "loss": 1.0239, + "step": 1648 + }, + { + "epoch": 4.217391304347826, + "grad_norm": 0.1845223450299457, + "learning_rate": 8.179839563577789e-06, + "loss": 1.0044, + "step": 1649 + }, + { + "epoch": 4.21994884910486, + "grad_norm": 0.1740911877816002, + "learning_rate": 8.167298968409658e-06, + "loss": 1.0114, + "step": 1650 + }, + { + "epoch": 4.2225063938618925, + "grad_norm": 0.17787524858695802, + "learning_rate": 8.154761354931513e-06, + "loss": 1.0342, + "step": 1651 + }, + { + "epoch": 4.225063938618926, + "grad_norm": 0.17981590233123262, + "learning_rate": 8.142226743541273e-06, + "loss": 1.0196, + "step": 1652 + }, + { + "epoch": 4.227621483375959, + "grad_norm": 0.15945346875306546, + "learning_rate": 8.12969515463196e-06, + "loss": 1.0319, + "step": 1653 + }, + { + "epoch": 4.2301790281329925, + "grad_norm": 0.1782254652095104, + "learning_rate": 8.117166608591693e-06, + "loss": 1.027, + "step": 1654 + }, + { + "epoch": 4.232736572890025, + "grad_norm": 0.16769675527664904, + "learning_rate": 8.104641125803628e-06, + "loss": 1.0512, + "step": 1655 + }, + { + "epoch": 4.235294117647059, + "grad_norm": 0.17673772312426278, + "learning_rate": 8.092118726645943e-06, + "loss": 1.0289, + "step": 1656 + }, + { + "epoch": 4.2378516624040925, + "grad_norm": 0.17775412310787495, + "learning_rate": 8.0795994314918e-06, + "loss": 1.0134, + "step": 1657 + }, + { + "epoch": 4.240409207161125, + "grad_norm": 0.165083768711067, + "learning_rate": 8.067083260709309e-06, + "loss": 1.0482, + "step": 1658 + }, + { + "epoch": 4.242966751918159, + "grad_norm": 0.19604799862438058, + "learning_rate": 8.054570234661498e-06, + "loss": 1.0317, + "step": 1659 + }, + { + "epoch": 4.245524296675192, + "grad_norm": 0.16528010613818045, + "learning_rate": 8.042060373706275e-06, + "loss": 1.0348, + "step": 1660 + }, + { + "epoch": 4.248081841432225, + "grad_norm": 0.1804031281677697, + "learning_rate": 8.029553698196405e-06, + "loss": 1.0401, + "step": 1661 + }, + { + "epoch": 4.250639386189258, + "grad_norm": 0.176393933273107, + "learning_rate": 8.017050228479467e-06, + "loss": 1.0356, + "step": 1662 + }, + { + "epoch": 4.253196930946292, + "grad_norm": 0.19395943497159726, + "learning_rate": 8.004549984897822e-06, + "loss": 1.0191, + "step": 1663 + }, + { + "epoch": 4.255754475703325, + "grad_norm": 0.17246963598612605, + "learning_rate": 7.992052987788586e-06, + "loss": 1.0162, + "step": 1664 + }, + { + "epoch": 4.258312020460358, + "grad_norm": 0.18066442113845643, + "learning_rate": 7.979559257483591e-06, + "loss": 1.0229, + "step": 1665 + }, + { + "epoch": 4.260869565217392, + "grad_norm": 0.1680697165366633, + "learning_rate": 7.967068814309359e-06, + "loss": 1.0202, + "step": 1666 + }, + { + "epoch": 4.263427109974424, + "grad_norm": 0.17705957749246876, + "learning_rate": 7.954581678587054e-06, + "loss": 1.0324, + "step": 1667 + }, + { + "epoch": 4.265984654731458, + "grad_norm": 0.16130768348650035, + "learning_rate": 7.942097870632467e-06, + "loss": 0.9793, + "step": 1668 + }, + { + "epoch": 4.268542199488491, + "grad_norm": 0.17498237044992782, + "learning_rate": 7.929617410755977e-06, + "loss": 1.0249, + "step": 1669 + }, + { + "epoch": 4.271099744245524, + "grad_norm": 0.1925424733299812, + "learning_rate": 7.917140319262507e-06, + "loss": 1.0365, + "step": 1670 + }, + { + "epoch": 4.273657289002558, + "grad_norm": 0.18797309789320532, + "learning_rate": 7.90466661645151e-06, + "loss": 1.0118, + "step": 1671 + }, + { + "epoch": 4.276214833759591, + "grad_norm": 0.16573297446104532, + "learning_rate": 7.892196322616912e-06, + "loss": 1.0247, + "step": 1672 + }, + { + "epoch": 4.278772378516624, + "grad_norm": 0.1925991067748996, + "learning_rate": 7.879729458047111e-06, + "loss": 0.978, + "step": 1673 + }, + { + "epoch": 4.281329923273657, + "grad_norm": 0.1758834459188358, + "learning_rate": 7.86726604302491e-06, + "loss": 1.0175, + "step": 1674 + }, + { + "epoch": 4.283887468030691, + "grad_norm": 0.16487956982839647, + "learning_rate": 7.854806097827507e-06, + "loss": 1.0288, + "step": 1675 + }, + { + "epoch": 4.286445012787723, + "grad_norm": 0.1787793037572042, + "learning_rate": 7.842349642726458e-06, + "loss": 1.0166, + "step": 1676 + }, + { + "epoch": 4.289002557544757, + "grad_norm": 0.1841366036398648, + "learning_rate": 7.829896697987627e-06, + "loss": 1.0348, + "step": 1677 + }, + { + "epoch": 4.291560102301791, + "grad_norm": 0.1576001038888875, + "learning_rate": 7.817447283871187e-06, + "loss": 1.0342, + "step": 1678 + }, + { + "epoch": 4.294117647058823, + "grad_norm": 0.17981916810192364, + "learning_rate": 7.80500142063155e-06, + "loss": 1.0214, + "step": 1679 + }, + { + "epoch": 4.296675191815857, + "grad_norm": 0.17518421051117097, + "learning_rate": 7.792559128517363e-06, + "loss": 1.0404, + "step": 1680 + }, + { + "epoch": 4.29923273657289, + "grad_norm": 0.16823487687822244, + "learning_rate": 7.780120427771449e-06, + "loss": 1.0112, + "step": 1681 + }, + { + "epoch": 4.301790281329923, + "grad_norm": 0.16558738219755195, + "learning_rate": 7.7676853386308e-06, + "loss": 1.0605, + "step": 1682 + }, + { + "epoch": 4.304347826086957, + "grad_norm": 0.17794613732094552, + "learning_rate": 7.755253881326535e-06, + "loss": 1.0371, + "step": 1683 + }, + { + "epoch": 4.30690537084399, + "grad_norm": 0.19300577747925785, + "learning_rate": 7.742826076083848e-06, + "loss": 1.06, + "step": 1684 + }, + { + "epoch": 4.309462915601023, + "grad_norm": 0.16066023211525512, + "learning_rate": 7.730401943122007e-06, + "loss": 1.0084, + "step": 1685 + }, + { + "epoch": 4.312020460358056, + "grad_norm": 0.1947539405327399, + "learning_rate": 7.717981502654297e-06, + "loss": 1.0418, + "step": 1686 + }, + { + "epoch": 4.31457800511509, + "grad_norm": 0.16039175830465094, + "learning_rate": 7.705564774888001e-06, + "loss": 1.0039, + "step": 1687 + }, + { + "epoch": 4.3171355498721224, + "grad_norm": 0.18746085529738926, + "learning_rate": 7.693151780024354e-06, + "loss": 1.0041, + "step": 1688 + }, + { + "epoch": 4.319693094629156, + "grad_norm": 0.17014035483962622, + "learning_rate": 7.680742538258524e-06, + "loss": 1.0087, + "step": 1689 + }, + { + "epoch": 4.322250639386189, + "grad_norm": 0.19178845859382257, + "learning_rate": 7.668337069779577e-06, + "loss": 1.0716, + "step": 1690 + }, + { + "epoch": 4.324808184143222, + "grad_norm": 0.16691270419041054, + "learning_rate": 7.655935394770425e-06, + "loss": 1.0185, + "step": 1691 + }, + { + "epoch": 4.327365728900256, + "grad_norm": 0.17518851447109943, + "learning_rate": 7.643537533407828e-06, + "loss": 1.0173, + "step": 1692 + }, + { + "epoch": 4.329923273657289, + "grad_norm": 0.16145421958943196, + "learning_rate": 7.631143505862325e-06, + "loss": 1.0351, + "step": 1693 + }, + { + "epoch": 4.332480818414322, + "grad_norm": 0.37204295825399436, + "learning_rate": 7.618753332298219e-06, + "loss": 1.0303, + "step": 1694 + }, + { + "epoch": 4.335038363171355, + "grad_norm": 0.15830617963945456, + "learning_rate": 7.606367032873562e-06, + "loss": 1.0129, + "step": 1695 + }, + { + "epoch": 4.337595907928389, + "grad_norm": 0.18979652677231215, + "learning_rate": 7.593984627740075e-06, + "loss": 1.0526, + "step": 1696 + }, + { + "epoch": 4.340153452685422, + "grad_norm": 0.1876359842591056, + "learning_rate": 7.5816061370431674e-06, + "loss": 1.0181, + "step": 1697 + }, + { + "epoch": 4.342710997442455, + "grad_norm": 0.18251068037823034, + "learning_rate": 7.569231580921858e-06, + "loss": 0.996, + "step": 1698 + }, + { + "epoch": 4.345268542199489, + "grad_norm": 0.17542644898051862, + "learning_rate": 7.556860979508791e-06, + "loss": 1.0301, + "step": 1699 + }, + { + "epoch": 4.3478260869565215, + "grad_norm": 0.1927803590994827, + "learning_rate": 7.544494352930145e-06, + "loss": 1.03, + "step": 1700 + }, + { + "epoch": 4.350383631713555, + "grad_norm": 0.16917148556319608, + "learning_rate": 7.532131721305659e-06, + "loss": 0.9895, + "step": 1701 + }, + { + "epoch": 4.352941176470588, + "grad_norm": 0.18346223176780307, + "learning_rate": 7.519773104748562e-06, + "loss": 1.0428, + "step": 1702 + }, + { + "epoch": 4.3554987212276215, + "grad_norm": 0.1628922532881499, + "learning_rate": 7.507418523365542e-06, + "loss": 1.058, + "step": 1703 + }, + { + "epoch": 4.358056265984655, + "grad_norm": 0.1876763139643933, + "learning_rate": 7.495067997256742e-06, + "loss": 1.0112, + "step": 1704 + }, + { + "epoch": 4.360613810741688, + "grad_norm": 0.15693274545823557, + "learning_rate": 7.482721546515683e-06, + "loss": 1.0281, + "step": 1705 + }, + { + "epoch": 4.3631713554987215, + "grad_norm": 0.18630090934243648, + "learning_rate": 7.47037919122928e-06, + "loss": 1.0418, + "step": 1706 + }, + { + "epoch": 4.365728900255754, + "grad_norm": 0.16500214550907966, + "learning_rate": 7.458040951477763e-06, + "loss": 1.0279, + "step": 1707 + }, + { + "epoch": 4.368286445012788, + "grad_norm": 0.18494529984039387, + "learning_rate": 7.4457068473346836e-06, + "loss": 1.0155, + "step": 1708 + }, + { + "epoch": 4.370843989769821, + "grad_norm": 0.19216574362796557, + "learning_rate": 7.43337689886686e-06, + "loss": 1.0423, + "step": 1709 + }, + { + "epoch": 4.373401534526854, + "grad_norm": 0.16751025476175924, + "learning_rate": 7.42105112613434e-06, + "loss": 1.0317, + "step": 1710 + }, + { + "epoch": 4.375959079283888, + "grad_norm": 0.20151154222401438, + "learning_rate": 7.408729549190393e-06, + "loss": 1.0536, + "step": 1711 + }, + { + "epoch": 4.378516624040921, + "grad_norm": 0.18065737789912834, + "learning_rate": 7.3964121880814445e-06, + "loss": 1.0549, + "step": 1712 + }, + { + "epoch": 4.381074168797954, + "grad_norm": 0.17160881413407147, + "learning_rate": 7.3840990628470824e-06, + "loss": 1.0168, + "step": 1713 + }, + { + "epoch": 4.383631713554987, + "grad_norm": 0.1786512550850061, + "learning_rate": 7.371790193519979e-06, + "loss": 1.0435, + "step": 1714 + }, + { + "epoch": 4.3861892583120206, + "grad_norm": 0.19232717850899114, + "learning_rate": 7.359485600125904e-06, + "loss": 1.0389, + "step": 1715 + }, + { + "epoch": 4.388746803069053, + "grad_norm": 0.18440121677046997, + "learning_rate": 7.347185302683662e-06, + "loss": 1.0264, + "step": 1716 + }, + { + "epoch": 4.391304347826087, + "grad_norm": 0.19371415512946702, + "learning_rate": 7.334889321205063e-06, + "loss": 1.0622, + "step": 1717 + }, + { + "epoch": 4.3938618925831205, + "grad_norm": 0.19249478474991424, + "learning_rate": 7.322597675694904e-06, + "loss": 1.0029, + "step": 1718 + }, + { + "epoch": 4.396419437340153, + "grad_norm": 0.19009338152933727, + "learning_rate": 7.31031038615092e-06, + "loss": 1.0165, + "step": 1719 + }, + { + "epoch": 4.398976982097187, + "grad_norm": 0.18669974928276975, + "learning_rate": 7.298027472563768e-06, + "loss": 1.0357, + "step": 1720 + }, + { + "epoch": 4.40153452685422, + "grad_norm": 0.1650051526675111, + "learning_rate": 7.285748954916973e-06, + "loss": 1.0562, + "step": 1721 + }, + { + "epoch": 4.404092071611253, + "grad_norm": 0.1917534223305165, + "learning_rate": 7.273474853186922e-06, + "loss": 1.0409, + "step": 1722 + }, + { + "epoch": 4.406649616368286, + "grad_norm": 0.17737384077233112, + "learning_rate": 7.261205187342809e-06, + "loss": 1.0464, + "step": 1723 + }, + { + "epoch": 4.40920716112532, + "grad_norm": 0.17939864900718247, + "learning_rate": 7.248939977346612e-06, + "loss": 1.0153, + "step": 1724 + }, + { + "epoch": 4.411764705882353, + "grad_norm": 0.16822250340936998, + "learning_rate": 7.236679243153062e-06, + "loss": 1.0274, + "step": 1725 + }, + { + "epoch": 4.414322250639386, + "grad_norm": 0.2012483436702938, + "learning_rate": 7.224423004709607e-06, + "loss": 1.0302, + "step": 1726 + }, + { + "epoch": 4.41687979539642, + "grad_norm": 0.16437642340196237, + "learning_rate": 7.212171281956377e-06, + "loss": 1.0173, + "step": 1727 + }, + { + "epoch": 4.419437340153452, + "grad_norm": 0.18420316116672247, + "learning_rate": 7.199924094826167e-06, + "loss": 1.0154, + "step": 1728 + }, + { + "epoch": 4.421994884910486, + "grad_norm": 0.17063629208523548, + "learning_rate": 7.187681463244377e-06, + "loss": 1.0252, + "step": 1729 + }, + { + "epoch": 4.42455242966752, + "grad_norm": 0.2071747152600751, + "learning_rate": 7.175443407129008e-06, + "loss": 1.0643, + "step": 1730 + }, + { + "epoch": 4.427109974424552, + "grad_norm": 0.1596268627900996, + "learning_rate": 7.163209946390608e-06, + "loss": 1.0094, + "step": 1731 + }, + { + "epoch": 4.429667519181586, + "grad_norm": 0.17222832212411637, + "learning_rate": 7.1509811009322574e-06, + "loss": 1.0011, + "step": 1732 + }, + { + "epoch": 4.432225063938619, + "grad_norm": 0.18768984848570255, + "learning_rate": 7.138756890649516e-06, + "loss": 1.0344, + "step": 1733 + }, + { + "epoch": 4.434782608695652, + "grad_norm": 0.20394581557700622, + "learning_rate": 7.126537335430417e-06, + "loss": 1.0187, + "step": 1734 + }, + { + "epoch": 4.437340153452685, + "grad_norm": 0.1930227044611592, + "learning_rate": 7.1143224551554115e-06, + "loss": 1.0391, + "step": 1735 + }, + { + "epoch": 4.439897698209719, + "grad_norm": 0.19780011138369127, + "learning_rate": 7.102112269697341e-06, + "loss": 1.0599, + "step": 1736 + }, + { + "epoch": 4.442455242966752, + "grad_norm": 0.18641195549148987, + "learning_rate": 7.08990679892142e-06, + "loss": 1.0205, + "step": 1737 + }, + { + "epoch": 4.445012787723785, + "grad_norm": 0.1745033043017393, + "learning_rate": 7.077706062685181e-06, + "loss": 1.0254, + "step": 1738 + }, + { + "epoch": 4.447570332480819, + "grad_norm": 0.1875404190434515, + "learning_rate": 7.065510080838465e-06, + "loss": 1.0375, + "step": 1739 + }, + { + "epoch": 4.450127877237851, + "grad_norm": 0.17560201588299784, + "learning_rate": 7.053318873223365e-06, + "loss": 0.9962, + "step": 1740 + }, + { + "epoch": 4.452685421994885, + "grad_norm": 0.16337995441327988, + "learning_rate": 7.041132459674216e-06, + "loss": 1.0151, + "step": 1741 + }, + { + "epoch": 4.455242966751918, + "grad_norm": 0.17910647034147473, + "learning_rate": 7.028950860017555e-06, + "loss": 1.059, + "step": 1742 + }, + { + "epoch": 4.457800511508951, + "grad_norm": 0.1645714876947052, + "learning_rate": 7.016774094072077e-06, + "loss": 1.0151, + "step": 1743 + }, + { + "epoch": 4.460358056265985, + "grad_norm": 0.18052975261895468, + "learning_rate": 7.004602181648626e-06, + "loss": 1.0226, + "step": 1744 + }, + { + "epoch": 4.462915601023018, + "grad_norm": 0.15506591744701947, + "learning_rate": 6.992435142550133e-06, + "loss": 1.0315, + "step": 1745 + }, + { + "epoch": 4.465473145780051, + "grad_norm": 0.18883014972610887, + "learning_rate": 6.980272996571617e-06, + "loss": 1.035, + "step": 1746 + }, + { + "epoch": 4.468030690537084, + "grad_norm": 0.17244955302277767, + "learning_rate": 6.968115763500127e-06, + "loss": 1.0212, + "step": 1747 + }, + { + "epoch": 4.470588235294118, + "grad_norm": 0.17237420999484432, + "learning_rate": 6.95596346311472e-06, + "loss": 1.0262, + "step": 1748 + }, + { + "epoch": 4.4731457800511505, + "grad_norm": 0.18044664054131004, + "learning_rate": 6.943816115186432e-06, + "loss": 1.0285, + "step": 1749 + }, + { + "epoch": 4.475703324808184, + "grad_norm": 0.16838623145296286, + "learning_rate": 6.931673739478235e-06, + "loss": 1.0526, + "step": 1750 + }, + { + "epoch": 4.478260869565218, + "grad_norm": 0.16324922887275686, + "learning_rate": 6.919536355745018e-06, + "loss": 1.0174, + "step": 1751 + }, + { + "epoch": 4.4808184143222505, + "grad_norm": 0.16440559510930122, + "learning_rate": 6.907403983733543e-06, + "loss": 1.035, + "step": 1752 + }, + { + "epoch": 4.483375959079284, + "grad_norm": 0.15720327328308067, + "learning_rate": 6.895276643182423e-06, + "loss": 1.047, + "step": 1753 + }, + { + "epoch": 4.485933503836317, + "grad_norm": 0.16163765669193314, + "learning_rate": 6.883154353822079e-06, + "loss": 1.0465, + "step": 1754 + }, + { + "epoch": 4.4884910485933505, + "grad_norm": 0.17497015050920636, + "learning_rate": 6.871037135374722e-06, + "loss": 1.0184, + "step": 1755 + }, + { + "epoch": 4.491048593350383, + "grad_norm": 0.15908864283642854, + "learning_rate": 6.858925007554308e-06, + "loss": 1.0307, + "step": 1756 + }, + { + "epoch": 4.493606138107417, + "grad_norm": 0.18008191707505186, + "learning_rate": 6.8468179900665095e-06, + "loss": 1.0363, + "step": 1757 + }, + { + "epoch": 4.4961636828644505, + "grad_norm": 0.1854747706459379, + "learning_rate": 6.834716102608689e-06, + "loss": 1.0083, + "step": 1758 + }, + { + "epoch": 4.498721227621483, + "grad_norm": 0.1919413504278039, + "learning_rate": 6.8226193648698605e-06, + "loss": 0.996, + "step": 1759 + }, + { + "epoch": 4.501278772378517, + "grad_norm": 0.16472038994778412, + "learning_rate": 6.810527796530655e-06, + "loss": 1.0476, + "step": 1760 + }, + { + "epoch": 4.5038363171355495, + "grad_norm": 0.1877483916121461, + "learning_rate": 6.798441417263311e-06, + "loss": 1.042, + "step": 1761 + }, + { + "epoch": 4.506393861892583, + "grad_norm": 0.1524530347847294, + "learning_rate": 6.786360246731595e-06, + "loss": 1.0535, + "step": 1762 + }, + { + "epoch": 4.508951406649617, + "grad_norm": 0.16736289193940437, + "learning_rate": 6.774284304590832e-06, + "loss": 1.0384, + "step": 1763 + }, + { + "epoch": 4.5115089514066495, + "grad_norm": 0.1509168260512166, + "learning_rate": 6.762213610487813e-06, + "loss": 1.0124, + "step": 1764 + }, + { + "epoch": 4.514066496163683, + "grad_norm": 0.15987500159184168, + "learning_rate": 6.75014818406081e-06, + "loss": 1.0282, + "step": 1765 + }, + { + "epoch": 4.516624040920716, + "grad_norm": 0.16208604821494524, + "learning_rate": 6.7380880449395105e-06, + "loss": 1.017, + "step": 1766 + }, + { + "epoch": 4.5191815856777495, + "grad_norm": 0.1750240175749838, + "learning_rate": 6.726033212745009e-06, + "loss": 1.0448, + "step": 1767 + }, + { + "epoch": 4.521739130434782, + "grad_norm": 0.17627152188563489, + "learning_rate": 6.713983707089773e-06, + "loss": 1.0431, + "step": 1768 + }, + { + "epoch": 4.524296675191816, + "grad_norm": 0.172403571140956, + "learning_rate": 6.7019395475775805e-06, + "loss": 1.0014, + "step": 1769 + }, + { + "epoch": 4.526854219948849, + "grad_norm": 0.16551799261888245, + "learning_rate": 6.6899007538035376e-06, + "loss": 1.0277, + "step": 1770 + }, + { + "epoch": 4.529411764705882, + "grad_norm": 0.17935995088209722, + "learning_rate": 6.6778673453539984e-06, + "loss": 1.0214, + "step": 1771 + }, + { + "epoch": 4.531969309462916, + "grad_norm": 0.14762155206935834, + "learning_rate": 6.66583934180658e-06, + "loss": 1.0254, + "step": 1772 + }, + { + "epoch": 4.534526854219949, + "grad_norm": 0.18205952935739028, + "learning_rate": 6.653816762730079e-06, + "loss": 1.0128, + "step": 1773 + }, + { + "epoch": 4.537084398976982, + "grad_norm": 0.16531567285520288, + "learning_rate": 6.641799627684481e-06, + "loss": 1.0117, + "step": 1774 + }, + { + "epoch": 4.539641943734015, + "grad_norm": 0.1761641546294807, + "learning_rate": 6.629787956220924e-06, + "loss": 1.0047, + "step": 1775 + }, + { + "epoch": 4.542199488491049, + "grad_norm": 0.16044890357588265, + "learning_rate": 6.617781767881635e-06, + "loss": 1.0193, + "step": 1776 + }, + { + "epoch": 4.544757033248082, + "grad_norm": 0.159801416179025, + "learning_rate": 6.6057810821999406e-06, + "loss": 1.0344, + "step": 1777 + }, + { + "epoch": 4.547314578005115, + "grad_norm": 0.18194045342283055, + "learning_rate": 6.593785918700197e-06, + "loss": 1.046, + "step": 1778 + }, + { + "epoch": 4.549872122762149, + "grad_norm": 0.15701008924351048, + "learning_rate": 6.581796296897795e-06, + "loss": 1.0264, + "step": 1779 + }, + { + "epoch": 4.552429667519181, + "grad_norm": 0.16610935282488204, + "learning_rate": 6.569812236299089e-06, + "loss": 1.0207, + "step": 1780 + }, + { + "epoch": 4.554987212276215, + "grad_norm": 0.15940091408671517, + "learning_rate": 6.557833756401404e-06, + "loss": 1.049, + "step": 1781 + }, + { + "epoch": 4.557544757033249, + "grad_norm": 0.16618353240025538, + "learning_rate": 6.545860876692979e-06, + "loss": 1.0266, + "step": 1782 + }, + { + "epoch": 4.560102301790281, + "grad_norm": 0.17022750553375388, + "learning_rate": 6.533893616652932e-06, + "loss": 1.0791, + "step": 1783 + }, + { + "epoch": 4.562659846547315, + "grad_norm": 0.17223278557669286, + "learning_rate": 6.521931995751258e-06, + "loss": 1.001, + "step": 1784 + }, + { + "epoch": 4.565217391304348, + "grad_norm": 0.18588830803208972, + "learning_rate": 6.509976033448755e-06, + "loss": 1.0029, + "step": 1785 + }, + { + "epoch": 4.567774936061381, + "grad_norm": 0.15803052054999583, + "learning_rate": 6.498025749197036e-06, + "loss": 1.0085, + "step": 1786 + }, + { + "epoch": 4.570332480818414, + "grad_norm": 0.17758373561683846, + "learning_rate": 6.486081162438458e-06, + "loss": 1.0215, + "step": 1787 + }, + { + "epoch": 4.572890025575448, + "grad_norm": 0.1675050184516244, + "learning_rate": 6.4741422926061225e-06, + "loss": 1.0101, + "step": 1788 + }, + { + "epoch": 4.57544757033248, + "grad_norm": 0.1802049784719144, + "learning_rate": 6.462209159123825e-06, + "loss": 1.0594, + "step": 1789 + }, + { + "epoch": 4.578005115089514, + "grad_norm": 0.15407960949128488, + "learning_rate": 6.450281781406022e-06, + "loss": 1.0351, + "step": 1790 + }, + { + "epoch": 4.580562659846548, + "grad_norm": 0.17251700051840302, + "learning_rate": 6.438360178857818e-06, + "loss": 1.0237, + "step": 1791 + }, + { + "epoch": 4.58312020460358, + "grad_norm": 0.17736986767063925, + "learning_rate": 6.426444370874906e-06, + "loss": 1.0262, + "step": 1792 + }, + { + "epoch": 4.585677749360614, + "grad_norm": 0.18476336736016494, + "learning_rate": 6.414534376843566e-06, + "loss": 1.018, + "step": 1793 + }, + { + "epoch": 4.588235294117647, + "grad_norm": 0.17911429354068129, + "learning_rate": 6.402630216140618e-06, + "loss": 1.0286, + "step": 1794 + }, + { + "epoch": 4.59079283887468, + "grad_norm": 0.17311984725832297, + "learning_rate": 6.39073190813338e-06, + "loss": 1.0103, + "step": 1795 + }, + { + "epoch": 4.593350383631714, + "grad_norm": 0.1621278479186866, + "learning_rate": 6.37883947217966e-06, + "loss": 1.0228, + "step": 1796 + }, + { + "epoch": 4.595907928388747, + "grad_norm": 0.18444591716270403, + "learning_rate": 6.366952927627703e-06, + "loss": 1.0306, + "step": 1797 + }, + { + "epoch": 4.59846547314578, + "grad_norm": 0.1659804117379894, + "learning_rate": 6.355072293816178e-06, + "loss": 1.0072, + "step": 1798 + }, + { + "epoch": 4.601023017902813, + "grad_norm": 0.16571291930690385, + "learning_rate": 6.34319759007413e-06, + "loss": 1.0122, + "step": 1799 + }, + { + "epoch": 4.603580562659847, + "grad_norm": 0.1720471422264511, + "learning_rate": 6.331328835720961e-06, + "loss": 1.0465, + "step": 1800 + }, + { + "epoch": 4.6061381074168795, + "grad_norm": 0.16256427527474918, + "learning_rate": 6.319466050066395e-06, + "loss": 1.0069, + "step": 1801 + }, + { + "epoch": 4.608695652173913, + "grad_norm": 0.16289290458169317, + "learning_rate": 6.307609252410438e-06, + "loss": 0.9955, + "step": 1802 + }, + { + "epoch": 4.611253196930946, + "grad_norm": 0.16420344005471815, + "learning_rate": 6.295758462043362e-06, + "loss": 1.021, + "step": 1803 + }, + { + "epoch": 4.6138107416879794, + "grad_norm": 0.16431618715461352, + "learning_rate": 6.283913698245659e-06, + "loss": 0.9887, + "step": 1804 + }, + { + "epoch": 4.616368286445013, + "grad_norm": 0.162477757683666, + "learning_rate": 6.272074980288021e-06, + "loss": 1.0315, + "step": 1805 + }, + { + "epoch": 4.618925831202046, + "grad_norm": 0.1420949863331362, + "learning_rate": 6.2602423274313e-06, + "loss": 0.9946, + "step": 1806 + }, + { + "epoch": 4.621483375959079, + "grad_norm": 0.1617352765159284, + "learning_rate": 6.248415758926485e-06, + "loss": 1.0247, + "step": 1807 + }, + { + "epoch": 4.624040920716112, + "grad_norm": 0.14727458038419122, + "learning_rate": 6.236595294014662e-06, + "loss": 1.0695, + "step": 1808 + }, + { + "epoch": 4.626598465473146, + "grad_norm": 0.15513852752076332, + "learning_rate": 6.22478095192699e-06, + "loss": 1.0361, + "step": 1809 + }, + { + "epoch": 4.629156010230179, + "grad_norm": 0.15023148854538287, + "learning_rate": 6.212972751884663e-06, + "loss": 1.0263, + "step": 1810 + }, + { + "epoch": 4.631713554987212, + "grad_norm": 0.16087300720694614, + "learning_rate": 6.201170713098883e-06, + "loss": 1.0248, + "step": 1811 + }, + { + "epoch": 4.634271099744246, + "grad_norm": 0.15834981601790443, + "learning_rate": 6.189374854770832e-06, + "loss": 1.053, + "step": 1812 + }, + { + "epoch": 4.6368286445012785, + "grad_norm": 0.1573655447598696, + "learning_rate": 6.177585196091631e-06, + "loss": 0.9904, + "step": 1813 + }, + { + "epoch": 4.639386189258312, + "grad_norm": 0.158683133829273, + "learning_rate": 6.16580175624232e-06, + "loss": 1.0595, + "step": 1814 + }, + { + "epoch": 4.641943734015345, + "grad_norm": 0.1597812398342448, + "learning_rate": 6.15402455439382e-06, + "loss": 1.0517, + "step": 1815 + }, + { + "epoch": 4.6445012787723785, + "grad_norm": 0.15551450371650033, + "learning_rate": 6.142253609706898e-06, + "loss": 1.054, + "step": 1816 + }, + { + "epoch": 4.647058823529412, + "grad_norm": 0.19632917660508345, + "learning_rate": 6.130488941332151e-06, + "loss": 1.0512, + "step": 1817 + }, + { + "epoch": 4.649616368286445, + "grad_norm": 0.15643968941800954, + "learning_rate": 6.118730568409951e-06, + "loss": 1.039, + "step": 1818 + }, + { + "epoch": 4.6521739130434785, + "grad_norm": 0.20652844984094032, + "learning_rate": 6.106978510070443e-06, + "loss": 1.0129, + "step": 1819 + }, + { + "epoch": 4.654731457800511, + "grad_norm": 0.15097637750201956, + "learning_rate": 6.095232785433485e-06, + "loss": 1.0003, + "step": 1820 + }, + { + "epoch": 4.657289002557545, + "grad_norm": 0.20892906717171159, + "learning_rate": 6.083493413608639e-06, + "loss": 1.0032, + "step": 1821 + }, + { + "epoch": 4.659846547314578, + "grad_norm": 0.14676895460609313, + "learning_rate": 6.0717604136951315e-06, + "loss": 1.0575, + "step": 1822 + }, + { + "epoch": 4.662404092071611, + "grad_norm": 0.1744598380072282, + "learning_rate": 6.0600338047818155e-06, + "loss": 1.0012, + "step": 1823 + }, + { + "epoch": 4.664961636828645, + "grad_norm": 0.15898084906509888, + "learning_rate": 6.048313605947153e-06, + "loss": 1.0152, + "step": 1824 + }, + { + "epoch": 4.667519181585678, + "grad_norm": 0.18500242483627394, + "learning_rate": 6.036599836259175e-06, + "loss": 1.0202, + "step": 1825 + }, + { + "epoch": 4.670076726342711, + "grad_norm": 0.17586881973502083, + "learning_rate": 6.024892514775451e-06, + "loss": 1.0152, + "step": 1826 + }, + { + "epoch": 4.672634271099744, + "grad_norm": 0.1751917297897623, + "learning_rate": 6.013191660543063e-06, + "loss": 1.0185, + "step": 1827 + }, + { + "epoch": 4.675191815856778, + "grad_norm": 0.16539844174921248, + "learning_rate": 6.001497292598566e-06, + "loss": 1.0091, + "step": 1828 + }, + { + "epoch": 4.677749360613811, + "grad_norm": 0.16305138932194513, + "learning_rate": 5.98980942996797e-06, + "loss": 1.0171, + "step": 1829 + }, + { + "epoch": 4.680306905370844, + "grad_norm": 0.1978081666622713, + "learning_rate": 5.97812809166669e-06, + "loss": 1.04, + "step": 1830 + }, + { + "epoch": 4.6828644501278776, + "grad_norm": 0.14529737115947974, + "learning_rate": 5.966453296699541e-06, + "loss": 1.0219, + "step": 1831 + }, + { + "epoch": 4.68542199488491, + "grad_norm": 0.19132792503166993, + "learning_rate": 5.954785064060678e-06, + "loss": 1.0466, + "step": 1832 + }, + { + "epoch": 4.687979539641944, + "grad_norm": 0.14925809757481498, + "learning_rate": 5.943123412733587e-06, + "loss": 1.0168, + "step": 1833 + }, + { + "epoch": 4.690537084398977, + "grad_norm": 0.19480783069632648, + "learning_rate": 5.931468361691053e-06, + "loss": 1.074, + "step": 1834 + }, + { + "epoch": 4.69309462915601, + "grad_norm": 0.1597024405029427, + "learning_rate": 5.919819929895106e-06, + "loss": 1.0365, + "step": 1835 + }, + { + "epoch": 4.695652173913043, + "grad_norm": 0.179287834985346, + "learning_rate": 5.9081781362970205e-06, + "loss": 1.0461, + "step": 1836 + }, + { + "epoch": 4.698209718670077, + "grad_norm": 0.16882218098581764, + "learning_rate": 5.896542999837265e-06, + "loss": 1.0305, + "step": 1837 + }, + { + "epoch": 4.70076726342711, + "grad_norm": 0.14058129617791865, + "learning_rate": 5.8849145394454806e-06, + "loss": 0.9987, + "step": 1838 + }, + { + "epoch": 4.703324808184143, + "grad_norm": 0.18349693674349288, + "learning_rate": 5.873292774040442e-06, + "loss": 0.9943, + "step": 1839 + }, + { + "epoch": 4.705882352941177, + "grad_norm": 0.1610970199108, + "learning_rate": 5.861677722530037e-06, + "loss": 1.0579, + "step": 1840 + }, + { + "epoch": 4.708439897698209, + "grad_norm": 0.166987113555728, + "learning_rate": 5.850069403811235e-06, + "loss": 1.0181, + "step": 1841 + }, + { + "epoch": 4.710997442455243, + "grad_norm": 0.1677755864894642, + "learning_rate": 5.8384678367700325e-06, + "loss": 1.0125, + "step": 1842 + }, + { + "epoch": 4.713554987212277, + "grad_norm": 0.1779899568878102, + "learning_rate": 5.826873040281462e-06, + "loss": 1.0157, + "step": 1843 + }, + { + "epoch": 4.716112531969309, + "grad_norm": 0.16348039752545065, + "learning_rate": 5.81528503320953e-06, + "loss": 1.0343, + "step": 1844 + }, + { + "epoch": 4.718670076726343, + "grad_norm": 0.1670971620135551, + "learning_rate": 5.8037038344072e-06, + "loss": 1.0318, + "step": 1845 + }, + { + "epoch": 4.721227621483376, + "grad_norm": 0.18617223073968917, + "learning_rate": 5.792129462716355e-06, + "loss": 1.0219, + "step": 1846 + }, + { + "epoch": 4.723785166240409, + "grad_norm": 0.15449905092529612, + "learning_rate": 5.780561936967779e-06, + "loss": 1.0272, + "step": 1847 + }, + { + "epoch": 4.726342710997442, + "grad_norm": 0.1750868480359856, + "learning_rate": 5.769001275981112e-06, + "loss": 1.0565, + "step": 1848 + }, + { + "epoch": 4.728900255754476, + "grad_norm": 0.1663229129876114, + "learning_rate": 5.757447498564821e-06, + "loss": 1.0535, + "step": 1849 + }, + { + "epoch": 4.731457800511509, + "grad_norm": 0.15809631122185844, + "learning_rate": 5.745900623516189e-06, + "loss": 1.021, + "step": 1850 + }, + { + "epoch": 4.734015345268542, + "grad_norm": 0.16459750473842777, + "learning_rate": 5.734360669621255e-06, + "loss": 1.0248, + "step": 1851 + }, + { + "epoch": 4.736572890025576, + "grad_norm": 0.15287249372875325, + "learning_rate": 5.722827655654801e-06, + "loss": 1.0156, + "step": 1852 + }, + { + "epoch": 4.739130434782608, + "grad_norm": 0.1605211421637796, + "learning_rate": 5.711301600380317e-06, + "loss": 1.0569, + "step": 1853 + }, + { + "epoch": 4.741687979539642, + "grad_norm": 0.14939498740260876, + "learning_rate": 5.699782522549983e-06, + "loss": 1.0509, + "step": 1854 + }, + { + "epoch": 4.744245524296675, + "grad_norm": 0.16398542522125342, + "learning_rate": 5.688270440904613e-06, + "loss": 1.0273, + "step": 1855 + }, + { + "epoch": 4.746803069053708, + "grad_norm": 0.16733173044314129, + "learning_rate": 5.6767653741736405e-06, + "loss": 0.9938, + "step": 1856 + }, + { + "epoch": 4.749360613810742, + "grad_norm": 0.1505426061615439, + "learning_rate": 5.665267341075098e-06, + "loss": 1.0144, + "step": 1857 + }, + { + "epoch": 4.751918158567775, + "grad_norm": 0.1527851077672571, + "learning_rate": 5.653776360315562e-06, + "loss": 1.0478, + "step": 1858 + }, + { + "epoch": 4.754475703324808, + "grad_norm": 0.16913240191236387, + "learning_rate": 5.642292450590134e-06, + "loss": 1.0122, + "step": 1859 + }, + { + "epoch": 4.757033248081841, + "grad_norm": 0.158875356158748, + "learning_rate": 5.630815630582429e-06, + "loss": 1.0413, + "step": 1860 + }, + { + "epoch": 4.759590792838875, + "grad_norm": 0.14953756040104652, + "learning_rate": 5.61934591896451e-06, + "loss": 1.0337, + "step": 1861 + }, + { + "epoch": 4.762148337595908, + "grad_norm": 0.17219828313172605, + "learning_rate": 5.60788333439688e-06, + "loss": 1.0287, + "step": 1862 + }, + { + "epoch": 4.764705882352941, + "grad_norm": 0.1659776610530445, + "learning_rate": 5.596427895528443e-06, + "loss": 1.0443, + "step": 1863 + }, + { + "epoch": 4.767263427109975, + "grad_norm": 0.1676484186832149, + "learning_rate": 5.584979620996491e-06, + "loss": 1.0489, + "step": 1864 + }, + { + "epoch": 4.7698209718670075, + "grad_norm": 0.1623795959715509, + "learning_rate": 5.573538529426645e-06, + "loss": 1.0144, + "step": 1865 + }, + { + "epoch": 4.772378516624041, + "grad_norm": 0.16256260144035772, + "learning_rate": 5.562104639432845e-06, + "loss": 1.0427, + "step": 1866 + }, + { + "epoch": 4.774936061381074, + "grad_norm": 0.17175961986303814, + "learning_rate": 5.550677969617319e-06, + "loss": 1.0162, + "step": 1867 + }, + { + "epoch": 4.7774936061381075, + "grad_norm": 0.1542050330321217, + "learning_rate": 5.539258538570544e-06, + "loss": 1.0164, + "step": 1868 + }, + { + "epoch": 4.78005115089514, + "grad_norm": 0.15918533657676529, + "learning_rate": 5.527846364871219e-06, + "loss": 1.0309, + "step": 1869 + }, + { + "epoch": 4.782608695652174, + "grad_norm": 0.1403676241793028, + "learning_rate": 5.516441467086231e-06, + "loss": 1.0228, + "step": 1870 + }, + { + "epoch": 4.7851662404092075, + "grad_norm": 0.14773251181856192, + "learning_rate": 5.505043863770646e-06, + "loss": 1.0734, + "step": 1871 + }, + { + "epoch": 4.78772378516624, + "grad_norm": 0.16196858898805197, + "learning_rate": 5.493653573467647e-06, + "loss": 1.0048, + "step": 1872 + }, + { + "epoch": 4.790281329923274, + "grad_norm": 0.15355301379517172, + "learning_rate": 5.4822706147085205e-06, + "loss": 1.0125, + "step": 1873 + }, + { + "epoch": 4.792838874680307, + "grad_norm": 0.18982539717495267, + "learning_rate": 5.470895006012637e-06, + "loss": 0.9959, + "step": 1874 + }, + { + "epoch": 4.79539641943734, + "grad_norm": 0.1573171337655545, + "learning_rate": 5.459526765887397e-06, + "loss": 1.0297, + "step": 1875 + }, + { + "epoch": 4.797953964194374, + "grad_norm": 0.16351573968402464, + "learning_rate": 5.448165912828214e-06, + "loss": 0.9945, + "step": 1876 + }, + { + "epoch": 4.8005115089514065, + "grad_norm": 0.18629349709548856, + "learning_rate": 5.4368124653184835e-06, + "loss": 1.0363, + "step": 1877 + }, + { + "epoch": 4.80306905370844, + "grad_norm": 0.17008978855695026, + "learning_rate": 5.4254664418295634e-06, + "loss": 1.0273, + "step": 1878 + }, + { + "epoch": 4.805626598465473, + "grad_norm": 0.16524085689648021, + "learning_rate": 5.414127860820719e-06, + "loss": 1.0098, + "step": 1879 + }, + { + "epoch": 4.8081841432225065, + "grad_norm": 0.18739927868121126, + "learning_rate": 5.402796740739109e-06, + "loss": 1.0057, + "step": 1880 + }, + { + "epoch": 4.810741687979539, + "grad_norm": 0.17551431540439197, + "learning_rate": 5.391473100019767e-06, + "loss": 1.0378, + "step": 1881 + }, + { + "epoch": 4.813299232736573, + "grad_norm": 0.20076574431883742, + "learning_rate": 5.380156957085536e-06, + "loss": 1.0054, + "step": 1882 + }, + { + "epoch": 4.8158567774936065, + "grad_norm": 0.1633457331284817, + "learning_rate": 5.3688483303470895e-06, + "loss": 0.9945, + "step": 1883 + }, + { + "epoch": 4.818414322250639, + "grad_norm": 0.18981752589117254, + "learning_rate": 5.3575472382028386e-06, + "loss": 1.018, + "step": 1884 + }, + { + "epoch": 4.820971867007673, + "grad_norm": 0.1796254125656967, + "learning_rate": 5.346253699038966e-06, + "loss": 1.0175, + "step": 1885 + }, + { + "epoch": 4.823529411764706, + "grad_norm": 0.18612504881053146, + "learning_rate": 5.334967731229348e-06, + "loss": 1.0343, + "step": 1886 + }, + { + "epoch": 4.826086956521739, + "grad_norm": 0.1896503989682664, + "learning_rate": 5.323689353135546e-06, + "loss": 1.033, + "step": 1887 + }, + { + "epoch": 4.828644501278772, + "grad_norm": 0.17351769644886408, + "learning_rate": 5.312418583106784e-06, + "loss": 1.0341, + "step": 1888 + }, + { + "epoch": 4.831202046035806, + "grad_norm": 0.19813048664100952, + "learning_rate": 5.301155439479893e-06, + "loss": 1.0189, + "step": 1889 + }, + { + "epoch": 4.833759590792839, + "grad_norm": 0.17414587401870055, + "learning_rate": 5.289899940579315e-06, + "loss": 0.9979, + "step": 1890 + }, + { + "epoch": 4.836317135549872, + "grad_norm": 0.17954394790720937, + "learning_rate": 5.278652104717026e-06, + "loss": 1.033, + "step": 1891 + }, + { + "epoch": 4.838874680306906, + "grad_norm": 0.18225354012614833, + "learning_rate": 5.267411950192558e-06, + "loss": 1.0006, + "step": 1892 + }, + { + "epoch": 4.841432225063938, + "grad_norm": 0.19171250300846782, + "learning_rate": 5.256179495292953e-06, + "loss": 0.976, + "step": 1893 + }, + { + "epoch": 4.843989769820972, + "grad_norm": 0.16560762200333132, + "learning_rate": 5.244954758292691e-06, + "loss": 1.03, + "step": 1894 + }, + { + "epoch": 4.846547314578006, + "grad_norm": 0.17384349031638302, + "learning_rate": 5.233737757453733e-06, + "loss": 1.017, + "step": 1895 + }, + { + "epoch": 4.849104859335038, + "grad_norm": 0.18200737855014837, + "learning_rate": 5.222528511025429e-06, + "loss": 1.0544, + "step": 1896 + }, + { + "epoch": 4.851662404092072, + "grad_norm": 0.1674383880489774, + "learning_rate": 5.2113270372445334e-06, + "loss": 1.0199, + "step": 1897 + }, + { + "epoch": 4.854219948849105, + "grad_norm": 0.16206185822222566, + "learning_rate": 5.200133354335129e-06, + "loss": 1.0297, + "step": 1898 + }, + { + "epoch": 4.856777493606138, + "grad_norm": 0.16330979230562037, + "learning_rate": 5.188947480508644e-06, + "loss": 1.0618, + "step": 1899 + }, + { + "epoch": 4.859335038363171, + "grad_norm": 0.1641289208809162, + "learning_rate": 5.177769433963801e-06, + "loss": 1.0095, + "step": 1900 + }, + { + "epoch": 4.861892583120205, + "grad_norm": 0.16857653947800838, + "learning_rate": 5.166599232886579e-06, + "loss": 1.0132, + "step": 1901 + }, + { + "epoch": 4.864450127877237, + "grad_norm": 0.15123752972525892, + "learning_rate": 5.155436895450197e-06, + "loss": 1.0231, + "step": 1902 + }, + { + "epoch": 4.867007672634271, + "grad_norm": 0.18007827051394826, + "learning_rate": 5.144282439815075e-06, + "loss": 1.0299, + "step": 1903 + }, + { + "epoch": 4.869565217391305, + "grad_norm": 0.17145491388315698, + "learning_rate": 5.133135884128828e-06, + "loss": 1.0426, + "step": 1904 + }, + { + "epoch": 4.872122762148337, + "grad_norm": 0.15111451411798363, + "learning_rate": 5.121997246526188e-06, + "loss": 1.0335, + "step": 1905 + }, + { + "epoch": 4.874680306905371, + "grad_norm": 0.17562740075351813, + "learning_rate": 5.110866545129031e-06, + "loss": 1.0226, + "step": 1906 + }, + { + "epoch": 4.877237851662404, + "grad_norm": 0.14883986205754957, + "learning_rate": 5.099743798046315e-06, + "loss": 1.03, + "step": 1907 + }, + { + "epoch": 4.879795396419437, + "grad_norm": 0.16425606815927463, + "learning_rate": 5.088629023374052e-06, + "loss": 1.0524, + "step": 1908 + }, + { + "epoch": 4.882352941176471, + "grad_norm": 0.15699998164150683, + "learning_rate": 5.0775222391952826e-06, + "loss": 1.0598, + "step": 1909 + }, + { + "epoch": 4.884910485933504, + "grad_norm": 0.16747367530556498, + "learning_rate": 5.06642346358005e-06, + "loss": 1.0197, + "step": 1910 + }, + { + "epoch": 4.887468030690537, + "grad_norm": 0.19072243056188606, + "learning_rate": 5.055332714585372e-06, + "loss": 1.001, + "step": 1911 + }, + { + "epoch": 4.89002557544757, + "grad_norm": 0.16853967810789172, + "learning_rate": 5.044250010255202e-06, + "loss": 1.0432, + "step": 1912 + }, + { + "epoch": 4.892583120204604, + "grad_norm": 0.17828385119329374, + "learning_rate": 5.033175368620406e-06, + "loss": 1.0314, + "step": 1913 + }, + { + "epoch": 4.8951406649616365, + "grad_norm": 0.15062414843555882, + "learning_rate": 5.022108807698735e-06, + "loss": 1.0358, + "step": 1914 + }, + { + "epoch": 4.89769820971867, + "grad_norm": 0.17399854674836523, + "learning_rate": 5.0110503454947926e-06, + "loss": 1.0265, + "step": 1915 + }, + { + "epoch": 4.900255754475703, + "grad_norm": 0.16505478849391259, + "learning_rate": 5.000000000000003e-06, + "loss": 1.0495, + "step": 1916 + }, + { + "epoch": 4.9028132992327365, + "grad_norm": 0.1446909805445552, + "learning_rate": 4.988957789192583e-06, + "loss": 1.0044, + "step": 1917 + }, + { + "epoch": 4.90537084398977, + "grad_norm": 0.16047225013403066, + "learning_rate": 4.97792373103753e-06, + "loss": 0.977, + "step": 1918 + }, + { + "epoch": 4.907928388746803, + "grad_norm": 0.15267602057033672, + "learning_rate": 4.966897843486561e-06, + "loss": 1.0563, + "step": 1919 + }, + { + "epoch": 4.910485933503836, + "grad_norm": 0.14094891470116488, + "learning_rate": 4.955880144478101e-06, + "loss": 1.0172, + "step": 1920 + }, + { + "epoch": 4.913043478260869, + "grad_norm": 0.16225336285064607, + "learning_rate": 4.944870651937267e-06, + "loss": 1.0332, + "step": 1921 + }, + { + "epoch": 4.915601023017903, + "grad_norm": 0.15352807995544615, + "learning_rate": 4.933869383775809e-06, + "loss": 1.0285, + "step": 1922 + }, + { + "epoch": 4.918158567774936, + "grad_norm": 0.14893755036217834, + "learning_rate": 4.922876357892103e-06, + "loss": 1.0082, + "step": 1923 + }, + { + "epoch": 4.920716112531969, + "grad_norm": 0.17251988177114058, + "learning_rate": 4.911891592171113e-06, + "loss": 1.0131, + "step": 1924 + }, + { + "epoch": 4.923273657289003, + "grad_norm": 0.15340872718806947, + "learning_rate": 4.900915104484372e-06, + "loss": 1.0502, + "step": 1925 + }, + { + "epoch": 4.9258312020460355, + "grad_norm": 0.16259551968874744, + "learning_rate": 4.889946912689936e-06, + "loss": 1.0457, + "step": 1926 + }, + { + "epoch": 4.928388746803069, + "grad_norm": 0.15432669889294595, + "learning_rate": 4.878987034632361e-06, + "loss": 1.0491, + "step": 1927 + }, + { + "epoch": 4.930946291560103, + "grad_norm": 0.16399149074989694, + "learning_rate": 4.8680354881426935e-06, + "loss": 1.011, + "step": 1928 + }, + { + "epoch": 4.9335038363171355, + "grad_norm": 0.17537267004354543, + "learning_rate": 4.857092291038411e-06, + "loss": 1.0356, + "step": 1929 + }, + { + "epoch": 4.936061381074169, + "grad_norm": 0.15804425089068397, + "learning_rate": 4.846157461123411e-06, + "loss": 1.0556, + "step": 1930 + }, + { + "epoch": 4.938618925831202, + "grad_norm": 0.1644217524312441, + "learning_rate": 4.8352310161879724e-06, + "loss": 1.0521, + "step": 1931 + }, + { + "epoch": 4.9411764705882355, + "grad_norm": 0.166490586450367, + "learning_rate": 4.824312974008748e-06, + "loss": 1.0348, + "step": 1932 + }, + { + "epoch": 4.943734015345268, + "grad_norm": 0.15262614264530625, + "learning_rate": 4.813403352348703e-06, + "loss": 1.003, + "step": 1933 + }, + { + "epoch": 4.946291560102302, + "grad_norm": 0.16914604106371434, + "learning_rate": 4.8025021689571095e-06, + "loss": 1.0261, + "step": 1934 + }, + { + "epoch": 4.948849104859335, + "grad_norm": 0.14949420788516232, + "learning_rate": 4.791609441569517e-06, + "loss": 1.013, + "step": 1935 + }, + { + "epoch": 4.951406649616368, + "grad_norm": 0.18410232609002486, + "learning_rate": 4.780725187907707e-06, + "loss": 1.0211, + "step": 1936 + }, + { + "epoch": 4.953964194373402, + "grad_norm": 0.14300056243568887, + "learning_rate": 4.769849425679683e-06, + "loss": 1.0222, + "step": 1937 + }, + { + "epoch": 4.956521739130435, + "grad_norm": 0.17246451645014146, + "learning_rate": 4.758982172579621e-06, + "loss": 0.9967, + "step": 1938 + }, + { + "epoch": 4.959079283887468, + "grad_norm": 0.17259140226193048, + "learning_rate": 4.748123446287875e-06, + "loss": 1.0321, + "step": 1939 + }, + { + "epoch": 4.961636828644501, + "grad_norm": 1.1109363534677956, + "learning_rate": 4.737273264470909e-06, + "loss": 1.0923, + "step": 1940 + }, + { + "epoch": 4.964194373401535, + "grad_norm": 0.17074890567417172, + "learning_rate": 4.726431644781284e-06, + "loss": 1.0245, + "step": 1941 + }, + { + "epoch": 4.966751918158568, + "grad_norm": 0.15432050773937248, + "learning_rate": 4.715598604857648e-06, + "loss": 1.0378, + "step": 1942 + }, + { + "epoch": 4.969309462915601, + "grad_norm": 0.15888604747270782, + "learning_rate": 4.704774162324673e-06, + "loss": 1.0287, + "step": 1943 + }, + { + "epoch": 4.971867007672635, + "grad_norm": 0.17597082523498278, + "learning_rate": 4.6939583347930525e-06, + "loss": 1.0024, + "step": 1944 + }, + { + "epoch": 4.974424552429667, + "grad_norm": 0.15465610920055028, + "learning_rate": 4.6831511398594574e-06, + "loss": 1.0216, + "step": 1945 + }, + { + "epoch": 4.976982097186701, + "grad_norm": 0.16914400485984177, + "learning_rate": 4.672352595106525e-06, + "loss": 1.0595, + "step": 1946 + }, + { + "epoch": 4.979539641943734, + "grad_norm": 0.17772012019293779, + "learning_rate": 4.661562718102808e-06, + "loss": 1.0056, + "step": 1947 + }, + { + "epoch": 4.982097186700767, + "grad_norm": 0.14226899552306443, + "learning_rate": 4.65078152640276e-06, + "loss": 1.0221, + "step": 1948 + }, + { + "epoch": 4.9846547314578, + "grad_norm": 0.14866025187075746, + "learning_rate": 4.640009037546711e-06, + "loss": 1.0534, + "step": 1949 + }, + { + "epoch": 4.987212276214834, + "grad_norm": 0.18309163357147787, + "learning_rate": 4.629245269060826e-06, + "loss": 1.046, + "step": 1950 + }, + { + "epoch": 4.989769820971867, + "grad_norm": 0.14195791571684566, + "learning_rate": 4.61849023845708e-06, + "loss": 1.0119, + "step": 1951 + }, + { + "epoch": 4.9923273657289, + "grad_norm": 0.15240227847957083, + "learning_rate": 4.607743963233233e-06, + "loss": 1.0373, + "step": 1952 + }, + { + "epoch": 4.994884910485934, + "grad_norm": 0.1706260447764414, + "learning_rate": 4.5970064608728085e-06, + "loss": 0.9995, + "step": 1953 + }, + { + "epoch": 4.997442455242966, + "grad_norm": 0.16263531281395652, + "learning_rate": 4.586277748845056e-06, + "loss": 1.0053, + "step": 1954 + }, + { + "epoch": 5.0, + "grad_norm": 0.15411495644560275, + "learning_rate": 4.575557844604905e-06, + "loss": 1.0268, + "step": 1955 + }, + { + "epoch": 5.002557544757034, + "grad_norm": 0.15615925966080388, + "learning_rate": 4.5648467655929815e-06, + "loss": 1.0199, + "step": 1956 + }, + { + "epoch": 5.005115089514066, + "grad_norm": 0.16045903540647527, + "learning_rate": 4.554144529235537e-06, + "loss": 1.0277, + "step": 1957 + }, + { + "epoch": 5.0076726342711, + "grad_norm": 0.16031341969126212, + "learning_rate": 4.543451152944438e-06, + "loss": 1.0562, + "step": 1958 + }, + { + "epoch": 5.010230179028133, + "grad_norm": 0.1429706019310508, + "learning_rate": 4.532766654117146e-06, + "loss": 1.031, + "step": 1959 + }, + { + "epoch": 5.012787723785166, + "grad_norm": 0.15753846906492294, + "learning_rate": 4.5220910501366635e-06, + "loss": 1.0368, + "step": 1960 + }, + { + "epoch": 5.015345268542199, + "grad_norm": 0.14579202507979455, + "learning_rate": 4.511424358371544e-06, + "loss": 1.0358, + "step": 1961 + }, + { + "epoch": 5.017902813299233, + "grad_norm": 0.15694921661063782, + "learning_rate": 4.500766596175813e-06, + "loss": 1.0037, + "step": 1962 + }, + { + "epoch": 5.020460358056266, + "grad_norm": 0.16268209756361607, + "learning_rate": 4.490117780888994e-06, + "loss": 1.0191, + "step": 1963 + }, + { + "epoch": 5.023017902813299, + "grad_norm": 0.13601692002794843, + "learning_rate": 4.479477929836039e-06, + "loss": 1.0225, + "step": 1964 + }, + { + "epoch": 5.025575447570333, + "grad_norm": 0.1513485213042126, + "learning_rate": 4.4688470603273184e-06, + "loss": 0.9987, + "step": 1965 + }, + { + "epoch": 5.028132992327365, + "grad_norm": 0.14505501997147888, + "learning_rate": 4.458225189658598e-06, + "loss": 1.0244, + "step": 1966 + }, + { + "epoch": 5.030690537084399, + "grad_norm": 0.15866972934335427, + "learning_rate": 4.447612335110991e-06, + "loss": 1.0147, + "step": 1967 + }, + { + "epoch": 5.033248081841432, + "grad_norm": 0.15717036214065513, + "learning_rate": 4.43700851395096e-06, + "loss": 1.0056, + "step": 1968 + }, + { + "epoch": 5.035805626598465, + "grad_norm": 0.15634999112536652, + "learning_rate": 4.426413743430241e-06, + "loss": 1.0486, + "step": 1969 + }, + { + "epoch": 5.038363171355499, + "grad_norm": 0.1549586768650421, + "learning_rate": 4.415828040785877e-06, + "loss": 1.0046, + "step": 1970 + }, + { + "epoch": 5.040920716112532, + "grad_norm": 0.1643495461245206, + "learning_rate": 4.405251423240138e-06, + "loss": 1.0158, + "step": 1971 + }, + { + "epoch": 5.043478260869565, + "grad_norm": 0.14558675280550004, + "learning_rate": 4.3946839080005236e-06, + "loss": 1.0167, + "step": 1972 + }, + { + "epoch": 5.046035805626598, + "grad_norm": 0.16057769002475886, + "learning_rate": 4.384125512259718e-06, + "loss": 1.0412, + "step": 1973 + }, + { + "epoch": 5.048593350383632, + "grad_norm": 0.1589654545230765, + "learning_rate": 4.373576253195568e-06, + "loss": 1.0058, + "step": 1974 + }, + { + "epoch": 5.051150895140665, + "grad_norm": 0.14004326798784272, + "learning_rate": 4.363036147971069e-06, + "loss": 0.9958, + "step": 1975 + }, + { + "epoch": 5.053708439897698, + "grad_norm": 0.16704739125788623, + "learning_rate": 4.352505213734298e-06, + "loss": 1.0202, + "step": 1976 + }, + { + "epoch": 5.056265984654732, + "grad_norm": 0.15270263482532218, + "learning_rate": 4.3419834676184395e-06, + "loss": 1.0221, + "step": 1977 + }, + { + "epoch": 5.0588235294117645, + "grad_norm": 0.15264750560420307, + "learning_rate": 4.331470926741707e-06, + "loss": 1.0264, + "step": 1978 + }, + { + "epoch": 5.061381074168798, + "grad_norm": 0.1675831575968936, + "learning_rate": 4.320967608207354e-06, + "loss": 1.0256, + "step": 1979 + }, + { + "epoch": 5.063938618925831, + "grad_norm": 0.15506176173449848, + "learning_rate": 4.3104735291036214e-06, + "loss": 1.0246, + "step": 1980 + }, + { + "epoch": 5.0664961636828645, + "grad_norm": 0.147438074557832, + "learning_rate": 4.299988706503716e-06, + "loss": 0.9895, + "step": 1981 + }, + { + "epoch": 5.069053708439898, + "grad_norm": 0.13712823238173896, + "learning_rate": 4.289513157465796e-06, + "loss": 1.0069, + "step": 1982 + }, + { + "epoch": 5.071611253196931, + "grad_norm": 0.1530445973165712, + "learning_rate": 4.279046899032918e-06, + "loss": 1.028, + "step": 1983 + }, + { + "epoch": 5.0741687979539645, + "grad_norm": 0.1487111811647309, + "learning_rate": 4.268589948233034e-06, + "loss": 0.9806, + "step": 1984 + }, + { + "epoch": 5.076726342710997, + "grad_norm": 0.1536495899212468, + "learning_rate": 4.258142322078944e-06, + "loss": 1.0141, + "step": 1985 + }, + { + "epoch": 5.079283887468031, + "grad_norm": 0.1420705753526825, + "learning_rate": 4.247704037568289e-06, + "loss": 1.0484, + "step": 1986 + }, + { + "epoch": 5.081841432225064, + "grad_norm": 0.14854933088338998, + "learning_rate": 4.237275111683502e-06, + "loss": 1.0176, + "step": 1987 + }, + { + "epoch": 5.084398976982097, + "grad_norm": 0.15085396882702742, + "learning_rate": 4.226855561391792e-06, + "loss": 1.0241, + "step": 1988 + }, + { + "epoch": 5.086956521739131, + "grad_norm": 0.13480571166529362, + "learning_rate": 4.2164454036451185e-06, + "loss": 1.0105, + "step": 1989 + }, + { + "epoch": 5.089514066496164, + "grad_norm": 0.15439478858765343, + "learning_rate": 4.2060446553801585e-06, + "loss": 1.0571, + "step": 1990 + }, + { + "epoch": 5.092071611253197, + "grad_norm": 0.14887589003918353, + "learning_rate": 4.195653333518271e-06, + "loss": 1.0309, + "step": 1991 + }, + { + "epoch": 5.09462915601023, + "grad_norm": 0.14823587280930983, + "learning_rate": 4.1852714549654985e-06, + "loss": 1.0286, + "step": 1992 + }, + { + "epoch": 5.0971867007672635, + "grad_norm": 0.1502816473196306, + "learning_rate": 4.1748990366125005e-06, + "loss": 1.0092, + "step": 1993 + }, + { + "epoch": 5.099744245524296, + "grad_norm": 0.13426636004437947, + "learning_rate": 4.164536095334557e-06, + "loss": 1.0055, + "step": 1994 + }, + { + "epoch": 5.10230179028133, + "grad_norm": 0.14869672831898953, + "learning_rate": 4.154182647991519e-06, + "loss": 1.0492, + "step": 1995 + }, + { + "epoch": 5.1048593350383635, + "grad_norm": 0.15755018419795028, + "learning_rate": 4.143838711427808e-06, + "loss": 1.0103, + "step": 1996 + }, + { + "epoch": 5.107416879795396, + "grad_norm": 0.1503017786383216, + "learning_rate": 4.133504302472356e-06, + "loss": 1.0015, + "step": 1997 + }, + { + "epoch": 5.10997442455243, + "grad_norm": 0.14022700208845976, + "learning_rate": 4.123179437938596e-06, + "loss": 1.0394, + "step": 1998 + }, + { + "epoch": 5.112531969309463, + "grad_norm": 0.149747082086179, + "learning_rate": 4.112864134624447e-06, + "loss": 1.0406, + "step": 1999 + }, + { + "epoch": 5.115089514066496, + "grad_norm": 0.15174138196167658, + "learning_rate": 4.102558409312256e-06, + "loss": 1.022, + "step": 2000 + }, + { + "epoch": 5.117647058823529, + "grad_norm": 0.14846170493390945, + "learning_rate": 4.092262278768797e-06, + "loss": 1.0132, + "step": 2001 + }, + { + "epoch": 5.120204603580563, + "grad_norm": 0.14541949365283377, + "learning_rate": 4.0819757597452246e-06, + "loss": 1.0328, + "step": 2002 + }, + { + "epoch": 5.122762148337596, + "grad_norm": 0.16073985913183766, + "learning_rate": 4.0716988689770695e-06, + "loss": 1.0067, + "step": 2003 + }, + { + "epoch": 5.125319693094629, + "grad_norm": 0.14371815787004755, + "learning_rate": 4.061431623184188e-06, + "loss": 1.0289, + "step": 2004 + }, + { + "epoch": 5.127877237851663, + "grad_norm": 0.14339076964243316, + "learning_rate": 4.051174039070742e-06, + "loss": 0.9812, + "step": 2005 + }, + { + "epoch": 5.130434782608695, + "grad_norm": 0.1437711220903366, + "learning_rate": 4.040926133325188e-06, + "loss": 1.0059, + "step": 2006 + }, + { + "epoch": 5.132992327365729, + "grad_norm": 0.1432806446083087, + "learning_rate": 4.030687922620223e-06, + "loss": 1.0183, + "step": 2007 + }, + { + "epoch": 5.135549872122763, + "grad_norm": 0.14407049755074497, + "learning_rate": 4.020459423612777e-06, + "loss": 1.0328, + "step": 2008 + }, + { + "epoch": 5.138107416879795, + "grad_norm": 0.14311456671607106, + "learning_rate": 4.010240652943974e-06, + "loss": 1.0247, + "step": 2009 + }, + { + "epoch": 5.140664961636829, + "grad_norm": 0.14651674275116736, + "learning_rate": 4.000031627239123e-06, + "loss": 1.0271, + "step": 2010 + }, + { + "epoch": 5.143222506393862, + "grad_norm": 0.14244659447949104, + "learning_rate": 3.989832363107664e-06, + "loss": 0.9729, + "step": 2011 + }, + { + "epoch": 5.145780051150895, + "grad_norm": 0.1474525383109307, + "learning_rate": 3.9796428771431625e-06, + "loss": 1.0208, + "step": 2012 + }, + { + "epoch": 5.148337595907928, + "grad_norm": 0.14684653759057748, + "learning_rate": 3.96946318592328e-06, + "loss": 0.9944, + "step": 2013 + }, + { + "epoch": 5.150895140664962, + "grad_norm": 0.14793817657477276, + "learning_rate": 3.959293306009734e-06, + "loss": 1.0606, + "step": 2014 + }, + { + "epoch": 5.153452685421995, + "grad_norm": 0.13847357302909763, + "learning_rate": 3.949133253948284e-06, + "loss": 1.0035, + "step": 2015 + }, + { + "epoch": 5.156010230179028, + "grad_norm": 0.14747847539008258, + "learning_rate": 3.938983046268695e-06, + "loss": 0.9869, + "step": 2016 + }, + { + "epoch": 5.158567774936062, + "grad_norm": 0.14511374476416694, + "learning_rate": 3.9288426994847285e-06, + "loss": 1.0238, + "step": 2017 + }, + { + "epoch": 5.161125319693094, + "grad_norm": 0.15030414965811079, + "learning_rate": 3.918712230094091e-06, + "loss": 1.0521, + "step": 2018 + }, + { + "epoch": 5.163682864450128, + "grad_norm": 0.14420923408617164, + "learning_rate": 3.908591654578417e-06, + "loss": 0.9878, + "step": 2019 + }, + { + "epoch": 5.166240409207161, + "grad_norm": 0.1369795797536583, + "learning_rate": 3.89848098940326e-06, + "loss": 1.0203, + "step": 2020 + }, + { + "epoch": 5.168797953964194, + "grad_norm": 0.15862135307508646, + "learning_rate": 3.888380251018035e-06, + "loss": 1.0112, + "step": 2021 + }, + { + "epoch": 5.171355498721228, + "grad_norm": 0.13968732984433663, + "learning_rate": 3.878289455856013e-06, + "loss": 1.0589, + "step": 2022 + }, + { + "epoch": 5.173913043478261, + "grad_norm": 0.14444481777607088, + "learning_rate": 3.868208620334282e-06, + "loss": 1.0065, + "step": 2023 + }, + { + "epoch": 5.176470588235294, + "grad_norm": 0.14184611750434217, + "learning_rate": 3.858137760853737e-06, + "loss": 1.0189, + "step": 2024 + }, + { + "epoch": 5.179028132992327, + "grad_norm": 0.14923144029216218, + "learning_rate": 3.84807689379904e-06, + "loss": 1.0052, + "step": 2025 + }, + { + "epoch": 5.181585677749361, + "grad_norm": 0.15459564247502722, + "learning_rate": 3.838026035538581e-06, + "loss": 0.9946, + "step": 2026 + }, + { + "epoch": 5.1841432225063935, + "grad_norm": 0.1418795966374483, + "learning_rate": 3.827985202424488e-06, + "loss": 1.0234, + "step": 2027 + }, + { + "epoch": 5.186700767263427, + "grad_norm": 0.1553154903132494, + "learning_rate": 3.817954410792565e-06, + "loss": 1.0137, + "step": 2028 + }, + { + "epoch": 5.189258312020461, + "grad_norm": 0.14275503896178632, + "learning_rate": 3.8079336769622834e-06, + "loss": 1.0289, + "step": 2029 + }, + { + "epoch": 5.1918158567774935, + "grad_norm": 0.13897565956134958, + "learning_rate": 3.7979230172367453e-06, + "loss": 1.0148, + "step": 2030 + }, + { + "epoch": 5.194373401534527, + "grad_norm": 0.14252828284486727, + "learning_rate": 3.7879224479026745e-06, + "loss": 1.0068, + "step": 2031 + }, + { + "epoch": 5.19693094629156, + "grad_norm": 0.1517901716492953, + "learning_rate": 3.7779319852303766e-06, + "loss": 1.0572, + "step": 2032 + }, + { + "epoch": 5.1994884910485935, + "grad_norm": 0.1439259357160915, + "learning_rate": 3.7679516454736977e-06, + "loss": 1.0446, + "step": 2033 + }, + { + "epoch": 5.202046035805626, + "grad_norm": 0.1371345617669485, + "learning_rate": 3.757981444870035e-06, + "loss": 0.9957, + "step": 2034 + }, + { + "epoch": 5.20460358056266, + "grad_norm": 0.16004739713130242, + "learning_rate": 3.748021399640279e-06, + "loss": 1.0276, + "step": 2035 + }, + { + "epoch": 5.207161125319693, + "grad_norm": 0.1441426514349444, + "learning_rate": 3.7380715259888e-06, + "loss": 1.0344, + "step": 2036 + }, + { + "epoch": 5.209718670076726, + "grad_norm": 0.14152534835692054, + "learning_rate": 3.7281318401034183e-06, + "loss": 0.9949, + "step": 2037 + }, + { + "epoch": 5.21227621483376, + "grad_norm": 0.1481149663167974, + "learning_rate": 3.718202358155384e-06, + "loss": 1.0545, + "step": 2038 + }, + { + "epoch": 5.2148337595907925, + "grad_norm": 0.13716666870403715, + "learning_rate": 3.7082830962993497e-06, + "loss": 1.0388, + "step": 2039 + }, + { + "epoch": 5.217391304347826, + "grad_norm": 0.1427599492035968, + "learning_rate": 3.6983740706733207e-06, + "loss": 0.9945, + "step": 2040 + }, + { + "epoch": 5.21994884910486, + "grad_norm": 0.14437989757241948, + "learning_rate": 3.688475297398674e-06, + "loss": 1.037, + "step": 2041 + }, + { + "epoch": 5.2225063938618925, + "grad_norm": 0.1407689885502161, + "learning_rate": 3.6785867925800856e-06, + "loss": 1.0019, + "step": 2042 + }, + { + "epoch": 5.225063938618926, + "grad_norm": 0.1381622930416597, + "learning_rate": 3.668708572305546e-06, + "loss": 1.0384, + "step": 2043 + }, + { + "epoch": 5.227621483375959, + "grad_norm": 0.13975927307572164, + "learning_rate": 3.658840652646287e-06, + "loss": 1.0018, + "step": 2044 + }, + { + "epoch": 5.2301790281329925, + "grad_norm": 0.15578171256673842, + "learning_rate": 3.6489830496568067e-06, + "loss": 1.0221, + "step": 2045 + }, + { + "epoch": 5.232736572890025, + "grad_norm": 0.14587450260403836, + "learning_rate": 3.639135779374813e-06, + "loss": 1.0462, + "step": 2046 + }, + { + "epoch": 5.235294117647059, + "grad_norm": 0.14336907869458113, + "learning_rate": 3.6292988578211863e-06, + "loss": 1.0242, + "step": 2047 + }, + { + "epoch": 5.2378516624040925, + "grad_norm": 0.13614785911809554, + "learning_rate": 3.619472300999992e-06, + "loss": 1.002, + "step": 2048 + }, + { + "epoch": 5.240409207161125, + "grad_norm": 0.14654873047839187, + "learning_rate": 3.6096561248984186e-06, + "loss": 1.0365, + "step": 2049 + }, + { + "epoch": 5.242966751918159, + "grad_norm": 0.14832735168435557, + "learning_rate": 3.5998503454867807e-06, + "loss": 1.0206, + "step": 2050 + }, + { + "epoch": 5.245524296675192, + "grad_norm": 0.15182549845090051, + "learning_rate": 3.5900549787184534e-06, + "loss": 1.0086, + "step": 2051 + }, + { + "epoch": 5.248081841432225, + "grad_norm": 0.15218834374865772, + "learning_rate": 3.580270040529894e-06, + "loss": 1.0457, + "step": 2052 + }, + { + "epoch": 5.250639386189258, + "grad_norm": 0.1386445311628316, + "learning_rate": 3.570495546840591e-06, + "loss": 1.0316, + "step": 2053 + }, + { + "epoch": 5.253196930946292, + "grad_norm": 0.1415172130548022, + "learning_rate": 3.560731513553022e-06, + "loss": 1.033, + "step": 2054 + }, + { + "epoch": 5.255754475703325, + "grad_norm": 0.134688736061587, + "learning_rate": 3.5509779565526683e-06, + "loss": 1.0341, + "step": 2055 + }, + { + "epoch": 5.258312020460358, + "grad_norm": 0.14665953403303808, + "learning_rate": 3.5412348917079507e-06, + "loss": 1.0621, + "step": 2056 + }, + { + "epoch": 5.260869565217392, + "grad_norm": 0.13619183573807261, + "learning_rate": 3.5315023348702325e-06, + "loss": 1.0366, + "step": 2057 + }, + { + "epoch": 5.263427109974424, + "grad_norm": 0.13658849089622857, + "learning_rate": 3.521780301873773e-06, + "loss": 1.0008, + "step": 2058 + }, + { + "epoch": 5.265984654731458, + "grad_norm": 0.14630387436677678, + "learning_rate": 3.512068808535707e-06, + "loss": 1.0147, + "step": 2059 + }, + { + "epoch": 5.268542199488491, + "grad_norm": 0.13734073999332427, + "learning_rate": 3.502367870656035e-06, + "loss": 1.028, + "step": 2060 + }, + { + "epoch": 5.271099744245524, + "grad_norm": 0.1355644028489033, + "learning_rate": 3.492677504017573e-06, + "loss": 1.0026, + "step": 2061 + }, + { + "epoch": 5.273657289002558, + "grad_norm": 0.14119902993384847, + "learning_rate": 3.4829977243859414e-06, + "loss": 1.0093, + "step": 2062 + }, + { + "epoch": 5.276214833759591, + "grad_norm": 0.14118557253626327, + "learning_rate": 3.4733285475095324e-06, + "loss": 1.0255, + "step": 2063 + }, + { + "epoch": 5.278772378516624, + "grad_norm": 0.13630213438701977, + "learning_rate": 3.4636699891195e-06, + "loss": 1.0176, + "step": 2064 + }, + { + "epoch": 5.281329923273657, + "grad_norm": 0.1355438862392238, + "learning_rate": 3.454022064929711e-06, + "loss": 1.0355, + "step": 2065 + }, + { + "epoch": 5.283887468030691, + "grad_norm": 0.1335405410237401, + "learning_rate": 3.4443847906367313e-06, + "loss": 0.9999, + "step": 2066 + }, + { + "epoch": 5.286445012787723, + "grad_norm": 0.13568542243072879, + "learning_rate": 3.4347581819198095e-06, + "loss": 1.0069, + "step": 2067 + }, + { + "epoch": 5.289002557544757, + "grad_norm": 0.14279750042804518, + "learning_rate": 3.425142254440835e-06, + "loss": 1.0316, + "step": 2068 + }, + { + "epoch": 5.291560102301791, + "grad_norm": 0.1421562223189775, + "learning_rate": 3.4155370238443185e-06, + "loss": 0.9929, + "step": 2069 + }, + { + "epoch": 5.294117647058823, + "grad_norm": 0.13090998129388792, + "learning_rate": 3.405942505757367e-06, + "loss": 1.0235, + "step": 2070 + }, + { + "epoch": 5.296675191815857, + "grad_norm": 0.1447611334505954, + "learning_rate": 3.3963587157896694e-06, + "loss": 0.9883, + "step": 2071 + }, + { + "epoch": 5.29923273657289, + "grad_norm": 0.1486460622906693, + "learning_rate": 3.386785669533447e-06, + "loss": 1.0614, + "step": 2072 + }, + { + "epoch": 5.301790281329923, + "grad_norm": 0.13082209863415079, + "learning_rate": 3.377223382563446e-06, + "loss": 1.019, + "step": 2073 + }, + { + "epoch": 5.304347826086957, + "grad_norm": 0.14431855838963542, + "learning_rate": 3.367671870436915e-06, + "loss": 1.0744, + "step": 2074 + }, + { + "epoch": 5.30690537084399, + "grad_norm": 0.13501366283453947, + "learning_rate": 3.358131148693564e-06, + "loss": 1.0204, + "step": 2075 + }, + { + "epoch": 5.309462915601023, + "grad_norm": 0.13647498103708036, + "learning_rate": 3.3486012328555505e-06, + "loss": 1.0361, + "step": 2076 + }, + { + "epoch": 5.312020460358056, + "grad_norm": 0.13678423051822214, + "learning_rate": 3.33908213842745e-06, + "loss": 1.0416, + "step": 2077 + }, + { + "epoch": 5.31457800511509, + "grad_norm": 0.15117370323671084, + "learning_rate": 3.3295738808962388e-06, + "loss": 1.0398, + "step": 2078 + }, + { + "epoch": 5.3171355498721224, + "grad_norm": 0.13218102548293045, + "learning_rate": 3.3200764757312555e-06, + "loss": 1.0211, + "step": 2079 + }, + { + "epoch": 5.319693094629156, + "grad_norm": 0.13875158228376064, + "learning_rate": 3.310589938384179e-06, + "loss": 1.0246, + "step": 2080 + }, + { + "epoch": 5.322250639386189, + "grad_norm": 0.1390888027343779, + "learning_rate": 3.301114284289021e-06, + "loss": 1.0228, + "step": 2081 + }, + { + "epoch": 5.324808184143222, + "grad_norm": 0.14311106791965889, + "learning_rate": 3.291649528862074e-06, + "loss": 1.0366, + "step": 2082 + }, + { + "epoch": 5.327365728900256, + "grad_norm": 0.1329482436934704, + "learning_rate": 3.2821956875019045e-06, + "loss": 0.9983, + "step": 2083 + }, + { + "epoch": 5.329923273657289, + "grad_norm": 0.1353254341465528, + "learning_rate": 3.272752775589316e-06, + "loss": 1.0262, + "step": 2084 + }, + { + "epoch": 5.332480818414322, + "grad_norm": 0.14279181335598803, + "learning_rate": 3.2633208084873445e-06, + "loss": 1.0214, + "step": 2085 + }, + { + "epoch": 5.335038363171355, + "grad_norm": 0.14938681808695, + "learning_rate": 3.253899801541206e-06, + "loss": 1.0458, + "step": 2086 + }, + { + "epoch": 5.337595907928389, + "grad_norm": 0.13903091402439763, + "learning_rate": 3.244489770078286e-06, + "loss": 1.0699, + "step": 2087 + }, + { + "epoch": 5.340153452685422, + "grad_norm": 0.14447995472723943, + "learning_rate": 3.2350907294081258e-06, + "loss": 0.9936, + "step": 2088 + }, + { + "epoch": 5.342710997442455, + "grad_norm": 0.14276869094442168, + "learning_rate": 3.2257026948223726e-06, + "loss": 1.0565, + "step": 2089 + }, + { + "epoch": 5.345268542199489, + "grad_norm": 0.14335515694613532, + "learning_rate": 3.2163256815947674e-06, + "loss": 0.9993, + "step": 2090 + }, + { + "epoch": 5.3478260869565215, + "grad_norm": 0.14665513927933138, + "learning_rate": 3.206959704981133e-06, + "loss": 1.0555, + "step": 2091 + }, + { + "epoch": 5.350383631713555, + "grad_norm": 0.1322833527352921, + "learning_rate": 3.197604780219323e-06, + "loss": 0.9652, + "step": 2092 + }, + { + "epoch": 5.352941176470588, + "grad_norm": 0.13906561826785738, + "learning_rate": 3.188260922529215e-06, + "loss": 1.0432, + "step": 2093 + }, + { + "epoch": 5.3554987212276215, + "grad_norm": 0.14254937224329012, + "learning_rate": 3.1789281471126786e-06, + "loss": 1.0175, + "step": 2094 + }, + { + "epoch": 5.358056265984655, + "grad_norm": 0.14911195774932937, + "learning_rate": 3.1696064691535634e-06, + "loss": 1.0024, + "step": 2095 + }, + { + "epoch": 5.360613810741688, + "grad_norm": 0.1296333526942248, + "learning_rate": 3.1602959038176516e-06, + "loss": 1.016, + "step": 2096 + }, + { + "epoch": 5.3631713554987215, + "grad_norm": 0.14492528039945102, + "learning_rate": 3.1509964662526484e-06, + "loss": 1.0072, + "step": 2097 + }, + { + "epoch": 5.365728900255754, + "grad_norm": 0.14261896658846623, + "learning_rate": 3.1417081715881623e-06, + "loss": 0.997, + "step": 2098 + }, + { + "epoch": 5.368286445012788, + "grad_norm": 0.15062841301973245, + "learning_rate": 3.132431034935667e-06, + "loss": 1.0286, + "step": 2099 + }, + { + "epoch": 5.370843989769821, + "grad_norm": 0.14079332067477582, + "learning_rate": 3.1231650713884832e-06, + "loss": 1.0331, + "step": 2100 + }, + { + "epoch": 5.373401534526854, + "grad_norm": 0.13555419460898196, + "learning_rate": 3.1139102960217493e-06, + "loss": 1.0041, + "step": 2101 + }, + { + "epoch": 5.375959079283888, + "grad_norm": 0.13880524146849596, + "learning_rate": 3.1046667238924155e-06, + "loss": 1.0423, + "step": 2102 + }, + { + "epoch": 5.378516624040921, + "grad_norm": 0.1511402878049476, + "learning_rate": 3.0954343700391897e-06, + "loss": 1.0349, + "step": 2103 + }, + { + "epoch": 5.381074168797954, + "grad_norm": 0.14254863702344298, + "learning_rate": 3.0862132494825325e-06, + "loss": 1.026, + "step": 2104 + }, + { + "epoch": 5.383631713554987, + "grad_norm": 0.1352194409726658, + "learning_rate": 3.0770033772246376e-06, + "loss": 0.9938, + "step": 2105 + }, + { + "epoch": 5.3861892583120206, + "grad_norm": 0.14319029352124846, + "learning_rate": 3.067804768249386e-06, + "loss": 0.9968, + "step": 2106 + }, + { + "epoch": 5.388746803069053, + "grad_norm": 0.1348404188548053, + "learning_rate": 3.058617437522342e-06, + "loss": 1.0166, + "step": 2107 + }, + { + "epoch": 5.391304347826087, + "grad_norm": 0.14010852729827156, + "learning_rate": 3.0494413999907125e-06, + "loss": 1.0066, + "step": 2108 + }, + { + "epoch": 5.3938618925831205, + "grad_norm": 0.1351055036158788, + "learning_rate": 3.0402766705833455e-06, + "loss": 1.0052, + "step": 2109 + }, + { + "epoch": 5.396419437340153, + "grad_norm": 0.13186613064153313, + "learning_rate": 3.0311232642106768e-06, + "loss": 0.9969, + "step": 2110 + }, + { + "epoch": 5.398976982097187, + "grad_norm": 0.1408809630359071, + "learning_rate": 3.021981195764726e-06, + "loss": 1.0283, + "step": 2111 + }, + { + "epoch": 5.40153452685422, + "grad_norm": 0.12965889759923607, + "learning_rate": 3.0128504801190716e-06, + "loss": 1.0179, + "step": 2112 + }, + { + "epoch": 5.404092071611253, + "grad_norm": 0.13945206906826596, + "learning_rate": 3.003731132128811e-06, + "loss": 1.0099, + "step": 2113 + }, + { + "epoch": 5.406649616368286, + "grad_norm": 0.1400549514773388, + "learning_rate": 2.9946231666305627e-06, + "loss": 0.998, + "step": 2114 + }, + { + "epoch": 5.40920716112532, + "grad_norm": 0.13519306803227119, + "learning_rate": 2.9855265984424042e-06, + "loss": 1.0069, + "step": 2115 + }, + { + "epoch": 5.411764705882353, + "grad_norm": 0.12988356378358373, + "learning_rate": 2.976441442363893e-06, + "loss": 0.9928, + "step": 2116 + }, + { + "epoch": 5.414322250639386, + "grad_norm": 0.13225437647406532, + "learning_rate": 2.967367713176007e-06, + "loss": 1.0082, + "step": 2117 + }, + { + "epoch": 5.41687979539642, + "grad_norm": 0.13453763452834291, + "learning_rate": 2.9583054256411326e-06, + "loss": 0.9779, + "step": 2118 + }, + { + "epoch": 5.419437340153452, + "grad_norm": 0.13933174777230192, + "learning_rate": 2.9492545945030517e-06, + "loss": 0.9947, + "step": 2119 + }, + { + "epoch": 5.421994884910486, + "grad_norm": 0.13265772100907866, + "learning_rate": 2.940215234486894e-06, + "loss": 1.0304, + "step": 2120 + }, + { + "epoch": 5.42455242966752, + "grad_norm": 0.13461066684644984, + "learning_rate": 2.9311873602991435e-06, + "loss": 1.0265, + "step": 2121 + }, + { + "epoch": 5.427109974424552, + "grad_norm": 0.13302962430701365, + "learning_rate": 2.922170986627573e-06, + "loss": 0.9907, + "step": 2122 + }, + { + "epoch": 5.429667519181586, + "grad_norm": 0.1372156107097446, + "learning_rate": 2.913166128141265e-06, + "loss": 1.0362, + "step": 2123 + }, + { + "epoch": 5.432225063938619, + "grad_norm": 0.13526418969755188, + "learning_rate": 2.9041727994905686e-06, + "loss": 1.0335, + "step": 2124 + }, + { + "epoch": 5.434782608695652, + "grad_norm": 0.14056788233892892, + "learning_rate": 2.895191015307055e-06, + "loss": 0.9863, + "step": 2125 + }, + { + "epoch": 5.437340153452685, + "grad_norm": 0.13830914570568487, + "learning_rate": 2.8862207902035334e-06, + "loss": 1.0279, + "step": 2126 + }, + { + "epoch": 5.439897698209719, + "grad_norm": 0.13255464251905436, + "learning_rate": 2.877262138773994e-06, + "loss": 1.0074, + "step": 2127 + }, + { + "epoch": 5.442455242966752, + "grad_norm": 0.13094809127879986, + "learning_rate": 2.8683150755936107e-06, + "loss": 1.0007, + "step": 2128 + }, + { + "epoch": 5.445012787723785, + "grad_norm": 0.13969902391137623, + "learning_rate": 2.859379615218685e-06, + "loss": 1.0183, + "step": 2129 + }, + { + "epoch": 5.447570332480819, + "grad_norm": 0.13298200813066383, + "learning_rate": 2.850455772186658e-06, + "loss": 1.0553, + "step": 2130 + }, + { + "epoch": 5.450127877237851, + "grad_norm": 0.13752465215056384, + "learning_rate": 2.8415435610160667e-06, + "loss": 1.0029, + "step": 2131 + }, + { + "epoch": 5.452685421994885, + "grad_norm": 0.13776730476333435, + "learning_rate": 2.8326429962065184e-06, + "loss": 1.0591, + "step": 2132 + }, + { + "epoch": 5.455242966751918, + "grad_norm": 0.15290697841832607, + "learning_rate": 2.8237540922386764e-06, + "loss": 1.0234, + "step": 2133 + }, + { + "epoch": 5.457800511508951, + "grad_norm": 0.1435647245473299, + "learning_rate": 2.8148768635742286e-06, + "loss": 1.0408, + "step": 2134 + }, + { + "epoch": 5.460358056265985, + "grad_norm": 0.1348972282036283, + "learning_rate": 2.8060113246558783e-06, + "loss": 1.0582, + "step": 2135 + }, + { + "epoch": 5.462915601023018, + "grad_norm": 0.14312694503231538, + "learning_rate": 2.7971574899072938e-06, + "loss": 1.0557, + "step": 2136 + }, + { + "epoch": 5.465473145780051, + "grad_norm": 0.14626596664710145, + "learning_rate": 2.7883153737331136e-06, + "loss": 1.0213, + "step": 2137 + }, + { + "epoch": 5.468030690537084, + "grad_norm": 0.12723321182479033, + "learning_rate": 2.7794849905189138e-06, + "loss": 1.0258, + "step": 2138 + }, + { + "epoch": 5.470588235294118, + "grad_norm": 0.1297835067922189, + "learning_rate": 2.7706663546311705e-06, + "loss": 0.9791, + "step": 2139 + }, + { + "epoch": 5.4731457800511505, + "grad_norm": 0.14065052834912603, + "learning_rate": 2.761859480417255e-06, + "loss": 1.0364, + "step": 2140 + }, + { + "epoch": 5.475703324808184, + "grad_norm": 0.14903101964341123, + "learning_rate": 2.753064382205396e-06, + "loss": 1.046, + "step": 2141 + }, + { + "epoch": 5.478260869565218, + "grad_norm": 0.12884063957129957, + "learning_rate": 2.7442810743046742e-06, + "loss": 1.0377, + "step": 2142 + }, + { + "epoch": 5.4808184143222505, + "grad_norm": 0.13327063753076238, + "learning_rate": 2.735509571004982e-06, + "loss": 1.0095, + "step": 2143 + }, + { + "epoch": 5.483375959079284, + "grad_norm": 0.1571390786677921, + "learning_rate": 2.7267498865770005e-06, + "loss": 0.9769, + "step": 2144 + }, + { + "epoch": 5.485933503836317, + "grad_norm": 0.1320156220064998, + "learning_rate": 2.718002035272197e-06, + "loss": 1.0057, + "step": 2145 + }, + { + "epoch": 5.4884910485933505, + "grad_norm": 0.1360636747597633, + "learning_rate": 2.7092660313227748e-06, + "loss": 1.0064, + "step": 2146 + }, + { + "epoch": 5.491048593350383, + "grad_norm": 0.13394654726028757, + "learning_rate": 2.700541888941667e-06, + "loss": 1.0025, + "step": 2147 + }, + { + "epoch": 5.493606138107417, + "grad_norm": 0.1460012982176339, + "learning_rate": 2.6918296223225026e-06, + "loss": 1.0227, + "step": 2148 + }, + { + "epoch": 5.4961636828644505, + "grad_norm": 0.13049152775591077, + "learning_rate": 2.683129245639603e-06, + "loss": 1.0393, + "step": 2149 + }, + { + "epoch": 5.498721227621483, + "grad_norm": 0.15254103744247385, + "learning_rate": 2.6744407730479325e-06, + "loss": 1.0279, + "step": 2150 + }, + { + "epoch": 5.501278772378517, + "grad_norm": 0.1440023793657765, + "learning_rate": 2.66576421868309e-06, + "loss": 1.0295, + "step": 2151 + }, + { + "epoch": 5.5038363171355495, + "grad_norm": 0.13606809517331622, + "learning_rate": 2.6570995966612945e-06, + "loss": 1.0299, + "step": 2152 + }, + { + "epoch": 5.506393861892583, + "grad_norm": 0.13926181662872325, + "learning_rate": 2.6484469210793384e-06, + "loss": 1.037, + "step": 2153 + }, + { + "epoch": 5.508951406649617, + "grad_norm": 0.14473456019169403, + "learning_rate": 2.6398062060145867e-06, + "loss": 1.017, + "step": 2154 + }, + { + "epoch": 5.5115089514066495, + "grad_norm": 0.13272081994045937, + "learning_rate": 2.631177465524938e-06, + "loss": 1.0217, + "step": 2155 + }, + { + "epoch": 5.514066496163683, + "grad_norm": 0.14026203110310534, + "learning_rate": 2.6225607136488194e-06, + "loss": 1.0021, + "step": 2156 + }, + { + "epoch": 5.516624040920716, + "grad_norm": 0.13205919977316974, + "learning_rate": 2.613955964405146e-06, + "loss": 1.052, + "step": 2157 + }, + { + "epoch": 5.5191815856777495, + "grad_norm": 0.13360379756882199, + "learning_rate": 2.605363231793302e-06, + "loss": 1.0499, + "step": 2158 + }, + { + "epoch": 5.521739130434782, + "grad_norm": 0.14208435941220482, + "learning_rate": 2.5967825297931328e-06, + "loss": 1.0172, + "step": 2159 + }, + { + "epoch": 5.524296675191816, + "grad_norm": 0.13295870010362018, + "learning_rate": 2.5882138723649018e-06, + "loss": 1.0334, + "step": 2160 + }, + { + "epoch": 5.526854219948849, + "grad_norm": 0.12489034371588933, + "learning_rate": 2.5796572734492777e-06, + "loss": 1.0103, + "step": 2161 + }, + { + "epoch": 5.529411764705882, + "grad_norm": 0.13244599397256537, + "learning_rate": 2.571112746967309e-06, + "loss": 1.0218, + "step": 2162 + }, + { + "epoch": 5.531969309462916, + "grad_norm": 0.15003256070846932, + "learning_rate": 2.5625803068204126e-06, + "loss": 1.0759, + "step": 2163 + }, + { + "epoch": 5.534526854219949, + "grad_norm": 0.1356632599292978, + "learning_rate": 2.554059966890332e-06, + "loss": 1.0042, + "step": 2164 + }, + { + "epoch": 5.537084398976982, + "grad_norm": 0.15088785982749878, + "learning_rate": 2.545551741039125e-06, + "loss": 1.0084, + "step": 2165 + }, + { + "epoch": 5.539641943734015, + "grad_norm": 0.13549191741444538, + "learning_rate": 2.5370556431091486e-06, + "loss": 1.0447, + "step": 2166 + }, + { + "epoch": 5.542199488491049, + "grad_norm": 0.1345097927774657, + "learning_rate": 2.5285716869230192e-06, + "loss": 1.0352, + "step": 2167 + }, + { + "epoch": 5.544757033248082, + "grad_norm": 0.1377603438588639, + "learning_rate": 2.5200998862836044e-06, + "loss": 1.0456, + "step": 2168 + }, + { + "epoch": 5.547314578005115, + "grad_norm": 0.13719837282442893, + "learning_rate": 2.5116402549739904e-06, + "loss": 1.0111, + "step": 2169 + }, + { + "epoch": 5.549872122762149, + "grad_norm": 0.12784774794791698, + "learning_rate": 2.503192806757474e-06, + "loss": 1.0555, + "step": 2170 + }, + { + "epoch": 5.552429667519181, + "grad_norm": 0.1377625979101254, + "learning_rate": 2.494757555377524e-06, + "loss": 1.0217, + "step": 2171 + }, + { + "epoch": 5.554987212276215, + "grad_norm": 0.13849942681054245, + "learning_rate": 2.486334514557761e-06, + "loss": 1.0175, + "step": 2172 + }, + { + "epoch": 5.557544757033249, + "grad_norm": 0.14070221787371265, + "learning_rate": 2.477923698001955e-06, + "loss": 1.03, + "step": 2173 + }, + { + "epoch": 5.560102301790281, + "grad_norm": 0.12917105115289923, + "learning_rate": 2.469525119393974e-06, + "loss": 1.0316, + "step": 2174 + }, + { + "epoch": 5.562659846547315, + "grad_norm": 0.14393204543904917, + "learning_rate": 2.461138792397779e-06, + "loss": 1.0429, + "step": 2175 + }, + { + "epoch": 5.565217391304348, + "grad_norm": 0.1350830986575781, + "learning_rate": 2.4527647306574e-06, + "loss": 1.0005, + "step": 2176 + }, + { + "epoch": 5.567774936061381, + "grad_norm": 0.1272869817285887, + "learning_rate": 2.4444029477969157e-06, + "loss": 1.0083, + "step": 2177 + }, + { + "epoch": 5.570332480818414, + "grad_norm": 0.1329875176980315, + "learning_rate": 2.4360534574204196e-06, + "loss": 1.0064, + "step": 2178 + }, + { + "epoch": 5.572890025575448, + "grad_norm": 0.13284521850316935, + "learning_rate": 2.427716273112011e-06, + "loss": 1.026, + "step": 2179 + }, + { + "epoch": 5.57544757033248, + "grad_norm": 0.13655729094534802, + "learning_rate": 2.4193914084357708e-06, + "loss": 1.0311, + "step": 2180 + }, + { + "epoch": 5.578005115089514, + "grad_norm": 0.13249886049800538, + "learning_rate": 2.4110788769357305e-06, + "loss": 1.0245, + "step": 2181 + }, + { + "epoch": 5.580562659846548, + "grad_norm": 0.14032611666517894, + "learning_rate": 2.402778692135861e-06, + "loss": 1.0218, + "step": 2182 + }, + { + "epoch": 5.58312020460358, + "grad_norm": 0.13366091002172387, + "learning_rate": 2.394490867540039e-06, + "loss": 1.0275, + "step": 2183 + }, + { + "epoch": 5.585677749360614, + "grad_norm": 0.13700684117392312, + "learning_rate": 2.3862154166320417e-06, + "loss": 1.0055, + "step": 2184 + }, + { + "epoch": 5.588235294117647, + "grad_norm": 0.13884798487973146, + "learning_rate": 2.3779523528755143e-06, + "loss": 1.0298, + "step": 2185 + }, + { + "epoch": 5.59079283887468, + "grad_norm": 0.14068128211510497, + "learning_rate": 2.3697016897139345e-06, + "loss": 1.0568, + "step": 2186 + }, + { + "epoch": 5.593350383631714, + "grad_norm": 0.1367538445761975, + "learning_rate": 2.361463440570623e-06, + "loss": 1.0211, + "step": 2187 + }, + { + "epoch": 5.595907928388747, + "grad_norm": 0.137882423029852, + "learning_rate": 2.353237618848695e-06, + "loss": 1.0388, + "step": 2188 + }, + { + "epoch": 5.59846547314578, + "grad_norm": 0.13627762962811446, + "learning_rate": 2.3450242379310427e-06, + "loss": 1.0423, + "step": 2189 + }, + { + "epoch": 5.601023017902813, + "grad_norm": 0.13080557028764447, + "learning_rate": 2.3368233111803305e-06, + "loss": 1.0209, + "step": 2190 + }, + { + "epoch": 5.603580562659847, + "grad_norm": 0.13373365809565754, + "learning_rate": 2.328634851938949e-06, + "loss": 1.0548, + "step": 2191 + }, + { + "epoch": 5.6061381074168795, + "grad_norm": 0.14670903806258018, + "learning_rate": 2.3204588735290155e-06, + "loss": 1.0283, + "step": 2192 + }, + { + "epoch": 5.608695652173913, + "grad_norm": 0.1351316953465856, + "learning_rate": 2.312295389252326e-06, + "loss": 1.0253, + "step": 2193 + }, + { + "epoch": 5.611253196930946, + "grad_norm": 0.14536763822784776, + "learning_rate": 2.304144412390367e-06, + "loss": 1.0289, + "step": 2194 + }, + { + "epoch": 5.6138107416879794, + "grad_norm": 0.1373151541315976, + "learning_rate": 2.2960059562042647e-06, + "loss": 1.0227, + "step": 2195 + }, + { + "epoch": 5.616368286445013, + "grad_norm": 0.12983515898716327, + "learning_rate": 2.2878800339347763e-06, + "loss": 1.0256, + "step": 2196 + }, + { + "epoch": 5.618925831202046, + "grad_norm": 0.12825544867685706, + "learning_rate": 2.279766658802275e-06, + "loss": 1.0468, + "step": 2197 + }, + { + "epoch": 5.621483375959079, + "grad_norm": 0.14977773117762613, + "learning_rate": 2.2716658440067085e-06, + "loss": 1.0045, + "step": 2198 + }, + { + "epoch": 5.624040920716112, + "grad_norm": 0.163815240244753, + "learning_rate": 2.2635776027276056e-06, + "loss": 1.0211, + "step": 2199 + }, + { + "epoch": 5.626598465473146, + "grad_norm": 0.1311668589781632, + "learning_rate": 2.255501948124017e-06, + "loss": 1.0318, + "step": 2200 + }, + { + "epoch": 5.629156010230179, + "grad_norm": 0.13085196604157895, + "learning_rate": 2.247438893334537e-06, + "loss": 1.0219, + "step": 2201 + }, + { + "epoch": 5.631713554987212, + "grad_norm": 0.1273903714267332, + "learning_rate": 2.2393884514772457e-06, + "loss": 0.9929, + "step": 2202 + }, + { + "epoch": 5.634271099744246, + "grad_norm": 0.13914897324377146, + "learning_rate": 2.231350635649713e-06, + "loss": 1.0452, + "step": 2203 + }, + { + "epoch": 5.6368286445012785, + "grad_norm": 0.13636448611829766, + "learning_rate": 2.223325458928961e-06, + "loss": 1.0078, + "step": 2204 + }, + { + "epoch": 5.639386189258312, + "grad_norm": 0.13875063448351502, + "learning_rate": 2.2153129343714484e-06, + "loss": 1.044, + "step": 2205 + }, + { + "epoch": 5.641943734015345, + "grad_norm": 0.1268762090418032, + "learning_rate": 2.207313075013059e-06, + "loss": 1.021, + "step": 2206 + }, + { + "epoch": 5.6445012787723785, + "grad_norm": 0.14115564139986136, + "learning_rate": 2.1993258938690533e-06, + "loss": 0.9935, + "step": 2207 + }, + { + "epoch": 5.647058823529412, + "grad_norm": 0.13114159318824248, + "learning_rate": 2.191351403934082e-06, + "loss": 1.0314, + "step": 2208 + }, + { + "epoch": 5.649616368286445, + "grad_norm": 0.12884976286632582, + "learning_rate": 2.183389618182139e-06, + "loss": 1.0046, + "step": 2209 + }, + { + "epoch": 5.6521739130434785, + "grad_norm": 0.12995582182420992, + "learning_rate": 2.1754405495665553e-06, + "loss": 1.0373, + "step": 2210 + }, + { + "epoch": 5.654731457800511, + "grad_norm": 0.13421458626767693, + "learning_rate": 2.1675042110199664e-06, + "loss": 1.016, + "step": 2211 + }, + { + "epoch": 5.657289002557545, + "grad_norm": 0.13511795554454278, + "learning_rate": 2.1595806154542965e-06, + "loss": 1.0203, + "step": 2212 + }, + { + "epoch": 5.659846547314578, + "grad_norm": 0.12526718028345482, + "learning_rate": 2.1516697757607464e-06, + "loss": 1.048, + "step": 2213 + }, + { + "epoch": 5.662404092071611, + "grad_norm": 0.13609131915375153, + "learning_rate": 2.143771704809753e-06, + "loss": 1.0221, + "step": 2214 + }, + { + "epoch": 5.664961636828645, + "grad_norm": 0.13389453092548842, + "learning_rate": 2.1358864154509838e-06, + "loss": 0.995, + "step": 2215 + }, + { + "epoch": 5.667519181585678, + "grad_norm": 0.13120531247951384, + "learning_rate": 2.128013920513311e-06, + "loss": 1.002, + "step": 2216 + }, + { + "epoch": 5.670076726342711, + "grad_norm": 0.12595917765897047, + "learning_rate": 2.1201542328047965e-06, + "loss": 1.0307, + "step": 2217 + }, + { + "epoch": 5.672634271099744, + "grad_norm": 0.1327291524503786, + "learning_rate": 2.112307365112657e-06, + "loss": 1.0042, + "step": 2218 + }, + { + "epoch": 5.675191815856778, + "grad_norm": 0.14073038841763177, + "learning_rate": 2.1044733302032527e-06, + "loss": 1.0089, + "step": 2219 + }, + { + "epoch": 5.677749360613811, + "grad_norm": 0.13145348857222067, + "learning_rate": 2.0966521408220753e-06, + "loss": 1.0191, + "step": 2220 + }, + { + "epoch": 5.680306905370844, + "grad_norm": 0.13758179967194598, + "learning_rate": 2.088843809693708e-06, + "loss": 1.0389, + "step": 2221 + }, + { + "epoch": 5.6828644501278776, + "grad_norm": 0.12934306601192186, + "learning_rate": 2.081048349521814e-06, + "loss": 1.0386, + "step": 2222 + }, + { + "epoch": 5.68542199488491, + "grad_norm": 0.12132994106171455, + "learning_rate": 2.0732657729891236e-06, + "loss": 1.0237, + "step": 2223 + }, + { + "epoch": 5.687979539641944, + "grad_norm": 0.12639844337210293, + "learning_rate": 2.065496092757403e-06, + "loss": 1.0039, + "step": 2224 + }, + { + "epoch": 5.690537084398977, + "grad_norm": 0.1397408236378054, + "learning_rate": 2.0577393214674335e-06, + "loss": 1.0782, + "step": 2225 + }, + { + "epoch": 5.69309462915601, + "grad_norm": 0.12975569414651789, + "learning_rate": 2.049995471738995e-06, + "loss": 1.029, + "step": 2226 + }, + { + "epoch": 5.695652173913043, + "grad_norm": 0.13025784101096557, + "learning_rate": 2.042264556170853e-06, + "loss": 0.9846, + "step": 2227 + }, + { + "epoch": 5.698209718670077, + "grad_norm": 0.1282941793591346, + "learning_rate": 2.034546587340719e-06, + "loss": 1.0143, + "step": 2228 + }, + { + "epoch": 5.70076726342711, + "grad_norm": 0.13236558983338137, + "learning_rate": 2.026841577805245e-06, + "loss": 1.0534, + "step": 2229 + }, + { + "epoch": 5.703324808184143, + "grad_norm": 0.13423342295188723, + "learning_rate": 2.019149540100005e-06, + "loss": 1.0568, + "step": 2230 + }, + { + "epoch": 5.705882352941177, + "grad_norm": 0.13468947441049006, + "learning_rate": 2.0114704867394598e-06, + "loss": 1.014, + "step": 2231 + }, + { + "epoch": 5.708439897698209, + "grad_norm": 0.13388666927274886, + "learning_rate": 2.0038044302169492e-06, + "loss": 1.0246, + "step": 2232 + }, + { + "epoch": 5.710997442455243, + "grad_norm": 0.13458582769078975, + "learning_rate": 1.9961513830046663e-06, + "loss": 1.0335, + "step": 2233 + }, + { + "epoch": 5.713554987212277, + "grad_norm": 0.1334530516759338, + "learning_rate": 1.988511357553644e-06, + "loss": 1.0107, + "step": 2234 + }, + { + "epoch": 5.716112531969309, + "grad_norm": 0.13432155143391286, + "learning_rate": 1.980884366293725e-06, + "loss": 1.002, + "step": 2235 + }, + { + "epoch": 5.718670076726343, + "grad_norm": 0.1321302038455819, + "learning_rate": 1.973270421633543e-06, + "loss": 1.0281, + "step": 2236 + }, + { + "epoch": 5.721227621483376, + "grad_norm": 0.13482083547904436, + "learning_rate": 1.965669535960516e-06, + "loss": 1.0032, + "step": 2237 + }, + { + "epoch": 5.723785166240409, + "grad_norm": 0.1362582011621695, + "learning_rate": 1.9580817216408075e-06, + "loss": 1.0151, + "step": 2238 + }, + { + "epoch": 5.726342710997442, + "grad_norm": 0.13381683599607858, + "learning_rate": 1.9505069910193164e-06, + "loss": 0.9876, + "step": 2239 + }, + { + "epoch": 5.728900255754476, + "grad_norm": 0.12202356902109507, + "learning_rate": 1.9429453564196543e-06, + "loss": 1.0203, + "step": 2240 + }, + { + "epoch": 5.731457800511509, + "grad_norm": 0.12193705736628206, + "learning_rate": 1.9353968301441306e-06, + "loss": 0.9752, + "step": 2241 + }, + { + "epoch": 5.734015345268542, + "grad_norm": 0.1264989543927549, + "learning_rate": 1.927861424473726e-06, + "loss": 1.025, + "step": 2242 + }, + { + "epoch": 5.736572890025576, + "grad_norm": 0.14123473229613026, + "learning_rate": 1.920339151668069e-06, + "loss": 1.0125, + "step": 2243 + }, + { + "epoch": 5.739130434782608, + "grad_norm": 0.12538976213285152, + "learning_rate": 1.9128300239654353e-06, + "loss": 1.0103, + "step": 2244 + }, + { + "epoch": 5.741687979539642, + "grad_norm": 0.12777815103030538, + "learning_rate": 1.9053340535827004e-06, + "loss": 1.0365, + "step": 2245 + }, + { + "epoch": 5.744245524296675, + "grad_norm": 0.9983046758036718, + "learning_rate": 1.8978512527153414e-06, + "loss": 1.0208, + "step": 2246 + }, + { + "epoch": 5.746803069053708, + "grad_norm": 0.13869698166830857, + "learning_rate": 1.8903816335374048e-06, + "loss": 1.0092, + "step": 2247 + }, + { + "epoch": 5.749360613810742, + "grad_norm": 0.13909895674572456, + "learning_rate": 1.882925208201498e-06, + "loss": 0.9976, + "step": 2248 + }, + { + "epoch": 5.751918158567775, + "grad_norm": 0.13223029843900272, + "learning_rate": 1.8754819888387576e-06, + "loss": 1.0226, + "step": 2249 + }, + { + "epoch": 5.754475703324808, + "grad_norm": 0.1355611449623982, + "learning_rate": 1.868051987558832e-06, + "loss": 1.0547, + "step": 2250 + }, + { + "epoch": 5.757033248081841, + "grad_norm": 0.1335592612771471, + "learning_rate": 1.8606352164498754e-06, + "loss": 1.022, + "step": 2251 + }, + { + "epoch": 5.759590792838875, + "grad_norm": 0.13517321815446315, + "learning_rate": 1.8532316875785084e-06, + "loss": 1.059, + "step": 2252 + }, + { + "epoch": 5.762148337595908, + "grad_norm": 0.12900109188000092, + "learning_rate": 1.8458414129898072e-06, + "loss": 1.0121, + "step": 2253 + }, + { + "epoch": 5.764705882352941, + "grad_norm": 0.13164593690766663, + "learning_rate": 1.8384644047072864e-06, + "loss": 1.0363, + "step": 2254 + }, + { + "epoch": 5.767263427109975, + "grad_norm": 0.12836234729861262, + "learning_rate": 1.8311006747328775e-06, + "loss": 1.0342, + "step": 2255 + }, + { + "epoch": 5.7698209718670075, + "grad_norm": 0.13352486032417052, + "learning_rate": 1.8237502350469161e-06, + "loss": 1.028, + "step": 2256 + }, + { + "epoch": 5.772378516624041, + "grad_norm": 0.12666547237956713, + "learning_rate": 1.8164130976080962e-06, + "loss": 0.9998, + "step": 2257 + }, + { + "epoch": 5.774936061381074, + "grad_norm": 0.12597408036958038, + "learning_rate": 1.8090892743534904e-06, + "loss": 0.9861, + "step": 2258 + }, + { + "epoch": 5.7774936061381075, + "grad_norm": 0.13091969265184827, + "learning_rate": 1.8017787771984973e-06, + "loss": 1.0196, + "step": 2259 + }, + { + "epoch": 5.78005115089514, + "grad_norm": 0.1328229090332335, + "learning_rate": 1.7944816180368408e-06, + "loss": 1.0422, + "step": 2260 + }, + { + "epoch": 5.782608695652174, + "grad_norm": 0.12677176745235394, + "learning_rate": 1.7871978087405384e-06, + "loss": 1.0097, + "step": 2261 + }, + { + "epoch": 5.7851662404092075, + "grad_norm": 0.12437893059639113, + "learning_rate": 1.7799273611598943e-06, + "loss": 1.0121, + "step": 2262 + }, + { + "epoch": 5.78772378516624, + "grad_norm": 0.1251367564202301, + "learning_rate": 1.772670287123479e-06, + "loss": 0.9939, + "step": 2263 + }, + { + "epoch": 5.790281329923274, + "grad_norm": 0.1302978820127013, + "learning_rate": 1.765426598438088e-06, + "loss": 1.0377, + "step": 2264 + }, + { + "epoch": 5.792838874680307, + "grad_norm": 0.12296911765019702, + "learning_rate": 1.7581963068887554e-06, + "loss": 1.0082, + "step": 2265 + }, + { + "epoch": 5.79539641943734, + "grad_norm": 0.1310292740348814, + "learning_rate": 1.7509794242387135e-06, + "loss": 1.0455, + "step": 2266 + }, + { + "epoch": 5.797953964194374, + "grad_norm": 0.11962773068304663, + "learning_rate": 1.7437759622293771e-06, + "loss": 1.0301, + "step": 2267 + }, + { + "epoch": 5.8005115089514065, + "grad_norm": 0.1338997971252641, + "learning_rate": 1.7365859325803269e-06, + "loss": 1.028, + "step": 2268 + }, + { + "epoch": 5.80306905370844, + "grad_norm": 0.12161266269112997, + "learning_rate": 1.7294093469892948e-06, + "loss": 1.0253, + "step": 2269 + }, + { + "epoch": 5.805626598465473, + "grad_norm": 0.12194546591797659, + "learning_rate": 1.7222462171321397e-06, + "loss": 1.0112, + "step": 2270 + }, + { + "epoch": 5.8081841432225065, + "grad_norm": 0.12690399558973253, + "learning_rate": 1.7150965546628184e-06, + "loss": 1.0168, + "step": 2271 + }, + { + "epoch": 5.810741687979539, + "grad_norm": 0.1329159422591136, + "learning_rate": 1.7079603712133908e-06, + "loss": 0.9867, + "step": 2272 + }, + { + "epoch": 5.813299232736573, + "grad_norm": 0.12116530026113131, + "learning_rate": 1.7008376783939772e-06, + "loss": 1.0085, + "step": 2273 + }, + { + "epoch": 5.8158567774936065, + "grad_norm": 0.12935715986878404, + "learning_rate": 1.6937284877927596e-06, + "loss": 1.0162, + "step": 2274 + }, + { + "epoch": 5.818414322250639, + "grad_norm": 0.12690629229315065, + "learning_rate": 1.6866328109759377e-06, + "loss": 0.9794, + "step": 2275 + }, + { + "epoch": 5.820971867007673, + "grad_norm": 0.12407793133570494, + "learning_rate": 1.6795506594877388e-06, + "loss": 1.031, + "step": 2276 + }, + { + "epoch": 5.823529411764706, + "grad_norm": 0.12704984040936246, + "learning_rate": 1.6724820448503852e-06, + "loss": 1.0204, + "step": 2277 + }, + { + "epoch": 5.826086956521739, + "grad_norm": 0.13001027110393584, + "learning_rate": 1.6654269785640608e-06, + "loss": 1.0448, + "step": 2278 + }, + { + "epoch": 5.828644501278772, + "grad_norm": 0.11915860756194478, + "learning_rate": 1.658385472106926e-06, + "loss": 1.0146, + "step": 2279 + }, + { + "epoch": 5.831202046035806, + "grad_norm": 0.12897358959587038, + "learning_rate": 1.6513575369350654e-06, + "loss": 1.021, + "step": 2280 + }, + { + "epoch": 5.833759590792839, + "grad_norm": 0.13505425066582885, + "learning_rate": 1.6443431844824975e-06, + "loss": 1.0002, + "step": 2281 + }, + { + "epoch": 5.836317135549872, + "grad_norm": 0.12555260697675938, + "learning_rate": 1.637342426161126e-06, + "loss": 1.0013, + "step": 2282 + }, + { + "epoch": 5.838874680306906, + "grad_norm": 0.1276721077986895, + "learning_rate": 1.630355273360752e-06, + "loss": 1.0083, + "step": 2283 + }, + { + "epoch": 5.841432225063938, + "grad_norm": 0.12628248303483217, + "learning_rate": 1.623381737449038e-06, + "loss": 1.0495, + "step": 2284 + }, + { + "epoch": 5.843989769820972, + "grad_norm": 0.13396531513865312, + "learning_rate": 1.6164218297714884e-06, + "loss": 0.9778, + "step": 2285 + }, + { + "epoch": 5.846547314578006, + "grad_norm": 0.13405119018709796, + "learning_rate": 1.609475561651438e-06, + "loss": 0.9882, + "step": 2286 + }, + { + "epoch": 5.849104859335038, + "grad_norm": 0.11946775190358987, + "learning_rate": 1.6025429443900286e-06, + "loss": 1.0402, + "step": 2287 + }, + { + "epoch": 5.851662404092072, + "grad_norm": 0.1286546110791319, + "learning_rate": 1.5956239892661995e-06, + "loss": 1.0323, + "step": 2288 + }, + { + "epoch": 5.854219948849105, + "grad_norm": 0.12706067523411144, + "learning_rate": 1.588718707536656e-06, + "loss": 1.0153, + "step": 2289 + }, + { + "epoch": 5.856777493606138, + "grad_norm": 0.12632255275977317, + "learning_rate": 1.5818271104358574e-06, + "loss": 1.0359, + "step": 2290 + }, + { + "epoch": 5.859335038363171, + "grad_norm": 0.12022429130741803, + "learning_rate": 1.5749492091760054e-06, + "loss": 1.0272, + "step": 2291 + }, + { + "epoch": 5.861892583120205, + "grad_norm": 0.12754203390815988, + "learning_rate": 1.5680850149470139e-06, + "loss": 1.0141, + "step": 2292 + }, + { + "epoch": 5.864450127877237, + "grad_norm": 0.12789955923845803, + "learning_rate": 1.5612345389164974e-06, + "loss": 1.0213, + "step": 2293 + }, + { + "epoch": 5.867007672634271, + "grad_norm": 0.13105545311215508, + "learning_rate": 1.5543977922297494e-06, + "loss": 1.0203, + "step": 2294 + }, + { + "epoch": 5.869565217391305, + "grad_norm": 0.12692375648838364, + "learning_rate": 1.5475747860097335e-06, + "loss": 1.0175, + "step": 2295 + }, + { + "epoch": 5.872122762148337, + "grad_norm": 0.12758413074272634, + "learning_rate": 1.5407655313570525e-06, + "loss": 1.0187, + "step": 2296 + }, + { + "epoch": 5.874680306905371, + "grad_norm": 0.1347266986438743, + "learning_rate": 1.5339700393499357e-06, + "loss": 0.978, + "step": 2297 + }, + { + "epoch": 5.877237851662404, + "grad_norm": 0.1286412634763229, + "learning_rate": 1.5271883210442285e-06, + "loss": 1.0243, + "step": 2298 + }, + { + "epoch": 5.879795396419437, + "grad_norm": 0.13598473504010955, + "learning_rate": 1.5204203874733604e-06, + "loss": 1.0458, + "step": 2299 + }, + { + "epoch": 5.882352941176471, + "grad_norm": 0.12217909066335947, + "learning_rate": 1.5136662496483346e-06, + "loss": 1.0159, + "step": 2300 + }, + { + "epoch": 5.884910485933504, + "grad_norm": 0.13697298325476193, + "learning_rate": 1.5069259185577112e-06, + "loss": 1.0234, + "step": 2301 + }, + { + "epoch": 5.887468030690537, + "grad_norm": 0.12856950834935316, + "learning_rate": 1.5001994051675894e-06, + "loss": 1.0005, + "step": 2302 + }, + { + "epoch": 5.89002557544757, + "grad_norm": 0.12272037964597306, + "learning_rate": 1.4934867204215864e-06, + "loss": 1.0182, + "step": 2303 + }, + { + "epoch": 5.892583120204604, + "grad_norm": 0.12396363368680077, + "learning_rate": 1.486787875240816e-06, + "loss": 1.0023, + "step": 2304 + }, + { + "epoch": 5.8951406649616365, + "grad_norm": 0.12822276354353365, + "learning_rate": 1.480102880523886e-06, + "loss": 1.0114, + "step": 2305 + }, + { + "epoch": 5.89769820971867, + "grad_norm": 0.12823957750976692, + "learning_rate": 1.4734317471468618e-06, + "loss": 1.0279, + "step": 2306 + }, + { + "epoch": 5.900255754475703, + "grad_norm": 0.12481205791568802, + "learning_rate": 1.4667744859632615e-06, + "loss": 0.9748, + "step": 2307 + }, + { + "epoch": 5.9028132992327365, + "grad_norm": 0.12376259417000356, + "learning_rate": 1.4601311078040304e-06, + "loss": 1.0291, + "step": 2308 + }, + { + "epoch": 5.90537084398977, + "grad_norm": 0.12039082706987389, + "learning_rate": 1.4535016234775324e-06, + "loss": 0.9835, + "step": 2309 + }, + { + "epoch": 5.907928388746803, + "grad_norm": 0.1278580324817726, + "learning_rate": 1.4468860437695243e-06, + "loss": 1.0276, + "step": 2310 + }, + { + "epoch": 5.910485933503836, + "grad_norm": 0.12971723157693313, + "learning_rate": 1.4402843794431354e-06, + "loss": 1.0085, + "step": 2311 + }, + { + "epoch": 5.913043478260869, + "grad_norm": 0.12766208083651814, + "learning_rate": 1.4336966412388674e-06, + "loss": 1.0392, + "step": 2312 + }, + { + "epoch": 5.915601023017903, + "grad_norm": 0.12363722996422528, + "learning_rate": 1.4271228398745552e-06, + "loss": 1.0063, + "step": 2313 + }, + { + "epoch": 5.918158567774936, + "grad_norm": 0.12491762028888559, + "learning_rate": 1.4205629860453641e-06, + "loss": 1.0598, + "step": 2314 + }, + { + "epoch": 5.920716112531969, + "grad_norm": 0.12614418988739717, + "learning_rate": 1.4140170904237616e-06, + "loss": 1.0078, + "step": 2315 + }, + { + "epoch": 5.923273657289003, + "grad_norm": 0.12871200444350614, + "learning_rate": 1.4074851636595165e-06, + "loss": 0.9912, + "step": 2316 + }, + { + "epoch": 5.9258312020460355, + "grad_norm": 0.12176341068010405, + "learning_rate": 1.400967216379663e-06, + "loss": 1.0023, + "step": 2317 + }, + { + "epoch": 5.928388746803069, + "grad_norm": 0.12736989149935335, + "learning_rate": 1.394463259188491e-06, + "loss": 1.0097, + "step": 2318 + }, + { + "epoch": 5.930946291560103, + "grad_norm": 0.12401472625813548, + "learning_rate": 1.3879733026675367e-06, + "loss": 1.036, + "step": 2319 + }, + { + "epoch": 5.9335038363171355, + "grad_norm": 0.12937517228342466, + "learning_rate": 1.3814973573755518e-06, + "loss": 1.036, + "step": 2320 + }, + { + "epoch": 5.936061381074169, + "grad_norm": 0.127613205394154, + "learning_rate": 1.3750354338484916e-06, + "loss": 0.9881, + "step": 2321 + }, + { + "epoch": 5.938618925831202, + "grad_norm": 0.12739173803258835, + "learning_rate": 1.3685875425995064e-06, + "loss": 1.0191, + "step": 2322 + }, + { + "epoch": 5.9411764705882355, + "grad_norm": 0.13795008867321654, + "learning_rate": 1.3621536941189107e-06, + "loss": 1.0144, + "step": 2323 + }, + { + "epoch": 5.943734015345268, + "grad_norm": 0.12984194360371934, + "learning_rate": 1.355733898874173e-06, + "loss": 1.049, + "step": 2324 + }, + { + "epoch": 5.946291560102302, + "grad_norm": 0.13129623864662363, + "learning_rate": 1.3493281673098956e-06, + "loss": 1.015, + "step": 2325 + }, + { + "epoch": 5.948849104859335, + "grad_norm": 0.12793818903871373, + "learning_rate": 1.3429365098478087e-06, + "loss": 0.9981, + "step": 2326 + }, + { + "epoch": 5.951406649616368, + "grad_norm": 0.1255755665233896, + "learning_rate": 1.3365589368867371e-06, + "loss": 0.9794, + "step": 2327 + }, + { + "epoch": 5.953964194373402, + "grad_norm": 0.1279352390496069, + "learning_rate": 1.330195458802591e-06, + "loss": 1.0249, + "step": 2328 + }, + { + "epoch": 5.956521739130435, + "grad_norm": 0.128293917496119, + "learning_rate": 1.323846085948356e-06, + "loss": 0.9898, + "step": 2329 + }, + { + "epoch": 5.959079283887468, + "grad_norm": 0.12767639872018413, + "learning_rate": 1.3175108286540617e-06, + "loss": 1.0352, + "step": 2330 + }, + { + "epoch": 5.961636828644501, + "grad_norm": 0.12662645466299385, + "learning_rate": 1.3111896972267768e-06, + "loss": 1.0055, + "step": 2331 + }, + { + "epoch": 5.964194373401535, + "grad_norm": 0.12253304775794958, + "learning_rate": 1.3048827019505828e-06, + "loss": 0.9892, + "step": 2332 + }, + { + "epoch": 5.966751918158568, + "grad_norm": 0.13233724231669944, + "learning_rate": 1.2985898530865736e-06, + "loss": 0.9883, + "step": 2333 + }, + { + "epoch": 5.969309462915601, + "grad_norm": 0.12275354609893704, + "learning_rate": 1.2923111608728168e-06, + "loss": 1.0221, + "step": 2334 + }, + { + "epoch": 5.971867007672635, + "grad_norm": 0.13544461017695578, + "learning_rate": 1.2860466355243506e-06, + "loss": 1.0587, + "step": 2335 + }, + { + "epoch": 5.974424552429667, + "grad_norm": 0.125504059793445, + "learning_rate": 1.2797962872331693e-06, + "loss": 1.0096, + "step": 2336 + }, + { + "epoch": 5.976982097186701, + "grad_norm": 0.13226317160144294, + "learning_rate": 1.2735601261681985e-06, + "loss": 1.0489, + "step": 2337 + }, + { + "epoch": 5.979539641943734, + "grad_norm": 0.12803280744387227, + "learning_rate": 1.2673381624752813e-06, + "loss": 1.0307, + "step": 2338 + }, + { + "epoch": 5.982097186700767, + "grad_norm": 0.12863654527584692, + "learning_rate": 1.2611304062771613e-06, + "loss": 1.017, + "step": 2339 + }, + { + "epoch": 5.9846547314578, + "grad_norm": 0.12401870969986709, + "learning_rate": 1.254936867673474e-06, + "loss": 1.0056, + "step": 2340 + }, + { + "epoch": 5.987212276214834, + "grad_norm": 0.11891932350440772, + "learning_rate": 1.2487575567407184e-06, + "loss": 0.9998, + "step": 2341 + }, + { + "epoch": 5.989769820971867, + "grad_norm": 0.12341714944406178, + "learning_rate": 1.2425924835322422e-06, + "loss": 1.0247, + "step": 2342 + }, + { + "epoch": 5.9923273657289, + "grad_norm": 0.1229416512376773, + "learning_rate": 1.2364416580782413e-06, + "loss": 1.0195, + "step": 2343 + }, + { + "epoch": 5.994884910485934, + "grad_norm": 0.12303637728566778, + "learning_rate": 1.2303050903857195e-06, + "loss": 1.0156, + "step": 2344 + }, + { + "epoch": 5.997442455242966, + "grad_norm": 0.13561743214244987, + "learning_rate": 1.2241827904384928e-06, + "loss": 1.0304, + "step": 2345 + }, + { + "epoch": 6.0, + "grad_norm": 0.11664031093263695, + "learning_rate": 1.2180747681971539e-06, + "loss": 1.0047, + "step": 2346 + }, + { + "epoch": 6.002557544757034, + "grad_norm": 0.1230389316598828, + "learning_rate": 1.211981033599079e-06, + "loss": 1.0416, + "step": 2347 + }, + { + "epoch": 6.005115089514066, + "grad_norm": 0.12948288079807183, + "learning_rate": 1.2059015965583908e-06, + "loss": 1.0123, + "step": 2348 + }, + { + "epoch": 6.0076726342711, + "grad_norm": 0.1207876296019636, + "learning_rate": 1.1998364669659524e-06, + "loss": 0.9796, + "step": 2349 + }, + { + "epoch": 6.010230179028133, + "grad_norm": 0.1191785329656778, + "learning_rate": 1.1937856546893533e-06, + "loss": 0.9862, + "step": 2350 + }, + { + "epoch": 6.012787723785166, + "grad_norm": 0.12106597514269477, + "learning_rate": 1.1877491695728827e-06, + "loss": 1.0181, + "step": 2351 + }, + { + "epoch": 6.015345268542199, + "grad_norm": 0.12714775517717014, + "learning_rate": 1.181727021437531e-06, + "loss": 0.9901, + "step": 2352 + }, + { + "epoch": 6.017902813299233, + "grad_norm": 0.12314221662217836, + "learning_rate": 1.1757192200809487e-06, + "loss": 1.0139, + "step": 2353 + }, + { + "epoch": 6.020460358056266, + "grad_norm": 0.1205656248704543, + "learning_rate": 1.1697257752774581e-06, + "loss": 1.0064, + "step": 2354 + }, + { + "epoch": 6.023017902813299, + "grad_norm": 0.12375532206452915, + "learning_rate": 1.1637466967780186e-06, + "loss": 1.0055, + "step": 2355 + }, + { + "epoch": 6.025575447570333, + "grad_norm": 0.13727612152509278, + "learning_rate": 1.1577819943102132e-06, + "loss": 1.0334, + "step": 2356 + }, + { + "epoch": 6.028132992327365, + "grad_norm": 0.13743682672187252, + "learning_rate": 1.1518316775782456e-06, + "loss": 1.063, + "step": 2357 + }, + { + "epoch": 6.030690537084399, + "grad_norm": 0.1269152481030464, + "learning_rate": 1.1458957562629048e-06, + "loss": 1.0245, + "step": 2358 + }, + { + "epoch": 6.033248081841432, + "grad_norm": 0.12054742496527425, + "learning_rate": 1.1399742400215685e-06, + "loss": 1.016, + "step": 2359 + }, + { + "epoch": 6.035805626598465, + "grad_norm": 0.11563655740461991, + "learning_rate": 1.1340671384881664e-06, + "loss": 1.0034, + "step": 2360 + }, + { + "epoch": 6.038363171355499, + "grad_norm": 0.12654719374228424, + "learning_rate": 1.128174461273187e-06, + "loss": 1.0303, + "step": 2361 + }, + { + "epoch": 6.040920716112532, + "grad_norm": 0.13400791982749355, + "learning_rate": 1.122296217963651e-06, + "loss": 0.9908, + "step": 2362 + }, + { + "epoch": 6.043478260869565, + "grad_norm": 0.13721318190820386, + "learning_rate": 1.116432418123088e-06, + "loss": 1.0143, + "step": 2363 + }, + { + "epoch": 6.046035805626598, + "grad_norm": 0.1331473057560735, + "learning_rate": 1.1105830712915355e-06, + "loss": 1.0389, + "step": 2364 + }, + { + "epoch": 6.048593350383632, + "grad_norm": 0.12186052033355585, + "learning_rate": 1.1047481869855136e-06, + "loss": 0.9923, + "step": 2365 + }, + { + "epoch": 6.051150895140665, + "grad_norm": 0.130398414275441, + "learning_rate": 1.0989277746980186e-06, + "loss": 0.9989, + "step": 2366 + }, + { + "epoch": 6.053708439897698, + "grad_norm": 0.1212752348474763, + "learning_rate": 1.0931218438984903e-06, + "loss": 1.0002, + "step": 2367 + }, + { + "epoch": 6.056265984654732, + "grad_norm": 0.12066129403697316, + "learning_rate": 1.0873304040328193e-06, + "loss": 0.9855, + "step": 2368 + }, + { + "epoch": 6.0588235294117645, + "grad_norm": 0.12980745503624036, + "learning_rate": 1.0815534645233182e-06, + "loss": 1.0108, + "step": 2369 + }, + { + "epoch": 6.061381074168798, + "grad_norm": 0.12190895753762201, + "learning_rate": 1.075791034768704e-06, + "loss": 1.0134, + "step": 2370 + }, + { + "epoch": 6.063938618925831, + "grad_norm": 0.11736296572501317, + "learning_rate": 1.0700431241440888e-06, + "loss": 0.9819, + "step": 2371 + }, + { + "epoch": 6.0664961636828645, + "grad_norm": 0.11803134631202541, + "learning_rate": 1.064309742000963e-06, + "loss": 0.999, + "step": 2372 + }, + { + "epoch": 6.069053708439898, + "grad_norm": 0.12274428069266924, + "learning_rate": 1.0585908976671844e-06, + "loss": 1.0263, + "step": 2373 + }, + { + "epoch": 6.071611253196931, + "grad_norm": 0.1280904409678555, + "learning_rate": 1.052886600446954e-06, + "loss": 0.9989, + "step": 2374 + }, + { + "epoch": 6.0741687979539645, + "grad_norm": 0.13800491036101872, + "learning_rate": 1.0471968596208026e-06, + "loss": 1.0168, + "step": 2375 + }, + { + "epoch": 6.076726342710997, + "grad_norm": 0.125255996087832, + "learning_rate": 1.0415216844455889e-06, + "loss": 1.0016, + "step": 2376 + }, + { + "epoch": 6.079283887468031, + "grad_norm": 0.12500402095406113, + "learning_rate": 1.0358610841544657e-06, + "loss": 1.0207, + "step": 2377 + }, + { + "epoch": 6.081841432225064, + "grad_norm": 0.12102753345414748, + "learning_rate": 1.0302150679568745e-06, + "loss": 0.9889, + "step": 2378 + }, + { + "epoch": 6.084398976982097, + "grad_norm": 0.1263965580697967, + "learning_rate": 1.0245836450385304e-06, + "loss": 1.0278, + "step": 2379 + }, + { + "epoch": 6.086956521739131, + "grad_norm": 0.12426986420829644, + "learning_rate": 1.0189668245614092e-06, + "loss": 1.0024, + "step": 2380 + }, + { + "epoch": 6.089514066496164, + "grad_norm": 0.12124987678343191, + "learning_rate": 1.0133646156637244e-06, + "loss": 1.0346, + "step": 2381 + }, + { + "epoch": 6.092071611253197, + "grad_norm": 0.11760759251820775, + "learning_rate": 1.0077770274599187e-06, + "loss": 1.0176, + "step": 2382 + }, + { + "epoch": 6.09462915601023, + "grad_norm": 0.11882704515829542, + "learning_rate": 1.002204069040652e-06, + "loss": 0.9894, + "step": 2383 + }, + { + "epoch": 6.0971867007672635, + "grad_norm": 0.12369290549039276, + "learning_rate": 9.966457494727777e-07, + "loss": 1.04, + "step": 2384 + }, + { + "epoch": 6.099744245524296, + "grad_norm": 0.12345493397851956, + "learning_rate": 9.91102077799333e-07, + "loss": 1.0049, + "step": 2385 + }, + { + "epoch": 6.10230179028133, + "grad_norm": 0.12872126244712379, + "learning_rate": 9.855730630395244e-07, + "loss": 0.9933, + "step": 2386 + }, + { + "epoch": 6.1048593350383635, + "grad_norm": 0.11772835201472491, + "learning_rate": 9.800587141887173e-07, + "loss": 1.0285, + "step": 2387 + }, + { + "epoch": 6.107416879795396, + "grad_norm": 0.12252902927138364, + "learning_rate": 9.745590402184092e-07, + "loss": 1.0134, + "step": 2388 + }, + { + "epoch": 6.10997442455243, + "grad_norm": 0.12214679346044635, + "learning_rate": 9.690740500762241e-07, + "loss": 0.9778, + "step": 2389 + }, + { + "epoch": 6.112531969309463, + "grad_norm": 0.12270563199721099, + "learning_rate": 9.636037526859032e-07, + "loss": 1.0048, + "step": 2390 + }, + { + "epoch": 6.115089514066496, + "grad_norm": 0.13289561214559903, + "learning_rate": 9.58148156947276e-07, + "loss": 1.0355, + "step": 2391 + }, + { + "epoch": 6.117647058823529, + "grad_norm": 0.124015797218616, + "learning_rate": 9.52707271736254e-07, + "loss": 0.9894, + "step": 2392 + }, + { + "epoch": 6.120204603580563, + "grad_norm": 0.12869746602968873, + "learning_rate": 9.472811059048182e-07, + "loss": 1.034, + "step": 2393 + }, + { + "epoch": 6.122762148337596, + "grad_norm": 0.11502225665357182, + "learning_rate": 9.418696682810014e-07, + "loss": 1.0279, + "step": 2394 + }, + { + "epoch": 6.125319693094629, + "grad_norm": 0.12442843747682036, + "learning_rate": 9.364729676688755e-07, + "loss": 1.0346, + "step": 2395 + }, + { + "epoch": 6.127877237851663, + "grad_norm": 0.12203934311867798, + "learning_rate": 9.310910128485317e-07, + "loss": 1.0042, + "step": 2396 + }, + { + "epoch": 6.130434782608695, + "grad_norm": 0.13225053449453802, + "learning_rate": 9.257238125760781e-07, + "loss": 0.9979, + "step": 2397 + }, + { + "epoch": 6.132992327365729, + "grad_norm": 0.11626249473093271, + "learning_rate": 9.203713755836108e-07, + "loss": 1.0151, + "step": 2398 + }, + { + "epoch": 6.135549872122763, + "grad_norm": 0.12565196489418815, + "learning_rate": 9.150337105792129e-07, + "loss": 1.0003, + "step": 2399 + }, + { + "epoch": 6.138107416879795, + "grad_norm": 0.1176707888425743, + "learning_rate": 9.097108262469268e-07, + "loss": 1.0174, + "step": 2400 + }, + { + "epoch": 6.140664961636829, + "grad_norm": 0.1254506125476653, + "learning_rate": 9.044027312467574e-07, + "loss": 1.024, + "step": 2401 + }, + { + "epoch": 6.143222506393862, + "grad_norm": 0.12040306772801906, + "learning_rate": 8.991094342146423e-07, + "loss": 1.0238, + "step": 2402 + }, + { + "epoch": 6.145780051150895, + "grad_norm": 0.12003711394998114, + "learning_rate": 8.938309437624415e-07, + "loss": 1.0361, + "step": 2403 + }, + { + "epoch": 6.148337595907928, + "grad_norm": 0.1222116778211444, + "learning_rate": 8.885672684779345e-07, + "loss": 1.0195, + "step": 2404 + }, + { + "epoch": 6.150895140664962, + "grad_norm": 0.12213600424627216, + "learning_rate": 8.833184169247877e-07, + "loss": 1.0147, + "step": 2405 + }, + { + "epoch": 6.153452685421995, + "grad_norm": 0.11882499943476486, + "learning_rate": 8.780843976425568e-07, + "loss": 1.0443, + "step": 2406 + }, + { + "epoch": 6.156010230179028, + "grad_norm": 0.11944071935758879, + "learning_rate": 8.728652191466602e-07, + "loss": 1.0269, + "step": 2407 + }, + { + "epoch": 6.158567774936062, + "grad_norm": 0.12479032723786981, + "learning_rate": 8.676608899283789e-07, + "loss": 1.0407, + "step": 2408 + }, + { + "epoch": 6.161125319693094, + "grad_norm": 0.1232368778241773, + "learning_rate": 8.62471418454831e-07, + "loss": 0.998, + "step": 2409 + }, + { + "epoch": 6.163682864450128, + "grad_norm": 0.12380002645622601, + "learning_rate": 8.572968131689585e-07, + "loss": 1.0215, + "step": 2410 + }, + { + "epoch": 6.166240409207161, + "grad_norm": 0.11990258505813678, + "learning_rate": 8.521370824895236e-07, + "loss": 1.0362, + "step": 2411 + }, + { + "epoch": 6.168797953964194, + "grad_norm": 0.12763582460814127, + "learning_rate": 8.469922348110871e-07, + "loss": 1.0005, + "step": 2412 + }, + { + "epoch": 6.171355498721228, + "grad_norm": 0.12048771338001237, + "learning_rate": 8.41862278503991e-07, + "loss": 1.0154, + "step": 2413 + }, + { + "epoch": 6.173913043478261, + "grad_norm": 0.11110330026915051, + "learning_rate": 8.367472219143524e-07, + "loss": 0.9864, + "step": 2414 + }, + { + "epoch": 6.176470588235294, + "grad_norm": 0.12274015937027666, + "learning_rate": 8.316470733640525e-07, + "loss": 1.01, + "step": 2415 + }, + { + "epoch": 6.179028132992327, + "grad_norm": 0.11875414799502092, + "learning_rate": 8.265618411507148e-07, + "loss": 1.0349, + "step": 2416 + }, + { + "epoch": 6.181585677749361, + "grad_norm": 0.12112785116554001, + "learning_rate": 8.214915335476892e-07, + "loss": 1.0108, + "step": 2417 + }, + { + "epoch": 6.1841432225063935, + "grad_norm": 0.11843273179000395, + "learning_rate": 8.164361588040526e-07, + "loss": 1.0316, + "step": 2418 + }, + { + "epoch": 6.186700767263427, + "grad_norm": 0.12171206599055973, + "learning_rate": 8.113957251445837e-07, + "loss": 1.0181, + "step": 2419 + }, + { + "epoch": 6.189258312020461, + "grad_norm": 0.1332901069553243, + "learning_rate": 8.063702407697515e-07, + "loss": 1.0163, + "step": 2420 + }, + { + "epoch": 6.1918158567774935, + "grad_norm": 0.12665149802988054, + "learning_rate": 8.013597138557039e-07, + "loss": 1.0316, + "step": 2421 + }, + { + "epoch": 6.194373401534527, + "grad_norm": 0.11748240466353733, + "learning_rate": 7.963641525542564e-07, + "loss": 1.0295, + "step": 2422 + }, + { + "epoch": 6.19693094629156, + "grad_norm": 0.12263136155853388, + "learning_rate": 7.913835649928792e-07, + "loss": 1.0443, + "step": 2423 + }, + { + "epoch": 6.1994884910485935, + "grad_norm": 0.12057268564537553, + "learning_rate": 7.864179592746679e-07, + "loss": 0.9758, + "step": 2424 + }, + { + "epoch": 6.202046035805626, + "grad_norm": 0.11757878694680841, + "learning_rate": 7.814673434783604e-07, + "loss": 0.998, + "step": 2425 + }, + { + "epoch": 6.20460358056266, + "grad_norm": 0.18582779787648557, + "learning_rate": 7.765317256582949e-07, + "loss": 1.0115, + "step": 2426 + }, + { + "epoch": 6.207161125319693, + "grad_norm": 0.13582232353707813, + "learning_rate": 7.716111138444115e-07, + "loss": 1.0459, + "step": 2427 + }, + { + "epoch": 6.209718670076726, + "grad_norm": 0.13389475712289786, + "learning_rate": 7.667055160422432e-07, + "loss": 1.0274, + "step": 2428 + }, + { + "epoch": 6.21227621483376, + "grad_norm": 0.12673104354118297, + "learning_rate": 7.618149402328867e-07, + "loss": 1.0011, + "step": 2429 + }, + { + "epoch": 6.2148337595907925, + "grad_norm": 0.12765584122890725, + "learning_rate": 7.569393943730064e-07, + "loss": 1.0635, + "step": 2430 + }, + { + "epoch": 6.217391304347826, + "grad_norm": 0.11473857666105772, + "learning_rate": 7.52078886394807e-07, + "loss": 0.9878, + "step": 2431 + }, + { + "epoch": 6.21994884910486, + "grad_norm": 0.12228794360420046, + "learning_rate": 7.472334242060331e-07, + "loss": 1.0316, + "step": 2432 + }, + { + "epoch": 6.2225063938618925, + "grad_norm": 0.12426451417815787, + "learning_rate": 7.424030156899475e-07, + "loss": 1.0098, + "step": 2433 + }, + { + "epoch": 6.225063938618926, + "grad_norm": 0.11800919098475897, + "learning_rate": 7.375876687053252e-07, + "loss": 1.0508, + "step": 2434 + }, + { + "epoch": 6.227621483375959, + "grad_norm": 0.1309293626602563, + "learning_rate": 7.327873910864325e-07, + "loss": 1.0265, + "step": 2435 + }, + { + "epoch": 6.2301790281329925, + "grad_norm": 0.12364264713239634, + "learning_rate": 7.280021906430201e-07, + "loss": 1.038, + "step": 2436 + }, + { + "epoch": 6.232736572890025, + "grad_norm": 0.12731230734269985, + "learning_rate": 7.23232075160315e-07, + "loss": 0.9938, + "step": 2437 + }, + { + "epoch": 6.235294117647059, + "grad_norm": 0.11754730324986598, + "learning_rate": 7.184770523989904e-07, + "loss": 1.0209, + "step": 2438 + }, + { + "epoch": 6.2378516624040925, + "grad_norm": 0.12687711722398867, + "learning_rate": 7.137371300951746e-07, + "loss": 1.0369, + "step": 2439 + }, + { + "epoch": 6.240409207161125, + "grad_norm": 0.1226944492744433, + "learning_rate": 7.090123159604234e-07, + "loss": 1.0417, + "step": 2440 + }, + { + "epoch": 6.242966751918159, + "grad_norm": 0.11721843519340895, + "learning_rate": 7.043026176817158e-07, + "loss": 0.99, + "step": 2441 + }, + { + "epoch": 6.245524296675192, + "grad_norm": 0.12080675281454777, + "learning_rate": 6.996080429214347e-07, + "loss": 1.0065, + "step": 2442 + }, + { + "epoch": 6.248081841432225, + "grad_norm": 0.12010992913398671, + "learning_rate": 6.949285993173593e-07, + "loss": 1.0359, + "step": 2443 + }, + { + "epoch": 6.250639386189258, + "grad_norm": 0.11624614678372433, + "learning_rate": 6.902642944826544e-07, + "loss": 0.97, + "step": 2444 + }, + { + "epoch": 6.253196930946292, + "grad_norm": 0.12257573737475404, + "learning_rate": 6.856151360058505e-07, + "loss": 1.0192, + "step": 2445 + }, + { + "epoch": 6.255754475703325, + "grad_norm": 0.1201829684398593, + "learning_rate": 6.809811314508386e-07, + "loss": 1.0466, + "step": 2446 + }, + { + "epoch": 6.258312020460358, + "grad_norm": 0.12401967000820303, + "learning_rate": 6.763622883568521e-07, + "loss": 1.0356, + "step": 2447 + }, + { + "epoch": 6.260869565217392, + "grad_norm": 0.11778396980454381, + "learning_rate": 6.717586142384624e-07, + "loss": 1.036, + "step": 2448 + }, + { + "epoch": 6.263427109974424, + "grad_norm": 0.12185872889499474, + "learning_rate": 6.671701165855593e-07, + "loss": 1.0261, + "step": 2449 + }, + { + "epoch": 6.265984654731458, + "grad_norm": 0.1201489344194391, + "learning_rate": 6.625968028633389e-07, + "loss": 1.0119, + "step": 2450 + }, + { + "epoch": 6.268542199488491, + "grad_norm": 0.11988021977061444, + "learning_rate": 6.580386805122996e-07, + "loss": 1.021, + "step": 2451 + }, + { + "epoch": 6.271099744245524, + "grad_norm": 0.11792524228657224, + "learning_rate": 6.534957569482214e-07, + "loss": 1.0635, + "step": 2452 + }, + { + "epoch": 6.273657289002558, + "grad_norm": 0.11687466392592072, + "learning_rate": 6.489680395621556e-07, + "loss": 1.0129, + "step": 2453 + }, + { + "epoch": 6.276214833759591, + "grad_norm": 0.12220153331468454, + "learning_rate": 6.444555357204152e-07, + "loss": 0.9876, + "step": 2454 + }, + { + "epoch": 6.278772378516624, + "grad_norm": 0.11658584388896727, + "learning_rate": 6.39958252764562e-07, + "loss": 1.0258, + "step": 2455 + }, + { + "epoch": 6.281329923273657, + "grad_norm": 0.11595243705777233, + "learning_rate": 6.354761980113966e-07, + "loss": 1.0364, + "step": 2456 + }, + { + "epoch": 6.283887468030691, + "grad_norm": 0.11948349789713839, + "learning_rate": 6.31009378752937e-07, + "loss": 1.0295, + "step": 2457 + }, + { + "epoch": 6.286445012787723, + "grad_norm": 0.11578209417911318, + "learning_rate": 6.265578022564233e-07, + "loss": 1.003, + "step": 2458 + }, + { + "epoch": 6.289002557544757, + "grad_norm": 0.11954141892522423, + "learning_rate": 6.221214757642901e-07, + "loss": 1.0186, + "step": 2459 + }, + { + "epoch": 6.291560102301791, + "grad_norm": 0.1214032884466788, + "learning_rate": 6.177004064941616e-07, + "loss": 1.0325, + "step": 2460 + }, + { + "epoch": 6.294117647058823, + "grad_norm": 0.11798550854551848, + "learning_rate": 6.132946016388453e-07, + "loss": 1.0034, + "step": 2461 + }, + { + "epoch": 6.296675191815857, + "grad_norm": 0.12025821516068275, + "learning_rate": 6.089040683663083e-07, + "loss": 0.9823, + "step": 2462 + }, + { + "epoch": 6.29923273657289, + "grad_norm": 0.11951253909474888, + "learning_rate": 6.045288138196725e-07, + "loss": 1.0409, + "step": 2463 + }, + { + "epoch": 6.301790281329923, + "grad_norm": 0.11418311978255119, + "learning_rate": 6.001688451172027e-07, + "loss": 1.0022, + "step": 2464 + }, + { + "epoch": 6.304347826086957, + "grad_norm": 0.11934858308797691, + "learning_rate": 5.958241693522993e-07, + "loss": 1.0107, + "step": 2465 + }, + { + "epoch": 6.30690537084399, + "grad_norm": 0.12241414028875457, + "learning_rate": 5.914947935934756e-07, + "loss": 0.9971, + "step": 2466 + }, + { + "epoch": 6.309462915601023, + "grad_norm": 0.11903591318763888, + "learning_rate": 5.871807248843542e-07, + "loss": 1.0117, + "step": 2467 + }, + { + "epoch": 6.312020460358056, + "grad_norm": 0.11896713837542751, + "learning_rate": 5.828819702436573e-07, + "loss": 1.0199, + "step": 2468 + }, + { + "epoch": 6.31457800511509, + "grad_norm": 0.12256891371488562, + "learning_rate": 5.785985366651892e-07, + "loss": 1.003, + "step": 2469 + }, + { + "epoch": 6.3171355498721224, + "grad_norm": 0.1224791957117775, + "learning_rate": 5.743304311178289e-07, + "loss": 1.0067, + "step": 2470 + }, + { + "epoch": 6.319693094629156, + "grad_norm": 0.12119833550268867, + "learning_rate": 5.70077660545515e-07, + "loss": 1.0196, + "step": 2471 + }, + { + "epoch": 6.322250639386189, + "grad_norm": 0.11520605275376457, + "learning_rate": 5.658402318672418e-07, + "loss": 1.0127, + "step": 2472 + }, + { + "epoch": 6.324808184143222, + "grad_norm": 0.11525398133510434, + "learning_rate": 5.616181519770414e-07, + "loss": 1.0161, + "step": 2473 + }, + { + "epoch": 6.327365728900256, + "grad_norm": 0.12176149506861418, + "learning_rate": 5.574114277439702e-07, + "loss": 1.0216, + "step": 2474 + }, + { + "epoch": 6.329923273657289, + "grad_norm": 0.12541686899065785, + "learning_rate": 5.53220066012109e-07, + "loss": 1.0263, + "step": 2475 + }, + { + "epoch": 6.332480818414322, + "grad_norm": 0.12958665943781433, + "learning_rate": 5.490440736005397e-07, + "loss": 1.0737, + "step": 2476 + }, + { + "epoch": 6.335038363171355, + "grad_norm": 0.1273940622092984, + "learning_rate": 5.448834573033424e-07, + "loss": 1.028, + "step": 2477 + }, + { + "epoch": 6.337595907928389, + "grad_norm": 0.11799709709320902, + "learning_rate": 5.407382238895765e-07, + "loss": 0.9949, + "step": 2478 + }, + { + "epoch": 6.340153452685422, + "grad_norm": 0.1220634348791913, + "learning_rate": 5.366083801032806e-07, + "loss": 1.0422, + "step": 2479 + }, + { + "epoch": 6.342710997442455, + "grad_norm": 0.11889607141087616, + "learning_rate": 5.324939326634515e-07, + "loss": 1.0017, + "step": 2480 + }, + { + "epoch": 6.345268542199489, + "grad_norm": 0.12002156059223426, + "learning_rate": 5.283948882640355e-07, + "loss": 1.0181, + "step": 2481 + }, + { + "epoch": 6.3478260869565215, + "grad_norm": 0.11596540294437355, + "learning_rate": 5.24311253573927e-07, + "loss": 1.0346, + "step": 2482 + }, + { + "epoch": 6.350383631713555, + "grad_norm": 0.11502520531650343, + "learning_rate": 5.202430352369392e-07, + "loss": 1.0135, + "step": 2483 + }, + { + "epoch": 6.352941176470588, + "grad_norm": 0.12267491898314155, + "learning_rate": 5.161902398718121e-07, + "loss": 1.0435, + "step": 2484 + }, + { + "epoch": 6.3554987212276215, + "grad_norm": 0.12185761812901445, + "learning_rate": 5.121528740721871e-07, + "loss": 1.0377, + "step": 2485 + }, + { + "epoch": 6.358056265984655, + "grad_norm": 0.11976615175350093, + "learning_rate": 5.081309444066085e-07, + "loss": 1.034, + "step": 2486 + }, + { + "epoch": 6.360613810741688, + "grad_norm": 0.116555412280644, + "learning_rate": 5.041244574185056e-07, + "loss": 1.011, + "step": 2487 + }, + { + "epoch": 6.3631713554987215, + "grad_norm": 0.12515368166748755, + "learning_rate": 5.001334196261776e-07, + "loss": 0.9861, + "step": 2488 + }, + { + "epoch": 6.365728900255754, + "grad_norm": 0.11814447264484773, + "learning_rate": 4.961578375227982e-07, + "loss": 1.0146, + "step": 2489 + }, + { + "epoch": 6.368286445012788, + "grad_norm": 0.12245094109059326, + "learning_rate": 4.921977175763881e-07, + "loss": 1.0204, + "step": 2490 + }, + { + "epoch": 6.370843989769821, + "grad_norm": 0.12283694751475284, + "learning_rate": 4.882530662298168e-07, + "loss": 1.0313, + "step": 2491 + }, + { + "epoch": 6.373401534526854, + "grad_norm": 0.12224108783096758, + "learning_rate": 4.843238899007829e-07, + "loss": 1.032, + "step": 2492 + }, + { + "epoch": 6.375959079283888, + "grad_norm": 0.11751909048944272, + "learning_rate": 4.804101949818119e-07, + "loss": 1.0037, + "step": 2493 + }, + { + "epoch": 6.378516624040921, + "grad_norm": 0.1189722841334927, + "learning_rate": 4.765119878402424e-07, + "loss": 1.0218, + "step": 2494 + }, + { + "epoch": 6.381074168797954, + "grad_norm": 0.12188011601377355, + "learning_rate": 4.726292748182104e-07, + "loss": 1.0235, + "step": 2495 + }, + { + "epoch": 6.383631713554987, + "grad_norm": 0.11601162144284871, + "learning_rate": 4.687620622326505e-07, + "loss": 1.0095, + "step": 2496 + }, + { + "epoch": 6.3861892583120206, + "grad_norm": 0.11794823628283956, + "learning_rate": 4.6491035637527437e-07, + "loss": 1.0211, + "step": 2497 + }, + { + "epoch": 6.388746803069053, + "grad_norm": 0.12080963912657082, + "learning_rate": 4.6107416351256595e-07, + "loss": 0.996, + "step": 2498 + }, + { + "epoch": 6.391304347826087, + "grad_norm": 0.11852593163423941, + "learning_rate": 4.5725348988577057e-07, + "loss": 1.0473, + "step": 2499 + }, + { + "epoch": 6.3938618925831205, + "grad_norm": 0.1154582217572824, + "learning_rate": 4.5344834171088594e-07, + "loss": 0.9916, + "step": 2500 + }, + { + "epoch": 6.396419437340153, + "grad_norm": 0.12611349351005327, + "learning_rate": 4.496587251786544e-07, + "loss": 1.0537, + "step": 2501 + }, + { + "epoch": 6.398976982097187, + "grad_norm": 0.11841147140282605, + "learning_rate": 4.4588464645453856e-07, + "loss": 1.0354, + "step": 2502 + }, + { + "epoch": 6.40153452685422, + "grad_norm": 0.11761246404197793, + "learning_rate": 4.421261116787323e-07, + "loss": 1.0056, + "step": 2503 + }, + { + "epoch": 6.404092071611253, + "grad_norm": 0.116833267265145, + "learning_rate": 4.383831269661343e-07, + "loss": 0.9983, + "step": 2504 + }, + { + "epoch": 6.406649616368286, + "grad_norm": 0.12485584628194238, + "learning_rate": 4.3465569840635105e-07, + "loss": 1.0276, + "step": 2505 + }, + { + "epoch": 6.40920716112532, + "grad_norm": 0.11771747761741529, + "learning_rate": 4.309438320636705e-07, + "loss": 1.0119, + "step": 2506 + }, + { + "epoch": 6.411764705882353, + "grad_norm": 0.1167766752899283, + "learning_rate": 4.272475339770699e-07, + "loss": 1.0257, + "step": 2507 + }, + { + "epoch": 6.414322250639386, + "grad_norm": 0.11997899496687212, + "learning_rate": 4.235668101601964e-07, + "loss": 0.9887, + "step": 2508 + }, + { + "epoch": 6.41687979539642, + "grad_norm": 0.11897278858577053, + "learning_rate": 4.199016666013533e-07, + "loss": 1.0162, + "step": 2509 + }, + { + "epoch": 6.419437340153452, + "grad_norm": 0.1213013490317867, + "learning_rate": 4.1625210926350413e-07, + "loss": 1.0141, + "step": 2510 + }, + { + "epoch": 6.421994884910486, + "grad_norm": 0.12533002989447992, + "learning_rate": 4.1261814408424806e-07, + "loss": 1.0251, + "step": 2511 + }, + { + "epoch": 6.42455242966752, + "grad_norm": 0.12196478149472252, + "learning_rate": 4.089997769758225e-07, + "loss": 1.0365, + "step": 2512 + }, + { + "epoch": 6.427109974424552, + "grad_norm": 0.12143791187790264, + "learning_rate": 4.0539701382507847e-07, + "loss": 1.0032, + "step": 2513 + }, + { + "epoch": 6.429667519181586, + "grad_norm": 0.11682750481108217, + "learning_rate": 4.018098604934906e-07, + "loss": 1.0045, + "step": 2514 + }, + { + "epoch": 6.432225063938619, + "grad_norm": 0.11654420434670919, + "learning_rate": 3.982383228171338e-07, + "loss": 1.0122, + "step": 2515 + }, + { + "epoch": 6.434782608695652, + "grad_norm": 0.12087376970393812, + "learning_rate": 3.946824066066757e-07, + "loss": 1.0091, + "step": 2516 + }, + { + "epoch": 6.437340153452685, + "grad_norm": 0.11198028929740504, + "learning_rate": 3.9114211764736843e-07, + "loss": 0.9916, + "step": 2517 + }, + { + "epoch": 6.439897698209719, + "grad_norm": 0.117876547438714, + "learning_rate": 3.876174616990402e-07, + "loss": 0.9688, + "step": 2518 + }, + { + "epoch": 6.442455242966752, + "grad_norm": 0.11691097425539704, + "learning_rate": 3.8410844449608966e-07, + "loss": 1.0262, + "step": 2519 + }, + { + "epoch": 6.445012787723785, + "grad_norm": 0.12067476965271878, + "learning_rate": 3.8061507174746326e-07, + "loss": 1.0357, + "step": 2520 + }, + { + "epoch": 6.447570332480819, + "grad_norm": 0.11448044711242149, + "learning_rate": 3.7713734913666254e-07, + "loss": 1.0278, + "step": 2521 + }, + { + "epoch": 6.450127877237851, + "grad_norm": 0.11900503374045875, + "learning_rate": 3.73675282321726e-07, + "loss": 1.0293, + "step": 2522 + }, + { + "epoch": 6.452685421994885, + "grad_norm": 0.1237852363860751, + "learning_rate": 3.7022887693521914e-07, + "loss": 1.0432, + "step": 2523 + }, + { + "epoch": 6.455242966751918, + "grad_norm": 0.11395769439497158, + "learning_rate": 3.6679813858422673e-07, + "loss": 1.0451, + "step": 2524 + }, + { + "epoch": 6.457800511508951, + "grad_norm": 0.11755851431433859, + "learning_rate": 3.6338307285034626e-07, + "loss": 1.0166, + "step": 2525 + }, + { + "epoch": 6.460358056265985, + "grad_norm": 0.11537719335337888, + "learning_rate": 3.5998368528967764e-07, + "loss": 1.0221, + "step": 2526 + }, + { + "epoch": 6.462915601023018, + "grad_norm": 0.12098800578611382, + "learning_rate": 3.5659998143281027e-07, + "loss": 1.0474, + "step": 2527 + }, + { + "epoch": 6.465473145780051, + "grad_norm": 0.11989356063597686, + "learning_rate": 3.532319667848172e-07, + "loss": 1.0187, + "step": 2528 + }, + { + "epoch": 6.468030690537084, + "grad_norm": 0.1156244817453119, + "learning_rate": 3.498796468252508e-07, + "loss": 0.9894, + "step": 2529 + }, + { + "epoch": 6.470588235294118, + "grad_norm": 0.11213145863456157, + "learning_rate": 3.46543027008126e-07, + "loss": 1.0331, + "step": 2530 + }, + { + "epoch": 6.4731457800511505, + "grad_norm": 0.11707883319628067, + "learning_rate": 3.4322211276191176e-07, + "loss": 1.0259, + "step": 2531 + }, + { + "epoch": 6.475703324808184, + "grad_norm": 0.11350670721406404, + "learning_rate": 3.399169094895294e-07, + "loss": 1.0065, + "step": 2532 + }, + { + "epoch": 6.478260869565218, + "grad_norm": 0.11452239943111842, + "learning_rate": 3.366274225683397e-07, + "loss": 1.0382, + "step": 2533 + }, + { + "epoch": 6.4808184143222505, + "grad_norm": 0.11645854358551593, + "learning_rate": 3.3335365735012947e-07, + "loss": 0.9849, + "step": 2534 + }, + { + "epoch": 6.483375959079284, + "grad_norm": 0.1150643632230636, + "learning_rate": 3.3009561916111045e-07, + "loss": 1.0441, + "step": 2535 + }, + { + "epoch": 6.485933503836317, + "grad_norm": 0.11565843726243669, + "learning_rate": 3.2685331330190916e-07, + "loss": 1.0256, + "step": 2536 + }, + { + "epoch": 6.4884910485933505, + "grad_norm": 0.12172892123412701, + "learning_rate": 3.2362674504755385e-07, + "loss": 1.0006, + "step": 2537 + }, + { + "epoch": 6.491048593350383, + "grad_norm": 0.11416395245772691, + "learning_rate": 3.2041591964746767e-07, + "loss": 0.9981, + "step": 2538 + }, + { + "epoch": 6.493606138107417, + "grad_norm": 0.11099012627200047, + "learning_rate": 3.17220842325463e-07, + "loss": 0.9971, + "step": 2539 + }, + { + "epoch": 6.4961636828644505, + "grad_norm": 0.12666071845516697, + "learning_rate": 3.14041518279733e-07, + "loss": 1.019, + "step": 2540 + }, + { + "epoch": 6.498721227621483, + "grad_norm": 0.11694427326316041, + "learning_rate": 3.108779526828365e-07, + "loss": 1.048, + "step": 2541 + }, + { + "epoch": 6.501278772378517, + "grad_norm": 0.11663277776194486, + "learning_rate": 3.0773015068169876e-07, + "loss": 1.0205, + "step": 2542 + }, + { + "epoch": 6.5038363171355495, + "grad_norm": 0.11421370105035522, + "learning_rate": 3.045981173975965e-07, + "loss": 1.0062, + "step": 2543 + }, + { + "epoch": 6.506393861892583, + "grad_norm": 0.11416247400561318, + "learning_rate": 3.0148185792615137e-07, + "loss": 1.0221, + "step": 2544 + }, + { + "epoch": 6.508951406649617, + "grad_norm": 0.12004167269390631, + "learning_rate": 2.9838137733732343e-07, + "loss": 1.0336, + "step": 2545 + }, + { + "epoch": 6.5115089514066495, + "grad_norm": 0.12185027359479889, + "learning_rate": 2.9529668067539986e-07, + "loss": 1.0085, + "step": 2546 + }, + { + "epoch": 6.514066496163683, + "grad_norm": 0.11920181864869182, + "learning_rate": 2.922277729589906e-07, + "loss": 1.0212, + "step": 2547 + }, + { + "epoch": 6.516624040920716, + "grad_norm": 0.11457206340363568, + "learning_rate": 2.891746591810152e-07, + "loss": 1.0062, + "step": 2548 + }, + { + "epoch": 6.5191815856777495, + "grad_norm": 0.11396161204686395, + "learning_rate": 2.86137344308699e-07, + "loss": 1.0269, + "step": 2549 + }, + { + "epoch": 6.521739130434782, + "grad_norm": 0.11716042134956894, + "learning_rate": 2.8311583328356485e-07, + "loss": 1.0513, + "step": 2550 + }, + { + "epoch": 6.524296675191816, + "grad_norm": 0.11082138416428153, + "learning_rate": 2.801101310214205e-07, + "loss": 1.0133, + "step": 2551 + }, + { + "epoch": 6.526854219948849, + "grad_norm": 0.11831445098631707, + "learning_rate": 2.7712024241235757e-07, + "loss": 1.0184, + "step": 2552 + }, + { + "epoch": 6.529411764705882, + "grad_norm": 0.11918281125426747, + "learning_rate": 2.7414617232073505e-07, + "loss": 1.0344, + "step": 2553 + }, + { + "epoch": 6.531969309462916, + "grad_norm": 0.11681313613977624, + "learning_rate": 2.7118792558518237e-07, + "loss": 1.0219, + "step": 2554 + }, + { + "epoch": 6.534526854219949, + "grad_norm": 0.12570449518559115, + "learning_rate": 2.6824550701857966e-07, + "loss": 1.0192, + "step": 2555 + }, + { + "epoch": 6.537084398976982, + "grad_norm": 0.11631595597156608, + "learning_rate": 2.653189214080576e-07, + "loss": 0.9885, + "step": 2556 + }, + { + "epoch": 6.539641943734015, + "grad_norm": 0.11976742856004091, + "learning_rate": 2.624081735149897e-07, + "loss": 1.0225, + "step": 2557 + }, + { + "epoch": 6.542199488491049, + "grad_norm": 0.11687676414472607, + "learning_rate": 2.5951326807498123e-07, + "loss": 1.0051, + "step": 2558 + }, + { + "epoch": 6.544757033248082, + "grad_norm": 0.11626243542745685, + "learning_rate": 2.5663420979785915e-07, + "loss": 1.0256, + "step": 2559 + }, + { + "epoch": 6.547314578005115, + "grad_norm": 0.11473271542819383, + "learning_rate": 2.5377100336767547e-07, + "loss": 1.0134, + "step": 2560 + }, + { + "epoch": 6.549872122762149, + "grad_norm": 0.11617767916671155, + "learning_rate": 2.509236534426851e-07, + "loss": 1.0045, + "step": 2561 + }, + { + "epoch": 6.552429667519181, + "grad_norm": 0.11177045938404909, + "learning_rate": 2.4809216465534913e-07, + "loss": 1.0377, + "step": 2562 + }, + { + "epoch": 6.554987212276215, + "grad_norm": 0.11344781404055954, + "learning_rate": 2.4527654161232153e-07, + "loss": 1.0037, + "step": 2563 + }, + { + "epoch": 6.557544757033249, + "grad_norm": 0.12399390000812018, + "learning_rate": 2.424767888944468e-07, + "loss": 1.0462, + "step": 2564 + }, + { + "epoch": 6.560102301790281, + "grad_norm": 0.11847061868510626, + "learning_rate": 2.3969291105674805e-07, + "loss": 0.9959, + "step": 2565 + }, + { + "epoch": 6.562659846547315, + "grad_norm": 0.116920831153564, + "learning_rate": 2.3692491262841788e-07, + "loss": 0.9783, + "step": 2566 + }, + { + "epoch": 6.565217391304348, + "grad_norm": 0.12018087616989655, + "learning_rate": 2.3417279811281947e-07, + "loss": 0.9778, + "step": 2567 + }, + { + "epoch": 6.567774936061381, + "grad_norm": 0.11727845557913934, + "learning_rate": 2.3143657198746893e-07, + "loss": 1.042, + "step": 2568 + }, + { + "epoch": 6.570332480818414, + "grad_norm": 0.1156893274747709, + "learning_rate": 2.2871623870403649e-07, + "loss": 1.0302, + "step": 2569 + }, + { + "epoch": 6.572890025575448, + "grad_norm": 0.11720330890092409, + "learning_rate": 2.260118026883318e-07, + "loss": 1.0267, + "step": 2570 + }, + { + "epoch": 6.57544757033248, + "grad_norm": 0.11688767903985245, + "learning_rate": 2.233232683403075e-07, + "loss": 1.0292, + "step": 2571 + }, + { + "epoch": 6.578005115089514, + "grad_norm": 0.11603026043379294, + "learning_rate": 2.206506400340369e-07, + "loss": 1.0017, + "step": 2572 + }, + { + "epoch": 6.580562659846548, + "grad_norm": 0.11389458080146765, + "learning_rate": 2.1799392211772074e-07, + "loss": 1.0082, + "step": 2573 + }, + { + "epoch": 6.58312020460358, + "grad_norm": 0.1161474107114186, + "learning_rate": 2.1535311891367373e-07, + "loss": 1.0219, + "step": 2574 + }, + { + "epoch": 6.585677749360614, + "grad_norm": 0.11523869949699879, + "learning_rate": 2.1272823471831573e-07, + "loss": 1.0048, + "step": 2575 + }, + { + "epoch": 6.588235294117647, + "grad_norm": 0.11447790591214169, + "learning_rate": 2.101192738021718e-07, + "loss": 1.0116, + "step": 2576 + }, + { + "epoch": 6.59079283887468, + "grad_norm": 0.11643651666513412, + "learning_rate": 2.0752624040985436e-07, + "loss": 1.0117, + "step": 2577 + }, + { + "epoch": 6.593350383631714, + "grad_norm": 0.12040988081003166, + "learning_rate": 2.0494913876007105e-07, + "loss": 1.0255, + "step": 2578 + }, + { + "epoch": 6.595907928388747, + "grad_norm": 0.11872708662460554, + "learning_rate": 2.0238797304560243e-07, + "loss": 1.0241, + "step": 2579 + }, + { + "epoch": 6.59846547314578, + "grad_norm": 0.10983144316407795, + "learning_rate": 1.9984274743330424e-07, + "loss": 1.0106, + "step": 2580 + }, + { + "epoch": 6.601023017902813, + "grad_norm": 0.112895943367732, + "learning_rate": 1.9731346606410185e-07, + "loss": 1.0405, + "step": 2581 + }, + { + "epoch": 6.603580562659847, + "grad_norm": 0.11309181158689928, + "learning_rate": 1.9480013305297585e-07, + "loss": 1.0286, + "step": 2582 + }, + { + "epoch": 6.6061381074168795, + "grad_norm": 0.11579577875848088, + "learning_rate": 1.9230275248896425e-07, + "loss": 1.0137, + "step": 2583 + }, + { + "epoch": 6.608695652173913, + "grad_norm": 0.11932271374275923, + "learning_rate": 1.8982132843514577e-07, + "loss": 1.0352, + "step": 2584 + }, + { + "epoch": 6.611253196930946, + "grad_norm": 0.1187240263728754, + "learning_rate": 1.8735586492864556e-07, + "loss": 0.9899, + "step": 2585 + }, + { + "epoch": 6.6138107416879794, + "grad_norm": 0.12010362235501355, + "learning_rate": 1.8490636598061605e-07, + "loss": 1.0202, + "step": 2586 + }, + { + "epoch": 6.616368286445013, + "grad_norm": 0.11896072789581243, + "learning_rate": 1.8247283557624062e-07, + "loss": 1.0801, + "step": 2587 + }, + { + "epoch": 6.618925831202046, + "grad_norm": 0.11269695438058397, + "learning_rate": 1.8005527767471998e-07, + "loss": 1.0323, + "step": 2588 + }, + { + "epoch": 6.621483375959079, + "grad_norm": 0.11595014960172056, + "learning_rate": 1.7765369620926899e-07, + "loss": 1.0247, + "step": 2589 + }, + { + "epoch": 6.624040920716112, + "grad_norm": 0.11457210948093192, + "learning_rate": 1.752680950871144e-07, + "loss": 1.0561, + "step": 2590 + }, + { + "epoch": 6.626598465473146, + "grad_norm": 0.11577860483951284, + "learning_rate": 1.7289847818947492e-07, + "loss": 1.0182, + "step": 2591 + }, + { + "epoch": 6.629156010230179, + "grad_norm": 0.11240383490721378, + "learning_rate": 1.7054484937157112e-07, + "loss": 1.0255, + "step": 2592 + }, + { + "epoch": 6.631713554987212, + "grad_norm": 0.11631232042116323, + "learning_rate": 1.6820721246261106e-07, + "loss": 1.0299, + "step": 2593 + }, + { + "epoch": 6.634271099744246, + "grad_norm": 0.11273655621311057, + "learning_rate": 1.6588557126578365e-07, + "loss": 1.0407, + "step": 2594 + }, + { + "epoch": 6.6368286445012785, + "grad_norm": 0.11767164102993428, + "learning_rate": 1.6357992955825297e-07, + "loss": 1.0145, + "step": 2595 + }, + { + "epoch": 6.639386189258312, + "grad_norm": 0.11534695075999606, + "learning_rate": 1.6129029109115401e-07, + "loss": 1.0106, + "step": 2596 + }, + { + "epoch": 6.641943734015345, + "grad_norm": 0.11539400507669376, + "learning_rate": 1.59016659589587e-07, + "loss": 0.9862, + "step": 2597 + }, + { + "epoch": 6.6445012787723785, + "grad_norm": 0.11483047616375414, + "learning_rate": 1.567590387526041e-07, + "loss": 1.0301, + "step": 2598 + }, + { + "epoch": 6.647058823529412, + "grad_norm": 0.11260638212850177, + "learning_rate": 1.5451743225321726e-07, + "loss": 1.0088, + "step": 2599 + }, + { + "epoch": 6.649616368286445, + "grad_norm": 0.11619144848069289, + "learning_rate": 1.5229184373837912e-07, + "loss": 1.0117, + "step": 2600 + }, + { + "epoch": 6.6521739130434785, + "grad_norm": 0.12170161725444163, + "learning_rate": 1.5008227682898337e-07, + "loss": 1.0345, + "step": 2601 + }, + { + "epoch": 6.654731457800511, + "grad_norm": 0.11009879990340311, + "learning_rate": 1.4788873511985656e-07, + "loss": 1.0074, + "step": 2602 + }, + { + "epoch": 6.657289002557545, + "grad_norm": 0.11242257451547451, + "learning_rate": 1.4571122217975298e-07, + "loss": 1.0295, + "step": 2603 + }, + { + "epoch": 6.659846547314578, + "grad_norm": 0.11604613398078274, + "learning_rate": 1.4354974155135203e-07, + "loss": 1.0287, + "step": 2604 + }, + { + "epoch": 6.662404092071611, + "grad_norm": 0.11447891191608152, + "learning_rate": 1.4140429675124633e-07, + "loss": 1.0059, + "step": 2605 + }, + { + "epoch": 6.664961636828645, + "grad_norm": 0.11195548180186611, + "learning_rate": 1.3927489126993932e-07, + "loss": 1.0347, + "step": 2606 + }, + { + "epoch": 6.667519181585678, + "grad_norm": 0.11445065696070437, + "learning_rate": 1.3716152857184306e-07, + "loss": 1.012, + "step": 2607 + }, + { + "epoch": 6.670076726342711, + "grad_norm": 0.11614977059279803, + "learning_rate": 1.350642120952661e-07, + "loss": 0.9918, + "step": 2608 + }, + { + "epoch": 6.672634271099744, + "grad_norm": 0.11871269418863775, + "learning_rate": 1.3298294525241008e-07, + "loss": 1.0269, + "step": 2609 + }, + { + "epoch": 6.675191815856778, + "grad_norm": 0.10866128338893077, + "learning_rate": 1.3091773142936525e-07, + "loss": 1.0334, + "step": 2610 + }, + { + "epoch": 6.677749360613811, + "grad_norm": 0.12041795104852608, + "learning_rate": 1.2886857398610731e-07, + "loss": 0.9974, + "step": 2611 + }, + { + "epoch": 6.680306905370844, + "grad_norm": 0.11406194376177828, + "learning_rate": 1.2683547625648718e-07, + "loss": 1.0222, + "step": 2612 + }, + { + "epoch": 6.6828644501278776, + "grad_norm": 0.11240623577621248, + "learning_rate": 1.2481844154822565e-07, + "loss": 0.9952, + "step": 2613 + }, + { + "epoch": 6.68542199488491, + "grad_norm": 0.11514164915047609, + "learning_rate": 1.2281747314291437e-07, + "loss": 1.0026, + "step": 2614 + }, + { + "epoch": 6.687979539641944, + "grad_norm": 0.11222335726022206, + "learning_rate": 1.208325742960037e-07, + "loss": 1.0056, + "step": 2615 + }, + { + "epoch": 6.690537084398977, + "grad_norm": 0.11243016039454592, + "learning_rate": 1.1886374823679825e-07, + "loss": 1.0492, + "step": 2616 + }, + { + "epoch": 6.69309462915601, + "grad_norm": 0.11317201484958644, + "learning_rate": 1.1691099816845574e-07, + "loss": 1.0213, + "step": 2617 + }, + { + "epoch": 6.695652173913043, + "grad_norm": 0.1170626311837824, + "learning_rate": 1.149743272679793e-07, + "loss": 0.9974, + "step": 2618 + }, + { + "epoch": 6.698209718670077, + "grad_norm": 0.12262867677149476, + "learning_rate": 1.1305373868620961e-07, + "loss": 0.9967, + "step": 2619 + }, + { + "epoch": 6.70076726342711, + "grad_norm": 0.11396022297257247, + "learning_rate": 1.1114923554782608e-07, + "loss": 0.9956, + "step": 2620 + }, + { + "epoch": 6.703324808184143, + "grad_norm": 0.11735281558425238, + "learning_rate": 1.0926082095133572e-07, + "loss": 1.0193, + "step": 2621 + }, + { + "epoch": 6.705882352941177, + "grad_norm": 0.12029512917783149, + "learning_rate": 1.0738849796907091e-07, + "loss": 1.0473, + "step": 2622 + }, + { + "epoch": 6.708439897698209, + "grad_norm": 0.11312555151340069, + "learning_rate": 1.0553226964718277e-07, + "loss": 1.008, + "step": 2623 + }, + { + "epoch": 6.710997442455243, + "grad_norm": 0.11541322342299927, + "learning_rate": 1.0369213900564001e-07, + "loss": 1.0029, + "step": 2624 + }, + { + "epoch": 6.713554987212277, + "grad_norm": 0.11302071428638145, + "learning_rate": 1.0186810903822119e-07, + "loss": 0.9623, + "step": 2625 + }, + { + "epoch": 6.716112531969309, + "grad_norm": 0.11291140484686953, + "learning_rate": 1.0006018271250695e-07, + "loss": 1.0305, + "step": 2626 + }, + { + "epoch": 6.718670076726343, + "grad_norm": 0.11524487387426563, + "learning_rate": 9.826836296988107e-08, + "loss": 1.0596, + "step": 2627 + }, + { + "epoch": 6.721227621483376, + "grad_norm": 0.11543260535666969, + "learning_rate": 9.649265272552277e-08, + "loss": 1.0237, + "step": 2628 + }, + { + "epoch": 6.723785166240409, + "grad_norm": 0.11302904037284935, + "learning_rate": 9.473305486840112e-08, + "loss": 1.0177, + "step": 2629 + }, + { + "epoch": 6.726342710997442, + "grad_norm": 0.11210024116892857, + "learning_rate": 9.29895722612717e-08, + "loss": 1.0284, + "step": 2630 + }, + { + "epoch": 6.728900255754476, + "grad_norm": 0.11611360048557691, + "learning_rate": 9.126220774067218e-08, + "loss": 1.0313, + "step": 2631 + }, + { + "epoch": 6.731457800511509, + "grad_norm": 0.11281080704543008, + "learning_rate": 8.955096411691566e-08, + "loss": 1.0156, + "step": 2632 + }, + { + "epoch": 6.734015345268542, + "grad_norm": 0.11192307343079083, + "learning_rate": 8.785584417409065e-08, + "loss": 1.0173, + "step": 2633 + }, + { + "epoch": 6.736572890025576, + "grad_norm": 0.11483249975315203, + "learning_rate": 8.617685067004777e-08, + "loss": 1.0269, + "step": 2634 + }, + { + "epoch": 6.739130434782608, + "grad_norm": 0.11652633110386056, + "learning_rate": 8.451398633640861e-08, + "loss": 0.9978, + "step": 2635 + }, + { + "epoch": 6.741687979539642, + "grad_norm": 0.11193935061569056, + "learning_rate": 8.286725387854689e-08, + "loss": 1.0166, + "step": 2636 + }, + { + "epoch": 6.744245524296675, + "grad_norm": 0.1132575109344062, + "learning_rate": 8.123665597559393e-08, + "loss": 1.03, + "step": 2637 + }, + { + "epoch": 6.746803069053708, + "grad_norm": 0.10909141114205528, + "learning_rate": 7.962219528042991e-08, + "loss": 0.9843, + "step": 2638 + }, + { + "epoch": 6.749360613810742, + "grad_norm": 0.11510554903103819, + "learning_rate": 7.802387441968262e-08, + "loss": 1.0058, + "step": 2639 + }, + { + "epoch": 6.751918158567775, + "grad_norm": 0.1126125629269261, + "learning_rate": 7.644169599371975e-08, + "loss": 1.0451, + "step": 2640 + }, + { + "epoch": 6.754475703324808, + "grad_norm": 0.11361718582807691, + "learning_rate": 7.487566257664558e-08, + "loss": 1.0447, + "step": 2641 + }, + { + "epoch": 6.757033248081841, + "grad_norm": 0.11201362418480085, + "learning_rate": 7.332577671629982e-08, + "loss": 1.0003, + "step": 2642 + }, + { + "epoch": 6.759590792838875, + "grad_norm": 0.11250812055949669, + "learning_rate": 7.179204093424985e-08, + "loss": 1.0152, + "step": 2643 + }, + { + "epoch": 6.762148337595908, + "grad_norm": 0.11340595916397253, + "learning_rate": 7.027445772578856e-08, + "loss": 1.0136, + "step": 2644 + }, + { + "epoch": 6.764705882352941, + "grad_norm": 0.11043173067397596, + "learning_rate": 6.877302955992649e-08, + "loss": 1.0039, + "step": 2645 + }, + { + "epoch": 6.767263427109975, + "grad_norm": 0.11320152606971275, + "learning_rate": 6.72877588793952e-08, + "loss": 1.0263, + "step": 2646 + }, + { + "epoch": 6.7698209718670075, + "grad_norm": 0.11555065643180781, + "learning_rate": 6.581864810063732e-08, + "loss": 1.0095, + "step": 2647 + }, + { + "epoch": 6.772378516624041, + "grad_norm": 0.1114703182443358, + "learning_rate": 6.436569961380313e-08, + "loss": 1.0014, + "step": 2648 + }, + { + "epoch": 6.774936061381074, + "grad_norm": 0.11945044598900786, + "learning_rate": 6.292891578275063e-08, + "loss": 1.0308, + "step": 2649 + }, + { + "epoch": 6.7774936061381075, + "grad_norm": 0.11250868328242511, + "learning_rate": 6.150829894503662e-08, + "loss": 1.0107, + "step": 2650 + }, + { + "epoch": 6.78005115089514, + "grad_norm": 0.11491638958663465, + "learning_rate": 6.010385141191455e-08, + "loss": 1.0279, + "step": 2651 + }, + { + "epoch": 6.782608695652174, + "grad_norm": 0.1160903563132126, + "learning_rate": 5.8715575468333286e-08, + "loss": 1.0067, + "step": 2652 + }, + { + "epoch": 6.7851662404092075, + "grad_norm": 0.11673880519657757, + "learning_rate": 5.734347337293167e-08, + "loss": 1.0253, + "step": 2653 + }, + { + "epoch": 6.78772378516624, + "grad_norm": 0.11345092121417273, + "learning_rate": 5.598754735803513e-08, + "loss": 1.0256, + "step": 2654 + }, + { + "epoch": 6.790281329923274, + "grad_norm": 0.11245719320265857, + "learning_rate": 5.464779962964795e-08, + "loss": 1.023, + "step": 2655 + }, + { + "epoch": 6.792838874680307, + "grad_norm": 0.11318781711220266, + "learning_rate": 5.332423236745765e-08, + "loss": 0.9817, + "step": 2656 + }, + { + "epoch": 6.79539641943734, + "grad_norm": 0.11393255984182678, + "learning_rate": 5.201684772482507e-08, + "loss": 0.9919, + "step": 2657 + }, + { + "epoch": 6.797953964194374, + "grad_norm": 0.1114106983420887, + "learning_rate": 5.0725647828783196e-08, + "loss": 0.9949, + "step": 2658 + }, + { + "epoch": 6.8005115089514065, + "grad_norm": 0.11613702586163382, + "learning_rate": 4.945063478003276e-08, + "loss": 1.0246, + "step": 2659 + }, + { + "epoch": 6.80306905370844, + "grad_norm": 0.11426036986413816, + "learning_rate": 4.8191810652941096e-08, + "loss": 1.0434, + "step": 2660 + }, + { + "epoch": 6.805626598465473, + "grad_norm": 0.11654706791098739, + "learning_rate": 4.694917749553663e-08, + "loss": 1.0256, + "step": 2661 + }, + { + "epoch": 6.8081841432225065, + "grad_norm": 0.10999242921563646, + "learning_rate": 4.5722737329505495e-08, + "loss": 0.9802, + "step": 2662 + }, + { + "epoch": 6.810741687979539, + "grad_norm": 0.11948231260555445, + "learning_rate": 4.451249215018827e-08, + "loss": 1.0593, + "step": 2663 + }, + { + "epoch": 6.813299232736573, + "grad_norm": 0.11285924950704992, + "learning_rate": 4.331844392657991e-08, + "loss": 1.026, + "step": 2664 + }, + { + "epoch": 6.8158567774936065, + "grad_norm": 0.11299230638204774, + "learning_rate": 4.2140594601320915e-08, + "loss": 1.0162, + "step": 2665 + }, + { + "epoch": 6.818414322250639, + "grad_norm": 0.11595950299690573, + "learning_rate": 4.097894609069841e-08, + "loss": 0.9853, + "step": 2666 + }, + { + "epoch": 6.820971867007673, + "grad_norm": 0.1155751170348188, + "learning_rate": 3.983350028464283e-08, + "loss": 1.0022, + "step": 2667 + }, + { + "epoch": 6.823529411764706, + "grad_norm": 0.11385145480733656, + "learning_rate": 3.870425904672237e-08, + "loss": 1.0905, + "step": 2668 + }, + { + "epoch": 6.826086956521739, + "grad_norm": 0.11441498941945787, + "learning_rate": 3.7591224214141855e-08, + "loss": 1.032, + "step": 2669 + }, + { + "epoch": 6.828644501278772, + "grad_norm": 0.11622957694085463, + "learning_rate": 3.649439759773943e-08, + "loss": 1.0273, + "step": 2670 + }, + { + "epoch": 6.831202046035806, + "grad_norm": 0.11310752492427763, + "learning_rate": 3.541378098198323e-08, + "loss": 1.0202, + "step": 2671 + }, + { + "epoch": 6.833759590792839, + "grad_norm": 0.11205928985871322, + "learning_rate": 3.4349376124969136e-08, + "loss": 0.9919, + "step": 2672 + }, + { + "epoch": 6.836317135549872, + "grad_norm": 0.11055590937853152, + "learning_rate": 3.330118475841859e-08, + "loss": 1.019, + "step": 2673 + }, + { + "epoch": 6.838874680306906, + "grad_norm": 0.11098200209047006, + "learning_rate": 3.22692085876708e-08, + "loss": 0.9972, + "step": 2674 + }, + { + "epoch": 6.841432225063938, + "grad_norm": 0.11522340948350532, + "learning_rate": 3.125344929168828e-08, + "loss": 1.0004, + "step": 2675 + }, + { + "epoch": 6.843989769820972, + "grad_norm": 0.11422976235509531, + "learning_rate": 3.025390852304688e-08, + "loss": 1.0273, + "step": 2676 + }, + { + "epoch": 6.846547314578006, + "grad_norm": 0.11018216168196639, + "learning_rate": 2.927058790793802e-08, + "loss": 1.0102, + "step": 2677 + }, + { + "epoch": 6.849104859335038, + "grad_norm": 0.10995140569223621, + "learning_rate": 2.830348904616198e-08, + "loss": 0.991, + "step": 2678 + }, + { + "epoch": 6.851662404092072, + "grad_norm": 0.11543991907521552, + "learning_rate": 2.7352613511127946e-08, + "loss": 1.0338, + "step": 2679 + }, + { + "epoch": 6.854219948849105, + "grad_norm": 0.11129720513762761, + "learning_rate": 2.6417962849852875e-08, + "loss": 1.0094, + "step": 2680 + }, + { + "epoch": 6.856777493606138, + "grad_norm": 0.1097107046759256, + "learning_rate": 2.549953858295262e-08, + "loss": 1.0208, + "step": 2681 + }, + { + "epoch": 6.859335038363171, + "grad_norm": 0.1181695445768175, + "learning_rate": 2.459734220464638e-08, + "loss": 1.0015, + "step": 2682 + }, + { + "epoch": 6.861892583120205, + "grad_norm": 0.11107816598809478, + "learning_rate": 2.3711375182753347e-08, + "loss": 1.0261, + "step": 2683 + }, + { + "epoch": 6.864450127877237, + "grad_norm": 0.10839159774339671, + "learning_rate": 2.2841638958683855e-08, + "loss": 1.0135, + "step": 2684 + }, + { + "epoch": 6.867007672634271, + "grad_norm": 0.1121417586939987, + "learning_rate": 2.1988134947446004e-08, + "loss": 1.0035, + "step": 2685 + }, + { + "epoch": 6.869565217391305, + "grad_norm": 0.11209845991644457, + "learning_rate": 2.1150864537636817e-08, + "loss": 1.0321, + "step": 2686 + }, + { + "epoch": 6.872122762148337, + "grad_norm": 0.11303462530389491, + "learning_rate": 2.032982909144332e-08, + "loss": 1.012, + "step": 2687 + }, + { + "epoch": 6.874680306905371, + "grad_norm": 0.11117791002965544, + "learning_rate": 1.9525029944637008e-08, + "loss": 0.9929, + "step": 2688 + }, + { + "epoch": 6.877237851662404, + "grad_norm": 0.1089777437805983, + "learning_rate": 1.8736468406579388e-08, + "loss": 0.9931, + "step": 2689 + }, + { + "epoch": 6.879795396419437, + "grad_norm": 0.11251100033934079, + "learning_rate": 1.796414576020755e-08, + "loss": 1.0153, + "step": 2690 + }, + { + "epoch": 6.882352941176471, + "grad_norm": 0.11299998984552379, + "learning_rate": 1.720806326204305e-08, + "loss": 1.005, + "step": 2691 + }, + { + "epoch": 6.884910485933504, + "grad_norm": 0.11290626743296132, + "learning_rate": 1.646822214218524e-08, + "loss": 1.049, + "step": 2692 + }, + { + "epoch": 6.887468030690537, + "grad_norm": 0.11186130749976496, + "learning_rate": 1.5744623604310172e-08, + "loss": 1.003, + "step": 2693 + }, + { + "epoch": 6.89002557544757, + "grad_norm": 0.11028332749990057, + "learning_rate": 1.503726882566503e-08, + "loss": 0.9892, + "step": 2694 + }, + { + "epoch": 6.892583120204604, + "grad_norm": 0.11457205700764143, + "learning_rate": 1.4346158957073696e-08, + "loss": 1.0261, + "step": 2695 + }, + { + "epoch": 6.8951406649616365, + "grad_norm": 0.11434079231719742, + "learning_rate": 1.3671295122928974e-08, + "loss": 1.0118, + "step": 2696 + }, + { + "epoch": 6.89769820971867, + "grad_norm": 0.11590548541933458, + "learning_rate": 1.3012678421191471e-08, + "loss": 1.0397, + "step": 2697 + }, + { + "epoch": 6.900255754475703, + "grad_norm": 0.11241776946007812, + "learning_rate": 1.2370309923388501e-08, + "loss": 1.0214, + "step": 2698 + }, + { + "epoch": 6.9028132992327365, + "grad_norm": 0.11386908312296881, + "learning_rate": 1.1744190674614076e-08, + "loss": 1.0249, + "step": 2699 + }, + { + "epoch": 6.90537084398977, + "grad_norm": 0.1111155708841944, + "learning_rate": 1.1134321693525574e-08, + "loss": 1.0013, + "step": 2700 + }, + { + "epoch": 6.907928388746803, + "grad_norm": 0.11383791079341445, + "learning_rate": 1.0540703972341525e-08, + "loss": 1.0148, + "step": 2701 + }, + { + "epoch": 6.910485933503836, + "grad_norm": 0.11458774717785482, + "learning_rate": 9.963338476840501e-09, + "loss": 1.029, + "step": 2702 + }, + { + "epoch": 6.913043478260869, + "grad_norm": 0.11295695096599505, + "learning_rate": 9.402226146361104e-09, + "loss": 1.0136, + "step": 2703 + }, + { + "epoch": 6.915601023017903, + "grad_norm": 0.11389257052620162, + "learning_rate": 8.857367893796431e-09, + "loss": 0.9989, + "step": 2704 + }, + { + "epoch": 6.918158567774936, + "grad_norm": 0.11405136091559014, + "learning_rate": 8.328764605597395e-09, + "loss": 1.0239, + "step": 2705 + }, + { + "epoch": 6.920716112531969, + "grad_norm": 0.11514239271194625, + "learning_rate": 7.816417141768284e-09, + "loss": 1.041, + "step": 2706 + }, + { + "epoch": 6.923273657289003, + "grad_norm": 0.11236159186101047, + "learning_rate": 7.3203263358678775e-09, + "loss": 1.0297, + "step": 2707 + }, + { + "epoch": 6.9258312020460355, + "grad_norm": 0.112779013609661, + "learning_rate": 6.840492995002779e-09, + "loss": 1.0177, + "step": 2708 + }, + { + "epoch": 6.928388746803069, + "grad_norm": 0.11154163182583252, + "learning_rate": 6.376917899832968e-09, + "loss": 1.0262, + "step": 2709 + }, + { + "epoch": 6.930946291560103, + "grad_norm": 0.11358295898234577, + "learning_rate": 5.929601804566254e-09, + "loss": 1.0057, + "step": 2710 + }, + { + "epoch": 6.9335038363171355, + "grad_norm": 0.11003717187565273, + "learning_rate": 5.498545436957159e-09, + "loss": 1.0269, + "step": 2711 + }, + { + "epoch": 6.936061381074169, + "grad_norm": 0.10600474645039837, + "learning_rate": 5.0837494983091425e-09, + "loss": 0.9854, + "step": 2712 + }, + { + "epoch": 6.938618925831202, + "grad_norm": 0.10929642667614789, + "learning_rate": 4.6852146634668304e-09, + "loss": 1.0149, + "step": 2713 + }, + { + "epoch": 6.9411764705882355, + "grad_norm": 0.11582392789733863, + "learning_rate": 4.302941580823783e-09, + "loss": 0.9864, + "step": 2714 + }, + { + "epoch": 6.943734015345268, + "grad_norm": 0.11406855862931596, + "learning_rate": 3.936930872312506e-09, + "loss": 1.0296, + "step": 2715 + }, + { + "epoch": 6.946291560102302, + "grad_norm": 0.11629050448797144, + "learning_rate": 3.5871831334099992e-09, + "loss": 1.0319, + "step": 2716 + }, + { + "epoch": 6.948849104859335, + "grad_norm": 0.11235711633426523, + "learning_rate": 3.2536989331355406e-09, + "loss": 1.0061, + "step": 2717 + }, + { + "epoch": 6.951406649616368, + "grad_norm": 0.11339029722347495, + "learning_rate": 2.9364788140451296e-09, + "loss": 1.0558, + "step": 2718 + }, + { + "epoch": 6.953964194373402, + "grad_norm": 0.1122327401431765, + "learning_rate": 2.635523292237041e-09, + "loss": 1.043, + "step": 2719 + }, + { + "epoch": 6.956521739130435, + "grad_norm": 0.1150922652013077, + "learning_rate": 2.3508328573462745e-09, + "loss": 1.0157, + "step": 2720 + }, + { + "epoch": 6.959079283887468, + "grad_norm": 0.11034749878838018, + "learning_rate": 2.082407972547884e-09, + "loss": 1.0172, + "step": 2721 + }, + { + "epoch": 6.961636828644501, + "grad_norm": 0.11414568906111035, + "learning_rate": 1.8302490745503166e-09, + "loss": 1.0294, + "step": 2722 + }, + { + "epoch": 6.964194373401535, + "grad_norm": 0.11166620944982035, + "learning_rate": 1.5943565736020739e-09, + "loss": 1.0242, + "step": 2723 + }, + { + "epoch": 6.966751918158568, + "grad_norm": 0.11672921275884213, + "learning_rate": 1.3747308534850512e-09, + "loss": 1.0372, + "step": 2724 + }, + { + "epoch": 6.969309462915601, + "grad_norm": 0.11540312400728218, + "learning_rate": 1.1713722715167575e-09, + "loss": 1.0515, + "step": 2725 + }, + { + "epoch": 6.971867007672635, + "grad_norm": 0.11588267312835213, + "learning_rate": 9.84281158548095e-10, + "loss": 1.0291, + "step": 2726 + }, + { + "epoch": 6.974424552429667, + "grad_norm": 0.11642536438528109, + "learning_rate": 8.134578189644692e-10, + "loss": 1.013, + "step": 2727 + }, + { + "epoch": 6.976982097186701, + "grad_norm": 0.11741237126233431, + "learning_rate": 6.589025306869002e-10, + "loss": 1.0054, + "step": 2728 + }, + { + "epoch": 6.979539641943734, + "grad_norm": 0.1116075879721608, + "learning_rate": 5.206155451642491e-10, + "loss": 1.0299, + "step": 2729 + }, + { + "epoch": 6.982097186700767, + "grad_norm": 0.11444442287287329, + "learning_rate": 3.985970873821021e-10, + "loss": 1.0413, + "step": 2730 + }, + { + "epoch": 6.9846547314578, + "grad_norm": 0.12160291833827606, + "learning_rate": 2.928473558583278e-10, + "loss": 1.0317, + "step": 2731 + }, + { + "epoch": 6.987212276214834, + "grad_norm": 0.1124635627813877, + "learning_rate": 2.033665226386372e-10, + "loss": 1.0144, + "step": 2732 + }, + { + "epoch": 6.989769820971867, + "grad_norm": 0.11276149081438312, + "learning_rate": 1.301547333032449e-10, + "loss": 1.0007, + "step": 2733 + }, + { + "epoch": 6.9923273657289, + "grad_norm": 0.10984392228143453, + "learning_rate": 7.321210696464853e-11, + "loss": 0.9763, + "step": 2734 + }, + { + "epoch": 6.994884910485934, + "grad_norm": 0.11019543161726779, + "learning_rate": 3.253873626429816e-11, + "loss": 1.0013, + "step": 2735 + }, + { + "epoch": 6.997442455242966, + "grad_norm": 0.11197749059770203, + "learning_rate": 8.134687374816708e-12, + "loss": 1.0472, + "step": 2736 + }, + { + "epoch": 7.0, + "grad_norm": 0.11208987109779546, + "learning_rate": 0.0, + "loss": 0.9774, + "step": 2737 + }, + { + "epoch": 7.0, + "step": 2737, + "total_flos": 9969287656374272.0, + "train_loss": 1.063590354692078, + "train_runtime": 97730.0822, + "train_samples_per_second": 7.163, + "train_steps_per_second": 0.028 + } + ], + "logging_steps": 1.0, + "max_steps": 2737, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 9969287656374272.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..ea047dd --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18d9f7965f629fe28eb5108a674ba1b9771843ba037dcb8c9ac00ec3728b3263 +size 7224 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000..88c923f Binary files /dev/null and b/training_loss.png differ diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..6c49fc6 --- /dev/null +++ b/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833