From 8c7ae215631840acc8e33da4f8f0f029a09bdad3 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Sat, 16 May 2026 16:35:55 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: laion/GLM-4_7-r2egym_sandboxes-maxeps-131k-lc Source: Original Platform --- .gitattributes | 56 + README.md | 60 + added_tokens.json | 28 + all_results.json | 14 + chat_template.jinja | 89 + config.json | 72 + configuration.json | 1 + generation_config.json | 12 + merges.txt | 3 + model-00001-of-00004.safetensors | 3 + model-00002-of-00004.safetensors | 3 + model-00003-of-00004.safetensors | 3 + model-00004-of-00004.safetensors | 3 + model.safetensors.index.json | 407 + run_summary.json | 12 + special_tokens_map.json | 31 + tokenizer.json | 3 + tokenizer_config.json | 240 + train_results.json | 14 + trainer_log.jsonl | 1584 ++++ trainer_state.json | 14292 +++++++++++++++++++++++++++++ training_args.bin | 3 + training_loss.png | Bin 0 -> 47452 bytes vocab.json | 3 + 24 files changed, 16936 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 added_tokens.json create mode 100644 all_results.json create mode 100644 chat_template.jinja create mode 100644 config.json create mode 100644 configuration.json create mode 100644 generation_config.json create mode 100644 merges.txt create mode 100644 model-00001-of-00004.safetensors create mode 100644 model-00002-of-00004.safetensors create mode 100644 model-00003-of-00004.safetensors create mode 100644 model-00004-of-00004.safetensors create mode 100644 model.safetensors.index.json create mode 100644 run_summary.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train_results.json create mode 100644 trainer_log.jsonl create mode 100644 trainer_state.json create mode 100644 training_args.bin create mode 100644 training_loss.png create mode 100644 vocab.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..f527506 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,56 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text + + +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +model-00003-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +training_args.bin filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +model-00001-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +vocab.json filter=lfs diff=lfs merge=lfs -text +model-00004-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +model-00002-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +merges.txt filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..ae0ab38 --- /dev/null +++ b/README.md @@ -0,0 +1,60 @@ +--- +library_name: transformers +license: apache-2.0 +base_model: Qwen/Qwen3-8B +tags: +- llama-factory +- full +- generated_from_trainer +model-index: +- name: GLM-4_7-r2egym_sandboxes-maxeps-131k-lc + results: [] +--- + + + +# GLM-4_7-r2egym_sandboxes-maxeps-131k-lc + +This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the /data/cat/ws/befe330h-befe330h-otagent/huggingface/hub/datasets--DCAgent2--GLM-4.7-r2egym_sandboxes-maxeps-131k/snapshots/167ff86e8203fa2412574480bf52623cb62320e8_thinking_preprocessed dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 4e-05 +- train_batch_size: 1 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 16 +- total_train_batch_size: 16 +- total_eval_batch_size: 128 +- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.98) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 7.0 + +### Training results + + + +### Framework versions + +- Transformers 4.57.6 +- Pytorch 2.9.0+cu128 +- Datasets 4.4.1 +- Tokenizers 0.22.2 diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..b54f913 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,28 @@ +{ + "": 151668, + "": 151658, + "": 151666, + "": 151667, + "": 151657, + "": 151665, + "<|box_end|>": 151649, + "<|box_start|>": 151648, + "<|endoftext|>": 151643, + "<|file_sep|>": 151664, + "<|fim_middle|>": 151660, + "<|fim_pad|>": 151662, + "<|fim_prefix|>": 151659, + "<|fim_suffix|>": 151661, + "<|im_end|>": 151645, + "<|im_start|>": 151644, + "<|image_pad|>": 151655, + "<|object_ref_end|>": 151647, + "<|object_ref_start|>": 151646, + "<|quad_end|>": 151651, + "<|quad_start|>": 151650, + "<|repo_name|>": 151663, + "<|video_pad|>": 151656, + "<|vision_end|>": 151653, + "<|vision_pad|>": 151654, + "<|vision_start|>": 151652 +} diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..cf0922c --- /dev/null +++ b/all_results.json @@ -0,0 +1,14 @@ +{ + "achieved_tflops_per_gpu": 0.006566567166552009, + "achieved_tflops_per_gpu_theoretical": 3925.917867404427, + "epoch": 6.999116607773852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2685817778110504, + "mfu_percent": 0.0006639602797322557, + "mfu_percent_theoretical": 396.9583283523182, + "total_flos": 7677204649476096.0, + "train_loss": 0.2887290850240313, + "train_runtime": 73070.9484, + "train_samples_per_second": 0.108, + "train_steps_per_second": 0.108 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..01be9b3 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,89 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if message.content is string %} + {%- set content = message.content %} + {%- else %} + {%- set content = '' %} + {%- endif %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..639d17a --- /dev/null +++ b/config.json @@ -0,0 +1,72 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 163840, + "max_window_layers": 36, + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn" + }, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "transformers_version": "4.57.6", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..159097f --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "others", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..9adbb28 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "temperature": 0.6, + "top_k": 20, + "top_p": 0.95, + "transformers_version": "4.57.6" +} diff --git a/merges.txt b/merges.txt new file mode 100644 index 0000000..80c1a19 --- /dev/null +++ b/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..7d23be2 --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63800f9dd1b304cd65f083510f0d99190dc85d6f275c1290279daf243923545 +size 4902257696 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..a893b07 --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:966f8b7d1f978cd7c8eee11b11fc95a8a26ac58790e7d3567cf96883ac9f7169 +size 4915960368 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..00f8722 --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038f79304a558b8d76cb22637bf03a1a0ca2835a3cda1808d3911f9a36304090 +size 4983068496 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..63b83b8 --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ecf0e0c1203b8d08047c24af8e36d3ba9a7a786e0a2bc72c5c267a5a7eedb86 +size 1580230264 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..ba886c0 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,407 @@ +{ + "metadata": { + "total_parameters": 308224, + "total_size": 16381470720 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/run_summary.json b/run_summary.json new file mode 100644 index 0000000..1b2ca5c --- /dev/null +++ b/run_summary.json @@ -0,0 +1,12 @@ +{ + "agent_name": "167ff86e8203fa2412574480bf52623cb62320e8_thinking_preprocessed", + "training_start": "2026-02-08T11:53:11Z", + "training_end": null, + "created_by": "DCAgent", + "base_model_name": "Qwen/Qwen3-8B", + "dataset_name": "/data/cat/ws/befe330h-befe330h-otagent/huggingface/hub/datasets--DCAgent2--GLM-4.7-r2egym_sandboxes-maxeps-131k/snapshots/167ff86e8203fa2412574480bf52623cb62320e8_thinking_preprocessed", + "training_type": "SFT", + "training_parameters": "https://huggingface.co/laion/GLM-4_7-r2egym_sandboxes-maxeps-131k-lc/blob/main/config.json", + "wandb_link": "https://wandb.ai/dogml/OpenThoughts-Agent/runs/sft_GLM-4-7-r2egym_sandboxes-maxeps-131k_Qwen3-8B", + "traces_location_s3": null +} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..ac23c0a --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..cd71f61 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4 +size 11422654 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..0209709 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,240 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": {}, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..cf0922c --- /dev/null +++ b/train_results.json @@ -0,0 +1,14 @@ +{ + "achieved_tflops_per_gpu": 0.006566567166552009, + "achieved_tflops_per_gpu_theoretical": 3925.917867404427, + "epoch": 6.999116607773852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2685817778110504, + "mfu_percent": 0.0006639602797322557, + "mfu_percent_theoretical": 396.9583283523182, + "total_flos": 7677204649476096.0, + "train_loss": 0.2887290850240313, + "train_runtime": 73070.9484, + "train_samples_per_second": 0.108, + "train_steps_per_second": 0.108 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..bbe8a73 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,1584 @@ +{"current_steps": 5, "total_steps": 7924, "loss": 0.8392, "lr": 2.0176544766708703e-07, "epoch": 0.00441696113074205, "percentage": 0.06, "elapsed_time": "0:00:54", "remaining_time": "23:45:28"} +{"current_steps": 10, "total_steps": 7924, "loss": 0.8118, "lr": 4.5397225725094586e-07, "epoch": 0.0088339222614841, "percentage": 0.13, "elapsed_time": "0:01:39", "remaining_time": "21:57:09"} +{"current_steps": 15, "total_steps": 7924, "loss": 0.8166, "lr": 7.061790668348046e-07, "epoch": 0.013250883392226149, "percentage": 0.19, "elapsed_time": "0:02:26", "remaining_time": "21:23:03"} +{"current_steps": 20, "total_steps": 7924, "loss": 0.7684, "lr": 9.583858764186634e-07, "epoch": 0.0176678445229682, "percentage": 0.25, "elapsed_time": "0:03:12", "remaining_time": "21:06:16"} +{"current_steps": 25, "total_steps": 7924, "loss": 0.7393, "lr": 1.210592686002522e-06, "epoch": 0.022084805653710248, "percentage": 0.32, "elapsed_time": "0:03:58", "remaining_time": "20:56:21"} +{"current_steps": 30, "total_steps": 7924, "loss": 0.7164, "lr": 1.4627994955863808e-06, "epoch": 0.026501766784452298, "percentage": 0.38, "elapsed_time": "0:04:44", "remaining_time": "20:49:03"} +{"current_steps": 35, "total_steps": 7924, "loss": 0.6782, "lr": 1.7150063051702399e-06, "epoch": 0.030918727915194347, "percentage": 0.44, "elapsed_time": "0:05:30", "remaining_time": "20:42:07"} +{"current_steps": 40, "total_steps": 7924, "loss": 0.6596, "lr": 1.9672131147540985e-06, "epoch": 0.0353356890459364, "percentage": 0.5, "elapsed_time": "0:06:16", "remaining_time": "20:36:52"} +{"current_steps": 45, "total_steps": 7924, "loss": 0.6625, "lr": 2.2194199243379574e-06, "epoch": 0.03975265017667844, "percentage": 0.57, "elapsed_time": "0:07:02", "remaining_time": "20:32:43"} +{"current_steps": 50, "total_steps": 7924, "loss": 0.6164, "lr": 2.4716267339218163e-06, "epoch": 0.044169611307420496, "percentage": 0.63, "elapsed_time": "0:07:48", "remaining_time": "20:29:20"} +{"current_steps": 55, "total_steps": 7924, "loss": 0.5692, "lr": 2.723833543505675e-06, "epoch": 0.04858657243816254, "percentage": 0.69, "elapsed_time": "0:08:34", "remaining_time": "20:26:06"} +{"current_steps": 60, "total_steps": 7924, "loss": 0.5737, "lr": 2.9760403530895336e-06, "epoch": 0.053003533568904596, "percentage": 0.76, "elapsed_time": "0:09:19", "remaining_time": "20:23:13"} +{"current_steps": 65, "total_steps": 7924, "loss": 0.5621, "lr": 3.2282471626733925e-06, "epoch": 0.05742049469964664, "percentage": 0.82, "elapsed_time": "0:10:06", "remaining_time": "20:21:51"} +{"current_steps": 70, "total_steps": 7924, "loss": 0.5322, "lr": 3.480453972257251e-06, "epoch": 0.061837455830388695, "percentage": 0.88, "elapsed_time": "0:10:52", "remaining_time": "20:20:14"} +{"current_steps": 75, "total_steps": 7924, "loss": 0.5497, "lr": 3.73266078184111e-06, "epoch": 0.06625441696113074, "percentage": 0.95, "elapsed_time": "0:11:38", "remaining_time": "20:18:17"} +{"current_steps": 80, "total_steps": 7924, "loss": 0.5376, "lr": 3.984867591424969e-06, "epoch": 0.0706713780918728, "percentage": 1.01, "elapsed_time": "0:12:24", "remaining_time": "20:16:25"} +{"current_steps": 85, "total_steps": 7924, "loss": 0.5411, "lr": 4.237074401008828e-06, "epoch": 0.07508833922261485, "percentage": 1.07, "elapsed_time": "0:13:10", "remaining_time": "20:14:42"} +{"current_steps": 90, "total_steps": 7924, "loss": 0.5246, "lr": 4.4892812105926865e-06, "epoch": 0.07950530035335689, "percentage": 1.14, "elapsed_time": "0:13:56", "remaining_time": "20:12:53"} +{"current_steps": 95, "total_steps": 7924, "loss": 0.5438, "lr": 4.741488020176545e-06, "epoch": 0.08392226148409894, "percentage": 1.2, "elapsed_time": "0:14:42", "remaining_time": "20:11:51"} +{"current_steps": 100, "total_steps": 7924, "loss": 0.5607, "lr": 4.993694829760403e-06, "epoch": 0.08833922261484099, "percentage": 1.26, "elapsed_time": "0:15:28", "remaining_time": "20:10:42"} +{"current_steps": 105, "total_steps": 7924, "loss": 0.5857, "lr": 5.245901639344263e-06, "epoch": 0.09275618374558305, "percentage": 1.33, "elapsed_time": "0:16:14", "remaining_time": "20:09:37"} +{"current_steps": 110, "total_steps": 7924, "loss": 0.5065, "lr": 5.498108448928121e-06, "epoch": 0.09717314487632508, "percentage": 1.39, "elapsed_time": "0:17:00", "remaining_time": "20:08:15"} +{"current_steps": 115, "total_steps": 7924, "loss": 0.5241, "lr": 5.7503152585119805e-06, "epoch": 0.10159010600706714, "percentage": 1.45, "elapsed_time": "0:17:46", "remaining_time": "20:07:02"} +{"current_steps": 120, "total_steps": 7924, "loss": 0.4923, "lr": 6.00252206809584e-06, "epoch": 0.10600706713780919, "percentage": 1.51, "elapsed_time": "0:18:32", "remaining_time": "20:05:54"} +{"current_steps": 125, "total_steps": 7924, "loss": 0.4789, "lr": 6.254728877679697e-06, "epoch": 0.11042402826855123, "percentage": 1.58, "elapsed_time": "0:19:18", "remaining_time": "20:04:51"} +{"current_steps": 130, "total_steps": 7924, "loss": 0.4813, "lr": 6.506935687263557e-06, "epoch": 0.11484098939929328, "percentage": 1.64, "elapsed_time": "0:20:04", "remaining_time": "20:03:59"} +{"current_steps": 135, "total_steps": 7924, "loss": 0.5338, "lr": 6.759142496847415e-06, "epoch": 0.11925795053003534, "percentage": 1.7, "elapsed_time": "0:20:51", "remaining_time": "20:03:29"} +{"current_steps": 140, "total_steps": 7924, "loss": 0.4892, "lr": 7.0113493064312745e-06, "epoch": 0.12367491166077739, "percentage": 1.77, "elapsed_time": "0:21:38", "remaining_time": "20:02:48"} +{"current_steps": 145, "total_steps": 7924, "loss": 0.459, "lr": 7.263556116015134e-06, "epoch": 0.12809187279151943, "percentage": 1.83, "elapsed_time": "0:22:24", "remaining_time": "20:02:03"} +{"current_steps": 150, "total_steps": 7924, "loss": 0.4925, "lr": 7.515762925598991e-06, "epoch": 0.13250883392226148, "percentage": 1.89, "elapsed_time": "0:23:10", "remaining_time": "20:01:29"} +{"current_steps": 155, "total_steps": 7924, "loss": 0.4904, "lr": 7.76796973518285e-06, "epoch": 0.13692579505300354, "percentage": 1.96, "elapsed_time": "0:23:57", "remaining_time": "20:00:37"} +{"current_steps": 160, "total_steps": 7924, "loss": 0.484, "lr": 8.020176544766708e-06, "epoch": 0.1413427561837456, "percentage": 2.02, "elapsed_time": "0:24:43", "remaining_time": "19:59:33"} +{"current_steps": 165, "total_steps": 7924, "loss": 0.4711, "lr": 8.272383354350568e-06, "epoch": 0.14575971731448764, "percentage": 2.08, "elapsed_time": "0:25:29", "remaining_time": "19:58:34"} +{"current_steps": 170, "total_steps": 7924, "loss": 0.4725, "lr": 8.524590163934427e-06, "epoch": 0.1501766784452297, "percentage": 2.15, "elapsed_time": "0:26:15", "remaining_time": "19:57:42"} +{"current_steps": 175, "total_steps": 7924, "loss": 0.4435, "lr": 8.776796973518286e-06, "epoch": 0.15459363957597172, "percentage": 2.21, "elapsed_time": "0:27:01", "remaining_time": "19:56:45"} +{"current_steps": 180, "total_steps": 7924, "loss": 0.4582, "lr": 9.029003783102146e-06, "epoch": 0.15901060070671377, "percentage": 2.27, "elapsed_time": "0:27:47", "remaining_time": "19:55:49"} +{"current_steps": 185, "total_steps": 7924, "loss": 0.4924, "lr": 9.281210592686003e-06, "epoch": 0.16342756183745583, "percentage": 2.33, "elapsed_time": "0:28:34", "remaining_time": "19:55:04"} +{"current_steps": 190, "total_steps": 7924, "loss": 0.4778, "lr": 9.533417402269862e-06, "epoch": 0.16784452296819788, "percentage": 2.4, "elapsed_time": "0:29:20", "remaining_time": "19:54:09"} +{"current_steps": 195, "total_steps": 7924, "loss": 0.4345, "lr": 9.78562421185372e-06, "epoch": 0.17226148409893993, "percentage": 2.46, "elapsed_time": "0:30:06", "remaining_time": "19:53:08"} +{"current_steps": 200, "total_steps": 7924, "loss": 0.4199, "lr": 1.0037831021437581e-05, "epoch": 0.17667844522968199, "percentage": 2.52, "elapsed_time": "0:30:52", "remaining_time": "19:52:11"} +{"current_steps": 205, "total_steps": 7924, "loss": 0.4342, "lr": 1.0290037831021437e-05, "epoch": 0.18109540636042404, "percentage": 2.59, "elapsed_time": "0:31:38", "remaining_time": "19:51:23"} +{"current_steps": 210, "total_steps": 7924, "loss": 0.4274, "lr": 1.0542244640605296e-05, "epoch": 0.1855123674911661, "percentage": 2.65, "elapsed_time": "0:32:24", "remaining_time": "19:50:29"} +{"current_steps": 215, "total_steps": 7924, "loss": 0.4314, "lr": 1.0794451450189156e-05, "epoch": 0.18992932862190812, "percentage": 2.71, "elapsed_time": "0:33:10", "remaining_time": "19:49:33"} +{"current_steps": 220, "total_steps": 7924, "loss": 0.4208, "lr": 1.1046658259773015e-05, "epoch": 0.19434628975265017, "percentage": 2.78, "elapsed_time": "0:33:56", "remaining_time": "19:48:37"} +{"current_steps": 225, "total_steps": 7924, "loss": 0.4253, "lr": 1.1298865069356874e-05, "epoch": 0.19876325088339222, "percentage": 2.84, "elapsed_time": "0:34:42", "remaining_time": "19:47:52"} +{"current_steps": 230, "total_steps": 7924, "loss": 0.4605, "lr": 1.1551071878940732e-05, "epoch": 0.20318021201413428, "percentage": 2.9, "elapsed_time": "0:35:29", "remaining_time": "19:47:09"} +{"current_steps": 235, "total_steps": 7924, "loss": 0.4244, "lr": 1.1803278688524591e-05, "epoch": 0.20759717314487633, "percentage": 2.97, "elapsed_time": "0:36:15", "remaining_time": "19:46:22"} +{"current_steps": 240, "total_steps": 7924, "loss": 0.472, "lr": 1.205548549810845e-05, "epoch": 0.21201413427561838, "percentage": 3.03, "elapsed_time": "0:37:01", "remaining_time": "19:45:34"} +{"current_steps": 245, "total_steps": 7924, "loss": 0.4287, "lr": 1.230769230769231e-05, "epoch": 0.21643109540636044, "percentage": 3.09, "elapsed_time": "0:37:48", "remaining_time": "19:44:58"} +{"current_steps": 250, "total_steps": 7924, "loss": 0.4589, "lr": 1.2559899117276166e-05, "epoch": 0.22084805653710246, "percentage": 3.15, "elapsed_time": "0:38:34", "remaining_time": "19:44:12"} +{"current_steps": 255, "total_steps": 7924, "loss": 0.393, "lr": 1.2812105926860025e-05, "epoch": 0.2252650176678445, "percentage": 3.22, "elapsed_time": "0:39:20", "remaining_time": "19:43:21"} +{"current_steps": 260, "total_steps": 7924, "loss": 0.441, "lr": 1.3064312736443884e-05, "epoch": 0.22968197879858657, "percentage": 3.28, "elapsed_time": "0:40:06", "remaining_time": "19:42:28"} +{"current_steps": 265, "total_steps": 7924, "loss": 0.357, "lr": 1.3316519546027744e-05, "epoch": 0.23409893992932862, "percentage": 3.34, "elapsed_time": "0:40:53", "remaining_time": "19:41:41"} +{"current_steps": 270, "total_steps": 7924, "loss": 0.4148, "lr": 1.3568726355611603e-05, "epoch": 0.23851590106007067, "percentage": 3.41, "elapsed_time": "0:41:39", "remaining_time": "19:40:48"} +{"current_steps": 275, "total_steps": 7924, "loss": 0.387, "lr": 1.382093316519546e-05, "epoch": 0.24293286219081273, "percentage": 3.47, "elapsed_time": "0:42:25", "remaining_time": "19:39:50"} +{"current_steps": 280, "total_steps": 7924, "loss": 0.4286, "lr": 1.407313997477932e-05, "epoch": 0.24734982332155478, "percentage": 3.53, "elapsed_time": "0:43:11", "remaining_time": "19:38:56"} +{"current_steps": 285, "total_steps": 7924, "loss": 0.4184, "lr": 1.4325346784363179e-05, "epoch": 0.25176678445229683, "percentage": 3.6, "elapsed_time": "0:43:57", "remaining_time": "19:38:04"} +{"current_steps": 290, "total_steps": 7924, "loss": 0.3587, "lr": 1.4577553593947038e-05, "epoch": 0.25618374558303886, "percentage": 3.66, "elapsed_time": "0:44:43", "remaining_time": "19:37:10"} +{"current_steps": 295, "total_steps": 7924, "loss": 0.3728, "lr": 1.4829760403530898e-05, "epoch": 0.26060070671378094, "percentage": 3.72, "elapsed_time": "0:45:29", "remaining_time": "19:36:15"} +{"current_steps": 300, "total_steps": 7924, "loss": 0.4082, "lr": 1.5081967213114754e-05, "epoch": 0.26501766784452296, "percentage": 3.79, "elapsed_time": "0:46:15", "remaining_time": "19:35:24"} +{"current_steps": 305, "total_steps": 7924, "loss": 0.4319, "lr": 1.5334174022698615e-05, "epoch": 0.26943462897526504, "percentage": 3.85, "elapsed_time": "0:47:01", "remaining_time": "19:34:32"} +{"current_steps": 310, "total_steps": 7924, "loss": 0.3937, "lr": 1.5586380832282474e-05, "epoch": 0.27385159010600707, "percentage": 3.91, "elapsed_time": "0:47:47", "remaining_time": "19:33:46"} +{"current_steps": 315, "total_steps": 7924, "loss": 0.3971, "lr": 1.5838587641866333e-05, "epoch": 0.2782685512367491, "percentage": 3.98, "elapsed_time": "0:48:33", "remaining_time": "19:32:53"} +{"current_steps": 320, "total_steps": 7924, "loss": 0.3842, "lr": 1.6090794451450193e-05, "epoch": 0.2826855123674912, "percentage": 4.04, "elapsed_time": "0:49:19", "remaining_time": "19:32:07"} +{"current_steps": 325, "total_steps": 7924, "loss": 0.4017, "lr": 1.634300126103405e-05, "epoch": 0.2871024734982332, "percentage": 4.1, "elapsed_time": "0:50:05", "remaining_time": "19:31:16"} +{"current_steps": 330, "total_steps": 7924, "loss": 0.4222, "lr": 1.6595208070617908e-05, "epoch": 0.2915194346289753, "percentage": 4.16, "elapsed_time": "0:50:51", "remaining_time": "19:30:31"} +{"current_steps": 335, "total_steps": 7924, "loss": 0.3989, "lr": 1.6847414880201767e-05, "epoch": 0.2959363957597173, "percentage": 4.23, "elapsed_time": "0:51:38", "remaining_time": "19:29:43"} +{"current_steps": 340, "total_steps": 7924, "loss": 0.4091, "lr": 1.7099621689785626e-05, "epoch": 0.3003533568904594, "percentage": 4.29, "elapsed_time": "0:52:24", "remaining_time": "19:28:58"} +{"current_steps": 345, "total_steps": 7924, "loss": 0.4219, "lr": 1.7351828499369486e-05, "epoch": 0.3047703180212014, "percentage": 4.35, "elapsed_time": "0:53:10", "remaining_time": "19:28:08"} +{"current_steps": 350, "total_steps": 7924, "loss": 0.4224, "lr": 1.760403530895334e-05, "epoch": 0.30918727915194344, "percentage": 4.42, "elapsed_time": "0:53:56", "remaining_time": "19:27:20"} +{"current_steps": 355, "total_steps": 7924, "loss": 0.4439, "lr": 1.78562421185372e-05, "epoch": 0.3136042402826855, "percentage": 4.48, "elapsed_time": "0:54:42", "remaining_time": "19:26:36"} +{"current_steps": 360, "total_steps": 7924, "loss": 0.3943, "lr": 1.810844892812106e-05, "epoch": 0.31802120141342755, "percentage": 4.54, "elapsed_time": "0:55:29", "remaining_time": "19:25:57"} +{"current_steps": 365, "total_steps": 7924, "loss": 0.3837, "lr": 1.836065573770492e-05, "epoch": 0.3224381625441696, "percentage": 4.61, "elapsed_time": "0:56:15", "remaining_time": "19:25:14"} +{"current_steps": 370, "total_steps": 7924, "loss": 0.403, "lr": 1.861286254728878e-05, "epoch": 0.32685512367491165, "percentage": 4.67, "elapsed_time": "0:57:01", "remaining_time": "19:24:20"} +{"current_steps": 375, "total_steps": 7924, "loss": 0.4094, "lr": 1.8865069356872635e-05, "epoch": 0.33127208480565373, "percentage": 4.73, "elapsed_time": "0:57:47", "remaining_time": "19:23:25"} +{"current_steps": 380, "total_steps": 7924, "loss": 0.4166, "lr": 1.9117276166456494e-05, "epoch": 0.33568904593639576, "percentage": 4.8, "elapsed_time": "0:58:33", "remaining_time": "19:22:34"} +{"current_steps": 385, "total_steps": 7924, "loss": 0.3977, "lr": 1.9369482976040353e-05, "epoch": 0.3401060070671378, "percentage": 4.86, "elapsed_time": "0:59:19", "remaining_time": "19:21:44"} +{"current_steps": 390, "total_steps": 7924, "loss": 0.3811, "lr": 1.9621689785624213e-05, "epoch": 0.34452296819787986, "percentage": 4.92, "elapsed_time": "1:00:05", "remaining_time": "19:20:51"} +{"current_steps": 395, "total_steps": 7924, "loss": 0.4184, "lr": 1.9873896595208072e-05, "epoch": 0.3489399293286219, "percentage": 4.98, "elapsed_time": "1:00:51", "remaining_time": "19:19:56"} +{"current_steps": 400, "total_steps": 7924, "loss": 0.3603, "lr": 2.012610340479193e-05, "epoch": 0.35335689045936397, "percentage": 5.05, "elapsed_time": "1:01:37", "remaining_time": "19:19:04"} +{"current_steps": 405, "total_steps": 7924, "loss": 0.4153, "lr": 2.037831021437579e-05, "epoch": 0.357773851590106, "percentage": 5.11, "elapsed_time": "1:02:23", "remaining_time": "19:18:14"} +{"current_steps": 410, "total_steps": 7924, "loss": 0.43, "lr": 2.063051702395965e-05, "epoch": 0.3621908127208481, "percentage": 5.17, "elapsed_time": "1:03:09", "remaining_time": "19:17:23"} +{"current_steps": 415, "total_steps": 7924, "loss": 0.3932, "lr": 2.0882723833543506e-05, "epoch": 0.3666077738515901, "percentage": 5.24, "elapsed_time": "1:03:55", "remaining_time": "19:16:31"} +{"current_steps": 420, "total_steps": 7924, "loss": 0.3558, "lr": 2.113493064312737e-05, "epoch": 0.3710247349823322, "percentage": 5.3, "elapsed_time": "1:04:41", "remaining_time": "19:15:40"} +{"current_steps": 425, "total_steps": 7924, "loss": 0.382, "lr": 2.1387137452711224e-05, "epoch": 0.3754416961130742, "percentage": 5.36, "elapsed_time": "1:05:26", "remaining_time": "19:14:47"} +{"current_steps": 430, "total_steps": 7924, "loss": 0.3874, "lr": 2.1639344262295087e-05, "epoch": 0.37985865724381623, "percentage": 5.43, "elapsed_time": "1:06:12", "remaining_time": "19:13:54"} +{"current_steps": 435, "total_steps": 7924, "loss": 0.3913, "lr": 2.1891551071878943e-05, "epoch": 0.3842756183745583, "percentage": 5.49, "elapsed_time": "1:06:58", "remaining_time": "19:13:00"} +{"current_steps": 440, "total_steps": 7924, "loss": 0.3921, "lr": 2.21437578814628e-05, "epoch": 0.38869257950530034, "percentage": 5.55, "elapsed_time": "1:07:44", "remaining_time": "19:12:14"} +{"current_steps": 445, "total_steps": 7924, "loss": 0.377, "lr": 2.239596469104666e-05, "epoch": 0.3931095406360424, "percentage": 5.62, "elapsed_time": "1:08:30", "remaining_time": "19:11:25"} +{"current_steps": 450, "total_steps": 7924, "loss": 0.3703, "lr": 2.2648171500630518e-05, "epoch": 0.39752650176678445, "percentage": 5.68, "elapsed_time": "1:09:16", "remaining_time": "19:10:36"} +{"current_steps": 455, "total_steps": 7924, "loss": 0.3584, "lr": 2.290037831021438e-05, "epoch": 0.4019434628975265, "percentage": 5.74, "elapsed_time": "1:10:02", "remaining_time": "19:09:46"} +{"current_steps": 460, "total_steps": 7924, "loss": 0.4145, "lr": 2.3152585119798236e-05, "epoch": 0.40636042402826855, "percentage": 5.81, "elapsed_time": "1:10:48", "remaining_time": "19:08:57"} +{"current_steps": 465, "total_steps": 7924, "loss": 0.3628, "lr": 2.3404791929382092e-05, "epoch": 0.4107773851590106, "percentage": 5.87, "elapsed_time": "1:11:34", "remaining_time": "19:08:09"} +{"current_steps": 470, "total_steps": 7924, "loss": 0.405, "lr": 2.3656998738965955e-05, "epoch": 0.41519434628975266, "percentage": 5.93, "elapsed_time": "1:12:20", "remaining_time": "19:07:20"} +{"current_steps": 475, "total_steps": 7924, "loss": 0.3567, "lr": 2.390920554854981e-05, "epoch": 0.4196113074204947, "percentage": 5.99, "elapsed_time": "1:13:06", "remaining_time": "19:06:33"} +{"current_steps": 480, "total_steps": 7924, "loss": 0.4706, "lr": 2.4161412358133673e-05, "epoch": 0.42402826855123676, "percentage": 6.06, "elapsed_time": "1:13:52", "remaining_time": "19:05:47"} +{"current_steps": 485, "total_steps": 7924, "loss": 0.3921, "lr": 2.441361916771753e-05, "epoch": 0.4284452296819788, "percentage": 6.12, "elapsed_time": "1:14:39", "remaining_time": "19:05:05"} +{"current_steps": 490, "total_steps": 7924, "loss": 0.3424, "lr": 2.466582597730139e-05, "epoch": 0.43286219081272087, "percentage": 6.18, "elapsed_time": "1:15:25", "remaining_time": "19:04:18"} +{"current_steps": 495, "total_steps": 7924, "loss": 0.3737, "lr": 2.4918032786885248e-05, "epoch": 0.4372791519434629, "percentage": 6.25, "elapsed_time": "1:16:11", "remaining_time": "19:03:29"} +{"current_steps": 500, "total_steps": 7924, "loss": 0.3843, "lr": 2.5170239596469107e-05, "epoch": 0.4416961130742049, "percentage": 6.31, "elapsed_time": "1:16:57", "remaining_time": "19:02:42"} +{"current_steps": 505, "total_steps": 7924, "loss": 0.3745, "lr": 2.5422446406052967e-05, "epoch": 0.446113074204947, "percentage": 6.37, "elapsed_time": "1:17:43", "remaining_time": "19:01:51"} +{"current_steps": 510, "total_steps": 7924, "loss": 0.3803, "lr": 2.5674653215636826e-05, "epoch": 0.450530035335689, "percentage": 6.44, "elapsed_time": "1:18:29", "remaining_time": "19:01:02"} +{"current_steps": 515, "total_steps": 7924, "loss": 0.349, "lr": 2.5926860025220682e-05, "epoch": 0.4549469964664311, "percentage": 6.5, "elapsed_time": "1:19:15", "remaining_time": "19:00:16"} +{"current_steps": 520, "total_steps": 7924, "loss": 0.3474, "lr": 2.6179066834804544e-05, "epoch": 0.45936395759717313, "percentage": 6.56, "elapsed_time": "1:20:01", "remaining_time": "18:59:31"} +{"current_steps": 525, "total_steps": 7924, "loss": 0.3563, "lr": 2.64312736443884e-05, "epoch": 0.4637809187279152, "percentage": 6.63, "elapsed_time": "1:20:48", "remaining_time": "18:58:45"} +{"current_steps": 530, "total_steps": 7924, "loss": 0.3697, "lr": 2.668348045397226e-05, "epoch": 0.46819787985865724, "percentage": 6.69, "elapsed_time": "1:21:34", "remaining_time": "18:57:59"} +{"current_steps": 535, "total_steps": 7924, "loss": 0.3745, "lr": 2.693568726355612e-05, "epoch": 0.4726148409893993, "percentage": 6.75, "elapsed_time": "1:22:20", "remaining_time": "18:57:08"} +{"current_steps": 540, "total_steps": 7924, "loss": 0.3817, "lr": 2.7187894073139975e-05, "epoch": 0.47703180212014135, "percentage": 6.81, "elapsed_time": "1:23:06", "remaining_time": "18:56:23"} +{"current_steps": 545, "total_steps": 7924, "loss": 0.3744, "lr": 2.7440100882723838e-05, "epoch": 0.48144876325088337, "percentage": 6.88, "elapsed_time": "1:23:53", "remaining_time": "18:55:44"} +{"current_steps": 550, "total_steps": 7924, "loss": 0.3612, "lr": 2.7692307692307694e-05, "epoch": 0.48586572438162545, "percentage": 6.94, "elapsed_time": "1:24:39", "remaining_time": "18:55:00"} +{"current_steps": 555, "total_steps": 7924, "loss": 0.3848, "lr": 2.7944514501891556e-05, "epoch": 0.4902826855123675, "percentage": 7.0, "elapsed_time": "1:25:25", "remaining_time": "18:54:12"} +{"current_steps": 560, "total_steps": 7924, "loss": 0.3591, "lr": 2.8196721311475412e-05, "epoch": 0.49469964664310956, "percentage": 7.07, "elapsed_time": "1:26:11", "remaining_time": "18:53:22"} +{"current_steps": 565, "total_steps": 7924, "loss": 0.362, "lr": 2.8448928121059268e-05, "epoch": 0.4991166077738516, "percentage": 7.13, "elapsed_time": "1:26:57", "remaining_time": "18:52:32"} +{"current_steps": 570, "total_steps": 7924, "loss": 0.3565, "lr": 2.870113493064313e-05, "epoch": 0.5035335689045937, "percentage": 7.19, "elapsed_time": "1:27:43", "remaining_time": "18:51:43"} +{"current_steps": 575, "total_steps": 7924, "loss": 0.3903, "lr": 2.8953341740226987e-05, "epoch": 0.5079505300353356, "percentage": 7.26, "elapsed_time": "1:28:29", "remaining_time": "18:50:54"} +{"current_steps": 580, "total_steps": 7924, "loss": 0.3745, "lr": 2.920554854981085e-05, "epoch": 0.5123674911660777, "percentage": 7.32, "elapsed_time": "1:29:15", "remaining_time": "18:50:05"} +{"current_steps": 585, "total_steps": 7924, "loss": 0.427, "lr": 2.9457755359394705e-05, "epoch": 0.5167844522968198, "percentage": 7.38, "elapsed_time": "1:30:01", "remaining_time": "18:49:17"} +{"current_steps": 590, "total_steps": 7924, "loss": 0.4404, "lr": 2.9709962168978565e-05, "epoch": 0.5212014134275619, "percentage": 7.45, "elapsed_time": "1:30:46", "remaining_time": "18:48:28"} +{"current_steps": 595, "total_steps": 7924, "loss": 0.3604, "lr": 2.9962168978562424e-05, "epoch": 0.5256183745583038, "percentage": 7.51, "elapsed_time": "1:31:32", "remaining_time": "18:47:40"} +{"current_steps": 600, "total_steps": 7924, "loss": 0.3947, "lr": 3.0214375788146283e-05, "epoch": 0.5300353356890459, "percentage": 7.57, "elapsed_time": "1:32:19", "remaining_time": "18:46:53"} +{"current_steps": 605, "total_steps": 7924, "loss": 0.3543, "lr": 3.0466582597730143e-05, "epoch": 0.534452296819788, "percentage": 7.64, "elapsed_time": "1:33:05", "remaining_time": "18:46:05"} +{"current_steps": 610, "total_steps": 7924, "loss": 0.4353, "lr": 3.0718789407314e-05, "epoch": 0.5388692579505301, "percentage": 7.7, "elapsed_time": "1:33:50", "remaining_time": "18:45:15"} +{"current_steps": 615, "total_steps": 7924, "loss": 0.3326, "lr": 3.097099621689786e-05, "epoch": 0.5432862190812721, "percentage": 7.76, "elapsed_time": "1:34:37", "remaining_time": "18:44:29"} +{"current_steps": 620, "total_steps": 7924, "loss": 0.3872, "lr": 3.122320302648172e-05, "epoch": 0.5477031802120141, "percentage": 7.82, "elapsed_time": "1:35:23", "remaining_time": "18:43:43"} +{"current_steps": 625, "total_steps": 7924, "loss": 0.384, "lr": 3.1475409836065576e-05, "epoch": 0.5521201413427562, "percentage": 7.89, "elapsed_time": "1:36:09", "remaining_time": "18:42:55"} +{"current_steps": 630, "total_steps": 7924, "loss": 0.3579, "lr": 3.1727616645649436e-05, "epoch": 0.5565371024734982, "percentage": 7.95, "elapsed_time": "1:36:55", "remaining_time": "18:42:09"} +{"current_steps": 635, "total_steps": 7924, "loss": 0.3784, "lr": 3.1979823455233295e-05, "epoch": 0.5609540636042403, "percentage": 8.01, "elapsed_time": "1:37:41", "remaining_time": "18:41:23"} +{"current_steps": 640, "total_steps": 7924, "loss": 0.3777, "lr": 3.2232030264817154e-05, "epoch": 0.5653710247349824, "percentage": 8.08, "elapsed_time": "1:38:27", "remaining_time": "18:40:37"} +{"current_steps": 645, "total_steps": 7924, "loss": 0.3565, "lr": 3.2484237074401014e-05, "epoch": 0.5697879858657244, "percentage": 8.14, "elapsed_time": "1:39:13", "remaining_time": "18:39:49"} +{"current_steps": 650, "total_steps": 7924, "loss": 0.3576, "lr": 3.273644388398487e-05, "epoch": 0.5742049469964664, "percentage": 8.2, "elapsed_time": "1:39:59", "remaining_time": "18:39:01"} +{"current_steps": 655, "total_steps": 7924, "loss": 0.3129, "lr": 3.298865069356873e-05, "epoch": 0.5786219081272085, "percentage": 8.27, "elapsed_time": "1:40:45", "remaining_time": "18:38:12"} +{"current_steps": 660, "total_steps": 7924, "loss": 0.367, "lr": 3.324085750315259e-05, "epoch": 0.5830388692579506, "percentage": 8.33, "elapsed_time": "1:41:31", "remaining_time": "18:37:24"} +{"current_steps": 665, "total_steps": 7924, "loss": 0.3133, "lr": 3.3493064312736444e-05, "epoch": 0.5874558303886925, "percentage": 8.39, "elapsed_time": "1:42:17", "remaining_time": "18:36:36"} +{"current_steps": 670, "total_steps": 7924, "loss": 0.404, "lr": 3.37452711223203e-05, "epoch": 0.5918727915194346, "percentage": 8.46, "elapsed_time": "1:43:03", "remaining_time": "18:35:48"} +{"current_steps": 675, "total_steps": 7924, "loss": 0.3931, "lr": 3.399747793190416e-05, "epoch": 0.5962897526501767, "percentage": 8.52, "elapsed_time": "1:43:49", "remaining_time": "18:35:00"} +{"current_steps": 680, "total_steps": 7924, "loss": 0.3875, "lr": 3.424968474148802e-05, "epoch": 0.6007067137809188, "percentage": 8.58, "elapsed_time": "1:44:35", "remaining_time": "18:34:15"} +{"current_steps": 685, "total_steps": 7924, "loss": 0.3206, "lr": 3.450189155107188e-05, "epoch": 0.6051236749116607, "percentage": 8.64, "elapsed_time": "1:45:21", "remaining_time": "18:33:28"} +{"current_steps": 690, "total_steps": 7924, "loss": 0.3294, "lr": 3.475409836065574e-05, "epoch": 0.6095406360424028, "percentage": 8.71, "elapsed_time": "1:46:07", "remaining_time": "18:32:40"} +{"current_steps": 695, "total_steps": 7924, "loss": 0.3731, "lr": 3.50063051702396e-05, "epoch": 0.6139575971731449, "percentage": 8.77, "elapsed_time": "1:46:53", "remaining_time": "18:31:51"} +{"current_steps": 700, "total_steps": 7924, "loss": 0.3226, "lr": 3.525851197982346e-05, "epoch": 0.6183745583038869, "percentage": 8.83, "elapsed_time": "1:47:39", "remaining_time": "18:31:01"} +{"current_steps": 705, "total_steps": 7924, "loss": 0.3474, "lr": 3.551071878940732e-05, "epoch": 0.622791519434629, "percentage": 8.9, "elapsed_time": "1:48:25", "remaining_time": "18:30:14"} +{"current_steps": 710, "total_steps": 7924, "loss": 0.3719, "lr": 3.576292559899118e-05, "epoch": 0.627208480565371, "percentage": 8.96, "elapsed_time": "1:49:11", "remaining_time": "18:29:29"} +{"current_steps": 715, "total_steps": 7924, "loss": 0.3479, "lr": 3.601513240857503e-05, "epoch": 0.6316254416961131, "percentage": 9.02, "elapsed_time": "1:49:58", "remaining_time": "18:28:44"} +{"current_steps": 720, "total_steps": 7924, "loss": 0.4271, "lr": 3.6267339218158896e-05, "epoch": 0.6360424028268551, "percentage": 9.09, "elapsed_time": "1:50:44", "remaining_time": "18:27:59"} +{"current_steps": 725, "total_steps": 7924, "loss": 0.4105, "lr": 3.651954602774275e-05, "epoch": 0.6404593639575972, "percentage": 9.15, "elapsed_time": "1:51:30", "remaining_time": "18:27:10"} +{"current_steps": 730, "total_steps": 7924, "loss": 0.3365, "lr": 3.677175283732661e-05, "epoch": 0.6448763250883393, "percentage": 9.21, "elapsed_time": "1:52:16", "remaining_time": "18:26:22"} +{"current_steps": 735, "total_steps": 7924, "loss": 0.3602, "lr": 3.702395964691047e-05, "epoch": 0.6492932862190812, "percentage": 9.28, "elapsed_time": "1:53:02", "remaining_time": "18:25:36"} +{"current_steps": 740, "total_steps": 7924, "loss": 0.3479, "lr": 3.727616645649433e-05, "epoch": 0.6537102473498233, "percentage": 9.34, "elapsed_time": "1:53:48", "remaining_time": "18:24:49"} +{"current_steps": 745, "total_steps": 7924, "loss": 0.4159, "lr": 3.7528373266078186e-05, "epoch": 0.6581272084805654, "percentage": 9.4, "elapsed_time": "1:54:34", "remaining_time": "18:24:03"} +{"current_steps": 750, "total_steps": 7924, "loss": 0.3727, "lr": 3.7780580075662045e-05, "epoch": 0.6625441696113075, "percentage": 9.46, "elapsed_time": "1:55:20", "remaining_time": "18:23:16"} +{"current_steps": 755, "total_steps": 7924, "loss": 0.3583, "lr": 3.8032786885245905e-05, "epoch": 0.6669611307420494, "percentage": 9.53, "elapsed_time": "1:56:06", "remaining_time": "18:22:29"} +{"current_steps": 760, "total_steps": 7924, "loss": 0.3344, "lr": 3.8284993694829764e-05, "epoch": 0.6713780918727915, "percentage": 9.59, "elapsed_time": "1:56:52", "remaining_time": "18:21:41"} +{"current_steps": 765, "total_steps": 7924, "loss": 0.3582, "lr": 3.853720050441362e-05, "epoch": 0.6757950530035336, "percentage": 9.65, "elapsed_time": "1:57:38", "remaining_time": "18:20:55"} +{"current_steps": 770, "total_steps": 7924, "loss": 0.384, "lr": 3.878940731399748e-05, "epoch": 0.6802120141342756, "percentage": 9.72, "elapsed_time": "1:58:24", "remaining_time": "18:20:08"} +{"current_steps": 775, "total_steps": 7924, "loss": 0.3607, "lr": 3.904161412358134e-05, "epoch": 0.6846289752650176, "percentage": 9.78, "elapsed_time": "1:59:10", "remaining_time": "18:19:23"} +{"current_steps": 780, "total_steps": 7924, "loss": 0.3786, "lr": 3.9293820933165195e-05, "epoch": 0.6890459363957597, "percentage": 9.84, "elapsed_time": "1:59:57", "remaining_time": "18:18:37"} +{"current_steps": 785, "total_steps": 7924, "loss": 0.3336, "lr": 3.954602774274906e-05, "epoch": 0.6934628975265018, "percentage": 9.91, "elapsed_time": "2:00:43", "remaining_time": "18:17:51"} +{"current_steps": 790, "total_steps": 7924, "loss": 0.3405, "lr": 3.979823455233291e-05, "epoch": 0.6978798586572438, "percentage": 9.97, "elapsed_time": "2:01:29", "remaining_time": "18:17:03"} +{"current_steps": 795, "total_steps": 7924, "loss": 0.345, "lr": 3.99999980591192e-05, "epoch": 0.7022968197879859, "percentage": 10.03, "elapsed_time": "2:02:15", "remaining_time": "18:16:17"} +{"current_steps": 800, "total_steps": 7924, "loss": 0.4408, "lr": 3.99999301283305e-05, "epoch": 0.7067137809187279, "percentage": 10.1, "elapsed_time": "2:03:01", "remaining_time": "18:15:35"} +{"current_steps": 805, "total_steps": 7924, "loss": 0.3571, "lr": 3.999976515387813e-05, "epoch": 0.7111307420494699, "percentage": 10.16, "elapsed_time": "2:03:48", "remaining_time": "18:14:50"} +{"current_steps": 810, "total_steps": 7924, "loss": 0.3417, "lr": 3.9999503136562586e-05, "epoch": 0.715547703180212, "percentage": 10.22, "elapsed_time": "2:04:35", "remaining_time": "18:14:11"} +{"current_steps": 815, "total_steps": 7924, "loss": 0.3524, "lr": 3.999914407765523e-05, "epoch": 0.7199646643109541, "percentage": 10.29, "elapsed_time": "2:05:21", "remaining_time": "18:13:25"} +{"current_steps": 820, "total_steps": 7924, "loss": 0.3204, "lr": 3.999868797889828e-05, "epoch": 0.7243816254416962, "percentage": 10.35, "elapsed_time": "2:06:07", "remaining_time": "18:12:37"} +{"current_steps": 825, "total_steps": 7924, "loss": 0.3488, "lr": 3.999813484250483e-05, "epoch": 0.7287985865724381, "percentage": 10.41, "elapsed_time": "2:06:53", "remaining_time": "18:11:51"} +{"current_steps": 830, "total_steps": 7924, "loss": 0.3718, "lr": 3.99974846711588e-05, "epoch": 0.7332155477031802, "percentage": 10.47, "elapsed_time": "2:07:39", "remaining_time": "18:11:08"} +{"current_steps": 835, "total_steps": 7924, "loss": 0.3123, "lr": 3.9996737468014954e-05, "epoch": 0.7376325088339223, "percentage": 10.54, "elapsed_time": "2:08:26", "remaining_time": "18:10:25"} +{"current_steps": 840, "total_steps": 7924, "loss": 0.359, "lr": 3.999589323669887e-05, "epoch": 0.7420494699646644, "percentage": 10.6, "elapsed_time": "2:09:12", "remaining_time": "18:09:38"} +{"current_steps": 845, "total_steps": 7924, "loss": 0.3511, "lr": 3.9994951981306926e-05, "epoch": 0.7464664310954063, "percentage": 10.66, "elapsed_time": "2:09:58", "remaining_time": "18:08:54"} +{"current_steps": 850, "total_steps": 7924, "loss": 0.349, "lr": 3.9993913706406287e-05, "epoch": 0.7508833922261484, "percentage": 10.73, "elapsed_time": "2:10:44", "remaining_time": "18:08:07"} +{"current_steps": 855, "total_steps": 7924, "loss": 0.347, "lr": 3.999277841703486e-05, "epoch": 0.7553003533568905, "percentage": 10.79, "elapsed_time": "2:11:30", "remaining_time": "18:07:20"} +{"current_steps": 860, "total_steps": 7924, "loss": 0.3524, "lr": 3.999154611870131e-05, "epoch": 0.7597173144876325, "percentage": 10.85, "elapsed_time": "2:12:16", "remaining_time": "18:06:32"} +{"current_steps": 865, "total_steps": 7924, "loss": 0.3417, "lr": 3.999021681738499e-05, "epoch": 0.7641342756183745, "percentage": 10.92, "elapsed_time": "2:13:03", "remaining_time": "18:05:47"} +{"current_steps": 870, "total_steps": 7924, "loss": 0.3073, "lr": 3.998879051953593e-05, "epoch": 0.7685512367491166, "percentage": 10.98, "elapsed_time": "2:13:49", "remaining_time": "18:04:59"} +{"current_steps": 875, "total_steps": 7924, "loss": 0.3812, "lr": 3.9987267232074816e-05, "epoch": 0.7729681978798587, "percentage": 11.04, "elapsed_time": "2:14:35", "remaining_time": "18:04:13"} +{"current_steps": 880, "total_steps": 7924, "loss": 0.3718, "lr": 3.998564696239295e-05, "epoch": 0.7773851590106007, "percentage": 11.11, "elapsed_time": "2:15:21", "remaining_time": "18:03:29"} +{"current_steps": 885, "total_steps": 7924, "loss": 0.3356, "lr": 3.99839297183522e-05, "epoch": 0.7818021201413428, "percentage": 11.17, "elapsed_time": "2:16:07", "remaining_time": "18:02:40"} +{"current_steps": 890, "total_steps": 7924, "loss": 0.3528, "lr": 3.998211550828497e-05, "epoch": 0.7862190812720848, "percentage": 11.23, "elapsed_time": "2:16:53", "remaining_time": "18:01:51"} +{"current_steps": 895, "total_steps": 7924, "loss": 0.3497, "lr": 3.998020434099418e-05, "epoch": 0.7906360424028268, "percentage": 11.29, "elapsed_time": "2:17:39", "remaining_time": "18:01:04"} +{"current_steps": 900, "total_steps": 7924, "loss": 0.3586, "lr": 3.997819622575319e-05, "epoch": 0.7950530035335689, "percentage": 11.36, "elapsed_time": "2:18:25", "remaining_time": "18:00:16"} +{"current_steps": 905, "total_steps": 7924, "loss": 0.3576, "lr": 3.9976091172305794e-05, "epoch": 0.799469964664311, "percentage": 11.42, "elapsed_time": "2:19:11", "remaining_time": "17:59:30"} +{"current_steps": 910, "total_steps": 7924, "loss": 0.3383, "lr": 3.9973889190866105e-05, "epoch": 0.803886925795053, "percentage": 11.48, "elapsed_time": "2:19:57", "remaining_time": "17:58:42"} +{"current_steps": 915, "total_steps": 7924, "loss": 0.355, "lr": 3.99715902921186e-05, "epoch": 0.808303886925795, "percentage": 11.55, "elapsed_time": "2:20:43", "remaining_time": "17:57:56"} +{"current_steps": 920, "total_steps": 7924, "loss": 0.3669, "lr": 3.9969194487217987e-05, "epoch": 0.8127208480565371, "percentage": 11.61, "elapsed_time": "2:21:29", "remaining_time": "17:57:10"} +{"current_steps": 925, "total_steps": 7924, "loss": 0.3431, "lr": 3.9966701787789194e-05, "epoch": 0.8171378091872792, "percentage": 11.67, "elapsed_time": "2:22:15", "remaining_time": "17:56:24"} +{"current_steps": 930, "total_steps": 7924, "loss": 0.3553, "lr": 3.996411220592729e-05, "epoch": 0.8215547703180212, "percentage": 11.74, "elapsed_time": "2:23:02", "remaining_time": "17:55:40"} +{"current_steps": 935, "total_steps": 7924, "loss": 0.3087, "lr": 3.996142575419745e-05, "epoch": 0.8259717314487632, "percentage": 11.8, "elapsed_time": "2:23:48", "remaining_time": "17:54:54"} +{"current_steps": 940, "total_steps": 7924, "loss": 0.3472, "lr": 3.995864244563487e-05, "epoch": 0.8303886925795053, "percentage": 11.86, "elapsed_time": "2:24:34", "remaining_time": "17:54:07"} +{"current_steps": 945, "total_steps": 7924, "loss": 0.3563, "lr": 3.9955762293744735e-05, "epoch": 0.8348056537102474, "percentage": 11.93, "elapsed_time": "2:25:20", "remaining_time": "17:53:21"} +{"current_steps": 950, "total_steps": 7924, "loss": 0.3675, "lr": 3.9952785312502107e-05, "epoch": 0.8392226148409894, "percentage": 11.99, "elapsed_time": "2:26:06", "remaining_time": "17:52:34"} +{"current_steps": 955, "total_steps": 7924, "loss": 0.4159, "lr": 3.99497115163519e-05, "epoch": 0.8436395759717314, "percentage": 12.05, "elapsed_time": "2:26:52", "remaining_time": "17:51:47"} +{"current_steps": 960, "total_steps": 7924, "loss": 0.3492, "lr": 3.994654092020877e-05, "epoch": 0.8480565371024735, "percentage": 12.12, "elapsed_time": "2:27:38", "remaining_time": "17:50:59"} +{"current_steps": 965, "total_steps": 7924, "loss": 0.3413, "lr": 3.994327353945712e-05, "epoch": 0.8524734982332155, "percentage": 12.18, "elapsed_time": "2:28:24", "remaining_time": "17:50:14"} +{"current_steps": 970, "total_steps": 7924, "loss": 0.3682, "lr": 3.9939909389950894e-05, "epoch": 0.8568904593639576, "percentage": 12.24, "elapsed_time": "2:29:10", "remaining_time": "17:49:27"} +{"current_steps": 975, "total_steps": 7924, "loss": 0.3363, "lr": 3.9936448488013646e-05, "epoch": 0.8613074204946997, "percentage": 12.3, "elapsed_time": "2:29:56", "remaining_time": "17:48:40"} +{"current_steps": 980, "total_steps": 7924, "loss": 0.3754, "lr": 3.9932890850438356e-05, "epoch": 0.8657243816254417, "percentage": 12.37, "elapsed_time": "2:30:42", "remaining_time": "17:47:53"} +{"current_steps": 985, "total_steps": 7924, "loss": 0.359, "lr": 3.9929236494487395e-05, "epoch": 0.8701413427561837, "percentage": 12.43, "elapsed_time": "2:31:28", "remaining_time": "17:47:07"} +{"current_steps": 990, "total_steps": 7924, "loss": 0.3726, "lr": 3.9925485437892434e-05, "epoch": 0.8745583038869258, "percentage": 12.49, "elapsed_time": "2:32:14", "remaining_time": "17:46:20"} +{"current_steps": 995, "total_steps": 7924, "loss": 0.3198, "lr": 3.992163769885435e-05, "epoch": 0.8789752650176679, "percentage": 12.56, "elapsed_time": "2:33:00", "remaining_time": "17:45:31"} +{"current_steps": 1000, "total_steps": 7924, "loss": 0.3586, "lr": 3.9917693296043124e-05, "epoch": 0.8833922261484098, "percentage": 12.62, "elapsed_time": "2:33:46", "remaining_time": "17:44:44"} +{"current_steps": 1005, "total_steps": 7924, "loss": 0.3653, "lr": 3.9913652248597806e-05, "epoch": 0.8878091872791519, "percentage": 12.68, "elapsed_time": "2:34:32", "remaining_time": "17:43:56"} +{"current_steps": 1010, "total_steps": 7924, "loss": 0.3364, "lr": 3.990951457612637e-05, "epoch": 0.892226148409894, "percentage": 12.75, "elapsed_time": "2:35:18", "remaining_time": "17:43:07"} +{"current_steps": 1015, "total_steps": 7924, "loss": 0.3569, "lr": 3.9905280298705624e-05, "epoch": 0.8966431095406361, "percentage": 12.81, "elapsed_time": "2:36:04", "remaining_time": "17:42:20"} +{"current_steps": 1020, "total_steps": 7924, "loss": 0.3507, "lr": 3.9900949436881126e-05, "epoch": 0.901060070671378, "percentage": 12.87, "elapsed_time": "2:36:50", "remaining_time": "17:41:33"} +{"current_steps": 1025, "total_steps": 7924, "loss": 0.3224, "lr": 3.989652201166709e-05, "epoch": 0.9054770318021201, "percentage": 12.94, "elapsed_time": "2:37:36", "remaining_time": "17:40:47"} +{"current_steps": 1030, "total_steps": 7924, "loss": 0.3297, "lr": 3.989199804454627e-05, "epoch": 0.9098939929328622, "percentage": 13.0, "elapsed_time": "2:38:22", "remaining_time": "17:40:00"} +{"current_steps": 1035, "total_steps": 7924, "loss": 0.3366, "lr": 3.988737755746986e-05, "epoch": 0.9143109540636042, "percentage": 13.06, "elapsed_time": "2:39:08", "remaining_time": "17:39:13"} +{"current_steps": 1040, "total_steps": 7924, "loss": 0.3495, "lr": 3.9882660572857375e-05, "epoch": 0.9187279151943463, "percentage": 13.12, "elapsed_time": "2:39:54", "remaining_time": "17:38:27"} +{"current_steps": 1045, "total_steps": 7924, "loss": 0.3272, "lr": 3.987784711359658e-05, "epoch": 0.9231448763250883, "percentage": 13.19, "elapsed_time": "2:40:40", "remaining_time": "17:37:41"} +{"current_steps": 1050, "total_steps": 7924, "loss": 0.3611, "lr": 3.987293720304335e-05, "epoch": 0.9275618374558304, "percentage": 13.25, "elapsed_time": "2:41:26", "remaining_time": "17:36:54"} +{"current_steps": 1055, "total_steps": 7924, "loss": 0.3379, "lr": 3.9867930865021535e-05, "epoch": 0.9319787985865724, "percentage": 13.31, "elapsed_time": "2:42:12", "remaining_time": "17:36:09"} +{"current_steps": 1060, "total_steps": 7924, "loss": 0.3756, "lr": 3.9862828123822905e-05, "epoch": 0.9363957597173145, "percentage": 13.38, "elapsed_time": "2:42:58", "remaining_time": "17:35:22"} +{"current_steps": 1065, "total_steps": 7924, "loss": 0.3687, "lr": 3.985762900420698e-05, "epoch": 0.9408127208480566, "percentage": 13.44, "elapsed_time": "2:43:44", "remaining_time": "17:34:35"} +{"current_steps": 1070, "total_steps": 7924, "loss": 0.2972, "lr": 3.985233353140092e-05, "epoch": 0.9452296819787986, "percentage": 13.5, "elapsed_time": "2:44:30", "remaining_time": "17:33:49"} +{"current_steps": 1075, "total_steps": 7924, "loss": 0.3508, "lr": 3.984694173109942e-05, "epoch": 0.9496466431095406, "percentage": 13.57, "elapsed_time": "2:45:17", "remaining_time": "17:33:02"} +{"current_steps": 1080, "total_steps": 7924, "loss": 0.361, "lr": 3.984145362946458e-05, "epoch": 0.9540636042402827, "percentage": 13.63, "elapsed_time": "2:46:03", "remaining_time": "17:32:16"} +{"current_steps": 1085, "total_steps": 7924, "loss": 0.3525, "lr": 3.983586925312576e-05, "epoch": 0.9584805653710248, "percentage": 13.69, "elapsed_time": "2:46:49", "remaining_time": "17:31:29"} +{"current_steps": 1090, "total_steps": 7924, "loss": 0.3245, "lr": 3.983018862917948e-05, "epoch": 0.9628975265017667, "percentage": 13.76, "elapsed_time": "2:47:35", "remaining_time": "17:30:44"} +{"current_steps": 1095, "total_steps": 7924, "loss": 0.3461, "lr": 3.9824411785189264e-05, "epoch": 0.9673144876325088, "percentage": 13.82, "elapsed_time": "2:48:21", "remaining_time": "17:29:57"} +{"current_steps": 1100, "total_steps": 7924, "loss": 0.3357, "lr": 3.9818538749185506e-05, "epoch": 0.9717314487632509, "percentage": 13.88, "elapsed_time": "2:49:07", "remaining_time": "17:29:11"} +{"current_steps": 1105, "total_steps": 7924, "loss": 0.3559, "lr": 3.981256954966536e-05, "epoch": 0.976148409893993, "percentage": 13.94, "elapsed_time": "2:49:53", "remaining_time": "17:28:24"} +{"current_steps": 1110, "total_steps": 7924, "loss": 0.345, "lr": 3.9806504215592575e-05, "epoch": 0.980565371024735, "percentage": 14.01, "elapsed_time": "2:50:39", "remaining_time": "17:27:38"} +{"current_steps": 1115, "total_steps": 7924, "loss": 0.3427, "lr": 3.980034277639737e-05, "epoch": 0.984982332155477, "percentage": 14.07, "elapsed_time": "2:51:26", "remaining_time": "17:26:55"} +{"current_steps": 1120, "total_steps": 7924, "loss": 0.3845, "lr": 3.979408526197628e-05, "epoch": 0.9893992932862191, "percentage": 14.13, "elapsed_time": "2:52:12", "remaining_time": "17:26:11"} +{"current_steps": 1125, "total_steps": 7924, "loss": 0.3406, "lr": 3.9787731702692004e-05, "epoch": 0.9938162544169611, "percentage": 14.2, "elapsed_time": "2:52:58", "remaining_time": "17:25:24"} +{"current_steps": 1130, "total_steps": 7924, "loss": 0.3353, "lr": 3.9781282129373294e-05, "epoch": 0.9982332155477032, "percentage": 14.26, "elapsed_time": "2:53:44", "remaining_time": "17:24:38"} +{"current_steps": 1135, "total_steps": 7924, "loss": 0.3603, "lr": 3.9774736573314774e-05, "epoch": 1.0026501766784452, "percentage": 14.32, "elapsed_time": "2:54:31", "remaining_time": "17:23:52"} +{"current_steps": 1140, "total_steps": 7924, "loss": 0.3576, "lr": 3.9768095066276794e-05, "epoch": 1.0070671378091873, "percentage": 14.39, "elapsed_time": "2:55:17", "remaining_time": "17:23:07"} +{"current_steps": 1145, "total_steps": 7924, "loss": 0.3423, "lr": 3.9761357640485255e-05, "epoch": 1.0114840989399294, "percentage": 14.45, "elapsed_time": "2:56:03", "remaining_time": "17:22:21"} +{"current_steps": 1150, "total_steps": 7924, "loss": 0.317, "lr": 3.975452432863152e-05, "epoch": 1.0159010600706713, "percentage": 14.51, "elapsed_time": "2:56:49", "remaining_time": "17:21:35"} +{"current_steps": 1155, "total_steps": 7924, "loss": 0.3091, "lr": 3.974759516387216e-05, "epoch": 1.0203180212014133, "percentage": 14.58, "elapsed_time": "2:57:35", "remaining_time": "17:20:50"} +{"current_steps": 1160, "total_steps": 7924, "loss": 0.3357, "lr": 3.9740570179828905e-05, "epoch": 1.0247349823321554, "percentage": 14.64, "elapsed_time": "2:58:22", "remaining_time": "17:20:04"} +{"current_steps": 1165, "total_steps": 7924, "loss": 0.3105, "lr": 3.9733449410588354e-05, "epoch": 1.0291519434628975, "percentage": 14.7, "elapsed_time": "2:59:08", "remaining_time": "17:19:17"} +{"current_steps": 1170, "total_steps": 7924, "loss": 0.329, "lr": 3.972623289070191e-05, "epoch": 1.0335689045936396, "percentage": 14.77, "elapsed_time": "2:59:54", "remaining_time": "17:18:31"} +{"current_steps": 1175, "total_steps": 7924, "loss": 0.2882, "lr": 3.971892065518557e-05, "epoch": 1.0379858657243817, "percentage": 14.83, "elapsed_time": "3:00:40", "remaining_time": "17:17:44"} +{"current_steps": 1180, "total_steps": 7924, "loss": 0.2812, "lr": 3.971151273951979e-05, "epoch": 1.0424028268551238, "percentage": 14.89, "elapsed_time": "3:01:26", "remaining_time": "17:16:58"} +{"current_steps": 1185, "total_steps": 7924, "loss": 0.354, "lr": 3.970400917964922e-05, "epoch": 1.0468197879858656, "percentage": 14.95, "elapsed_time": "3:02:12", "remaining_time": "17:16:10"} +{"current_steps": 1190, "total_steps": 7924, "loss": 0.32, "lr": 3.969641001198266e-05, "epoch": 1.0512367491166077, "percentage": 15.02, "elapsed_time": "3:02:58", "remaining_time": "17:15:24"} +{"current_steps": 1195, "total_steps": 7924, "loss": 0.294, "lr": 3.9688715273392785e-05, "epoch": 1.0556537102473498, "percentage": 15.08, "elapsed_time": "3:03:44", "remaining_time": "17:14:36"} +{"current_steps": 1200, "total_steps": 7924, "loss": 0.3253, "lr": 3.9680925001216e-05, "epoch": 1.0600706713780919, "percentage": 15.14, "elapsed_time": "3:04:30", "remaining_time": "17:13:50"} +{"current_steps": 1205, "total_steps": 7924, "loss": 0.3327, "lr": 3.967303923325228e-05, "epoch": 1.064487632508834, "percentage": 15.21, "elapsed_time": "3:05:16", "remaining_time": "17:13:04"} +{"current_steps": 1210, "total_steps": 7924, "loss": 0.3793, "lr": 3.966505800776493e-05, "epoch": 1.068904593639576, "percentage": 15.27, "elapsed_time": "3:06:02", "remaining_time": "17:12:18"} +{"current_steps": 1215, "total_steps": 7924, "loss": 0.3273, "lr": 3.965698136348048e-05, "epoch": 1.073321554770318, "percentage": 15.33, "elapsed_time": "3:06:48", "remaining_time": "17:11:30"} +{"current_steps": 1220, "total_steps": 7924, "loss": 0.3072, "lr": 3.96488093395884e-05, "epoch": 1.0777385159010602, "percentage": 15.4, "elapsed_time": "3:07:34", "remaining_time": "17:10:45"} +{"current_steps": 1225, "total_steps": 7924, "loss": 0.3266, "lr": 3.964054197574099e-05, "epoch": 1.082155477031802, "percentage": 15.46, "elapsed_time": "3:08:20", "remaining_time": "17:09:59"} +{"current_steps": 1230, "total_steps": 7924, "loss": 0.3013, "lr": 3.963217931205317e-05, "epoch": 1.0865724381625441, "percentage": 15.52, "elapsed_time": "3:09:06", "remaining_time": "17:09:10"} +{"current_steps": 1235, "total_steps": 7924, "loss": 0.32, "lr": 3.962372138910223e-05, "epoch": 1.0909893992932862, "percentage": 15.59, "elapsed_time": "3:09:52", "remaining_time": "17:08:21"} +{"current_steps": 1240, "total_steps": 7924, "loss": 0.3129, "lr": 3.9615168247927735e-05, "epoch": 1.0954063604240283, "percentage": 15.65, "elapsed_time": "3:10:38", "remaining_time": "17:07:35"} +{"current_steps": 1245, "total_steps": 7924, "loss": 0.3373, "lr": 3.9606519930031225e-05, "epoch": 1.0998233215547704, "percentage": 15.71, "elapsed_time": "3:11:24", "remaining_time": "17:06:48"} +{"current_steps": 1250, "total_steps": 7924, "loss": 0.3615, "lr": 3.959777647737606e-05, "epoch": 1.1042402826855124, "percentage": 15.77, "elapsed_time": "3:12:10", "remaining_time": "17:06:01"} +{"current_steps": 1255, "total_steps": 7924, "loss": 0.3505, "lr": 3.958893793238723e-05, "epoch": 1.1086572438162543, "percentage": 15.84, "elapsed_time": "3:12:55", "remaining_time": "17:05:13"} +{"current_steps": 1260, "total_steps": 7924, "loss": 0.3865, "lr": 3.958000433795113e-05, "epoch": 1.1130742049469964, "percentage": 15.9, "elapsed_time": "3:13:41", "remaining_time": "17:04:26"} +{"current_steps": 1265, "total_steps": 7924, "loss": 0.3463, "lr": 3.957097573741534e-05, "epoch": 1.1174911660777385, "percentage": 15.96, "elapsed_time": "3:14:28", "remaining_time": "17:03:41"} +{"current_steps": 1270, "total_steps": 7924, "loss": 0.3429, "lr": 3.956185217458843e-05, "epoch": 1.1219081272084805, "percentage": 16.03, "elapsed_time": "3:15:13", "remaining_time": "17:02:53"} +{"current_steps": 1275, "total_steps": 7924, "loss": 0.306, "lr": 3.955263369373977e-05, "epoch": 1.1263250883392226, "percentage": 16.09, "elapsed_time": "3:16:00", "remaining_time": "17:02:09"} +{"current_steps": 1280, "total_steps": 7924, "loss": 0.3344, "lr": 3.9543320339599266e-05, "epoch": 1.1307420494699647, "percentage": 16.15, "elapsed_time": "3:16:46", "remaining_time": "17:01:24"} +{"current_steps": 1285, "total_steps": 7924, "loss": 0.3495, "lr": 3.953391215735718e-05, "epoch": 1.1351590106007068, "percentage": 16.22, "elapsed_time": "3:17:33", "remaining_time": "17:00:41"} +{"current_steps": 1290, "total_steps": 7924, "loss": 0.3221, "lr": 3.952440919266389e-05, "epoch": 1.1395759717314489, "percentage": 16.28, "elapsed_time": "3:18:19", "remaining_time": "16:59:54"} +{"current_steps": 1295, "total_steps": 7924, "loss": 0.3149, "lr": 3.951481149162968e-05, "epoch": 1.1439929328621907, "percentage": 16.34, "elapsed_time": "3:19:05", "remaining_time": "16:59:08"} +{"current_steps": 1300, "total_steps": 7924, "loss": 0.3011, "lr": 3.950511910082452e-05, "epoch": 1.1484098939929328, "percentage": 16.41, "elapsed_time": "3:19:52", "remaining_time": "16:58:24"} +{"current_steps": 1305, "total_steps": 7924, "loss": 0.3092, "lr": 3.949533206727784e-05, "epoch": 1.1528268551236749, "percentage": 16.47, "elapsed_time": "3:20:40", "remaining_time": "16:57:47"} +{"current_steps": 1310, "total_steps": 7924, "loss": 0.3042, "lr": 3.948545043847826e-05, "epoch": 1.157243816254417, "percentage": 16.53, "elapsed_time": "3:21:26", "remaining_time": "16:57:04"} +{"current_steps": 1315, "total_steps": 7924, "loss": 0.3432, "lr": 3.947547426237344e-05, "epoch": 1.161660777385159, "percentage": 16.6, "elapsed_time": "3:22:12", "remaining_time": "16:56:17"} +{"current_steps": 1320, "total_steps": 7924, "loss": 0.3098, "lr": 3.9465403587369784e-05, "epoch": 1.1660777385159011, "percentage": 16.66, "elapsed_time": "3:22:59", "remaining_time": "16:55:36"} +{"current_steps": 1325, "total_steps": 7924, "loss": 0.3043, "lr": 3.945523846233222e-05, "epoch": 1.170494699646643, "percentage": 16.72, "elapsed_time": "3:23:47", "remaining_time": "16:54:57"} +{"current_steps": 1330, "total_steps": 7924, "loss": 0.3261, "lr": 3.944497893658396e-05, "epoch": 1.174911660777385, "percentage": 16.78, "elapsed_time": "3:24:33", "remaining_time": "16:54:10"} +{"current_steps": 1335, "total_steps": 7924, "loss": 0.3588, "lr": 3.943462505990629e-05, "epoch": 1.1793286219081272, "percentage": 16.85, "elapsed_time": "3:25:19", "remaining_time": "16:53:23"} +{"current_steps": 1340, "total_steps": 7924, "loss": 0.3394, "lr": 3.942417688253827e-05, "epoch": 1.1837455830388692, "percentage": 16.91, "elapsed_time": "3:26:05", "remaining_time": "16:52:36"} +{"current_steps": 1345, "total_steps": 7924, "loss": 0.3199, "lr": 3.9413634455176584e-05, "epoch": 1.1881625441696113, "percentage": 16.97, "elapsed_time": "3:26:51", "remaining_time": "16:51:49"} +{"current_steps": 1350, "total_steps": 7924, "loss": 0.3039, "lr": 3.940299782897517e-05, "epoch": 1.1925795053003534, "percentage": 17.04, "elapsed_time": "3:27:37", "remaining_time": "16:51:03"} +{"current_steps": 1355, "total_steps": 7924, "loss": 0.3124, "lr": 3.939226705554507e-05, "epoch": 1.1969964664310955, "percentage": 17.1, "elapsed_time": "3:28:23", "remaining_time": "16:50:16"} +{"current_steps": 1360, "total_steps": 7924, "loss": 0.3508, "lr": 3.9381442186954155e-05, "epoch": 1.2014134275618376, "percentage": 17.16, "elapsed_time": "3:29:09", "remaining_time": "16:49:29"} +{"current_steps": 1365, "total_steps": 7924, "loss": 0.3369, "lr": 3.9370523275726844e-05, "epoch": 1.2058303886925794, "percentage": 17.23, "elapsed_time": "3:29:55", "remaining_time": "16:48:42"} +{"current_steps": 1370, "total_steps": 7924, "loss": 0.3035, "lr": 3.935951037484388e-05, "epoch": 1.2102473498233215, "percentage": 17.29, "elapsed_time": "3:30:41", "remaining_time": "16:47:55"} +{"current_steps": 1375, "total_steps": 7924, "loss": 0.3162, "lr": 3.934840353774208e-05, "epoch": 1.2146643109540636, "percentage": 17.35, "elapsed_time": "3:31:27", "remaining_time": "16:47:07"} +{"current_steps": 1380, "total_steps": 7924, "loss": 0.2926, "lr": 3.9337202818314016e-05, "epoch": 1.2190812720848057, "percentage": 17.42, "elapsed_time": "3:32:13", "remaining_time": "16:46:20"} +{"current_steps": 1385, "total_steps": 7924, "loss": 0.3642, "lr": 3.932590827090783e-05, "epoch": 1.2234982332155477, "percentage": 17.48, "elapsed_time": "3:32:59", "remaining_time": "16:45:34"} +{"current_steps": 1390, "total_steps": 7924, "loss": 0.3168, "lr": 3.931451995032693e-05, "epoch": 1.2279151943462898, "percentage": 17.54, "elapsed_time": "3:33:45", "remaining_time": "16:44:47"} +{"current_steps": 1395, "total_steps": 7924, "loss": 0.3519, "lr": 3.930303791182972e-05, "epoch": 1.232332155477032, "percentage": 17.6, "elapsed_time": "3:34:31", "remaining_time": "16:44:00"} +{"current_steps": 1400, "total_steps": 7924, "loss": 0.3215, "lr": 3.929146221112936e-05, "epoch": 1.2367491166077738, "percentage": 17.67, "elapsed_time": "3:35:16", "remaining_time": "16:43:10"} +{"current_steps": 1405, "total_steps": 7924, "loss": 0.3281, "lr": 3.927979290439346e-05, "epoch": 1.2411660777385158, "percentage": 17.73, "elapsed_time": "3:36:02", "remaining_time": "16:42:25"} +{"current_steps": 1410, "total_steps": 7924, "loss": 0.3312, "lr": 3.926803004824382e-05, "epoch": 1.245583038869258, "percentage": 17.79, "elapsed_time": "3:36:48", "remaining_time": "16:41:39"} +{"current_steps": 1415, "total_steps": 7924, "loss": 0.3385, "lr": 3.925617369975619e-05, "epoch": 1.25, "percentage": 17.86, "elapsed_time": "3:37:35", "remaining_time": "16:40:55"} +{"current_steps": 1420, "total_steps": 7924, "loss": 0.3509, "lr": 3.924422391645994e-05, "epoch": 1.254416961130742, "percentage": 17.92, "elapsed_time": "3:38:21", "remaining_time": "16:40:09"} +{"current_steps": 1425, "total_steps": 7924, "loss": 0.3268, "lr": 3.923218075633781e-05, "epoch": 1.2588339222614842, "percentage": 17.98, "elapsed_time": "3:39:07", "remaining_time": "16:39:23"} +{"current_steps": 1430, "total_steps": 7924, "loss": 0.3149, "lr": 3.9220044277825615e-05, "epoch": 1.2632508833922262, "percentage": 18.05, "elapsed_time": "3:39:53", "remaining_time": "16:38:36"} +{"current_steps": 1435, "total_steps": 7924, "loss": 0.2994, "lr": 3.920781453981199e-05, "epoch": 1.2676678445229683, "percentage": 18.11, "elapsed_time": "3:40:39", "remaining_time": "16:37:50"} +{"current_steps": 1440, "total_steps": 7924, "loss": 0.3217, "lr": 3.919549160163806e-05, "epoch": 1.2720848056537102, "percentage": 18.17, "elapsed_time": "3:41:25", "remaining_time": "16:37:03"} +{"current_steps": 1445, "total_steps": 7924, "loss": 0.3579, "lr": 3.91830755230972e-05, "epoch": 1.2765017667844523, "percentage": 18.24, "elapsed_time": "3:42:12", "remaining_time": "16:36:17"} +{"current_steps": 1450, "total_steps": 7924, "loss": 0.3226, "lr": 3.91705663644347e-05, "epoch": 1.2809187279151943, "percentage": 18.3, "elapsed_time": "3:42:57", "remaining_time": "16:35:30"} +{"current_steps": 1455, "total_steps": 7924, "loss": 0.323, "lr": 3.91579641863475e-05, "epoch": 1.2853356890459364, "percentage": 18.36, "elapsed_time": "3:43:43", "remaining_time": "16:34:42"} +{"current_steps": 1460, "total_steps": 7924, "loss": 0.3446, "lr": 3.91452690499839e-05, "epoch": 1.2897526501766785, "percentage": 18.43, "elapsed_time": "3:44:30", "remaining_time": "16:33:58"} +{"current_steps": 1465, "total_steps": 7924, "loss": 0.333, "lr": 3.913248101694323e-05, "epoch": 1.2941696113074204, "percentage": 18.49, "elapsed_time": "3:45:16", "remaining_time": "16:33:11"} +{"current_steps": 1470, "total_steps": 7924, "loss": 0.3269, "lr": 3.911960014927559e-05, "epoch": 1.2985865724381624, "percentage": 18.55, "elapsed_time": "3:46:02", "remaining_time": "16:32:25"} +{"current_steps": 1475, "total_steps": 7924, "loss": 0.3081, "lr": 3.910662650948153e-05, "epoch": 1.3030035335689045, "percentage": 18.61, "elapsed_time": "3:46:48", "remaining_time": "16:31:38"} +{"current_steps": 1480, "total_steps": 7924, "loss": 0.3063, "lr": 3.9093560160511746e-05, "epoch": 1.3074204946996466, "percentage": 18.68, "elapsed_time": "3:47:34", "remaining_time": "16:30:51"} +{"current_steps": 1485, "total_steps": 7924, "loss": 0.316, "lr": 3.9080401165766776e-05, "epoch": 1.3118374558303887, "percentage": 18.74, "elapsed_time": "3:48:20", "remaining_time": "16:30:04"} +{"current_steps": 1490, "total_steps": 7924, "loss": 0.2849, "lr": 3.9067149589096695e-05, "epoch": 1.3162544169611308, "percentage": 18.8, "elapsed_time": "3:49:06", "remaining_time": "16:29:18"} +{"current_steps": 1495, "total_steps": 7924, "loss": 0.3029, "lr": 3.905380549480081e-05, "epoch": 1.3206713780918728, "percentage": 18.87, "elapsed_time": "3:49:52", "remaining_time": "16:28:31"} +{"current_steps": 1500, "total_steps": 7924, "loss": 0.3015, "lr": 3.904036894762734e-05, "epoch": 1.325088339222615, "percentage": 18.93, "elapsed_time": "3:50:38", "remaining_time": "16:27:44"} +{"current_steps": 1505, "total_steps": 7924, "loss": 0.3119, "lr": 3.9026840012773094e-05, "epoch": 1.329505300353357, "percentage": 18.99, "elapsed_time": "3:51:55", "remaining_time": "16:29:12"} +{"current_steps": 1510, "total_steps": 7924, "loss": 0.3241, "lr": 3.901321875588317e-05, "epoch": 1.3339222614840989, "percentage": 19.06, "elapsed_time": "3:52:42", "remaining_time": "16:28:27"} +{"current_steps": 1515, "total_steps": 7924, "loss": 0.3218, "lr": 3.899950524305064e-05, "epoch": 1.338339222614841, "percentage": 19.12, "elapsed_time": "3:53:28", "remaining_time": "16:27:40"} +{"current_steps": 1520, "total_steps": 7924, "loss": 0.3332, "lr": 3.898569954081621e-05, "epoch": 1.342756183745583, "percentage": 19.18, "elapsed_time": "3:54:14", "remaining_time": "16:26:52"} +{"current_steps": 1525, "total_steps": 7924, "loss": 0.3047, "lr": 3.897180171616791e-05, "epoch": 1.3471731448763251, "percentage": 19.25, "elapsed_time": "3:55:00", "remaining_time": "16:26:06"} +{"current_steps": 1530, "total_steps": 7924, "loss": 0.348, "lr": 3.895781183654076e-05, "epoch": 1.3515901060070672, "percentage": 19.31, "elapsed_time": "3:55:46", "remaining_time": "16:25:19"} +{"current_steps": 1535, "total_steps": 7924, "loss": 0.3056, "lr": 3.894372996981647e-05, "epoch": 1.356007067137809, "percentage": 19.37, "elapsed_time": "3:56:32", "remaining_time": "16:24:31"} +{"current_steps": 1540, "total_steps": 7924, "loss": 0.2863, "lr": 3.892955618432306e-05, "epoch": 1.3604240282685511, "percentage": 19.43, "elapsed_time": "3:57:18", "remaining_time": "16:23:43"} +{"current_steps": 1545, "total_steps": 7924, "loss": 0.3671, "lr": 3.891529054883458e-05, "epoch": 1.3648409893992932, "percentage": 19.5, "elapsed_time": "3:58:03", "remaining_time": "16:22:54"} +{"current_steps": 1550, "total_steps": 7924, "loss": 0.3164, "lr": 3.8900933132570755e-05, "epoch": 1.3692579505300353, "percentage": 19.56, "elapsed_time": "3:58:49", "remaining_time": "16:22:07"} +{"current_steps": 1555, "total_steps": 7924, "loss": 0.364, "lr": 3.888648400519663e-05, "epoch": 1.3736749116607774, "percentage": 19.62, "elapsed_time": "3:59:35", "remaining_time": "16:21:20"} +{"current_steps": 1560, "total_steps": 7924, "loss": 0.2918, "lr": 3.8871943236822274e-05, "epoch": 1.3780918727915195, "percentage": 19.69, "elapsed_time": "4:00:21", "remaining_time": "16:20:32"} +{"current_steps": 1565, "total_steps": 7924, "loss": 0.3015, "lr": 3.88573108980024e-05, "epoch": 1.3825088339222615, "percentage": 19.75, "elapsed_time": "4:01:07", "remaining_time": "16:19:45"} +{"current_steps": 1570, "total_steps": 7924, "loss": 0.2891, "lr": 3.8842587059736054e-05, "epoch": 1.3869257950530036, "percentage": 19.81, "elapsed_time": "4:01:53", "remaining_time": "16:18:58"} +{"current_steps": 1575, "total_steps": 7924, "loss": 0.3524, "lr": 3.882777179346622e-05, "epoch": 1.3913427561837457, "percentage": 19.88, "elapsed_time": "4:02:39", "remaining_time": "16:18:11"} +{"current_steps": 1580, "total_steps": 7924, "loss": 0.343, "lr": 3.881286517107957e-05, "epoch": 1.3957597173144876, "percentage": 19.94, "elapsed_time": "4:03:25", "remaining_time": "16:17:24"} +{"current_steps": 1585, "total_steps": 7924, "loss": 0.3196, "lr": 3.879786726490599e-05, "epoch": 1.4001766784452296, "percentage": 20.0, "elapsed_time": "4:04:11", "remaining_time": "16:16:38"} +{"current_steps": 1590, "total_steps": 7924, "loss": 0.329, "lr": 3.8782778147718335e-05, "epoch": 1.4045936395759717, "percentage": 20.07, "elapsed_time": "4:04:57", "remaining_time": "16:15:50"} +{"current_steps": 1595, "total_steps": 7924, "loss": 0.3, "lr": 3.876759789273202e-05, "epoch": 1.4090106007067138, "percentage": 20.13, "elapsed_time": "4:05:43", "remaining_time": "16:15:03"} +{"current_steps": 1600, "total_steps": 7924, "loss": 0.3075, "lr": 3.8752326573604684e-05, "epoch": 1.4134275618374559, "percentage": 20.19, "elapsed_time": "4:06:29", "remaining_time": "16:14:15"} +{"current_steps": 1605, "total_steps": 7924, "loss": 0.3195, "lr": 3.873696426443581e-05, "epoch": 1.417844522968198, "percentage": 20.25, "elapsed_time": "4:07:15", "remaining_time": "16:13:28"} +{"current_steps": 1610, "total_steps": 7924, "loss": 0.3251, "lr": 3.872151103976642e-05, "epoch": 1.4222614840989398, "percentage": 20.32, "elapsed_time": "4:08:01", "remaining_time": "16:12:41"} +{"current_steps": 1615, "total_steps": 7924, "loss": 0.3442, "lr": 3.870596697457863e-05, "epoch": 1.426678445229682, "percentage": 20.38, "elapsed_time": "4:08:47", "remaining_time": "16:11:53"} +{"current_steps": 1620, "total_steps": 7924, "loss": 0.3426, "lr": 3.8690332144295375e-05, "epoch": 1.431095406360424, "percentage": 20.44, "elapsed_time": "4:09:33", "remaining_time": "16:11:06"} +{"current_steps": 1625, "total_steps": 7924, "loss": 0.332, "lr": 3.867460662477996e-05, "epoch": 1.435512367491166, "percentage": 20.51, "elapsed_time": "4:10:19", "remaining_time": "16:10:19"} +{"current_steps": 1630, "total_steps": 7924, "loss": 0.3076, "lr": 3.865879049233577e-05, "epoch": 1.4399293286219081, "percentage": 20.57, "elapsed_time": "4:11:05", "remaining_time": "16:09:33"} +{"current_steps": 1635, "total_steps": 7924, "loss": 0.3124, "lr": 3.864288382370584e-05, "epoch": 1.4443462897526502, "percentage": 20.63, "elapsed_time": "4:11:51", "remaining_time": "16:08:45"} +{"current_steps": 1640, "total_steps": 7924, "loss": 0.3393, "lr": 3.8626886696072495e-05, "epoch": 1.4487632508833923, "percentage": 20.7, "elapsed_time": "4:12:37", "remaining_time": "16:07:58"} +{"current_steps": 1645, "total_steps": 7924, "loss": 0.3086, "lr": 3.8610799187057025e-05, "epoch": 1.4531802120141344, "percentage": 20.76, "elapsed_time": "4:13:23", "remaining_time": "16:07:11"} +{"current_steps": 1650, "total_steps": 7924, "loss": 0.3026, "lr": 3.8594621374719226e-05, "epoch": 1.4575971731448762, "percentage": 20.82, "elapsed_time": "4:14:09", "remaining_time": "16:06:24"} +{"current_steps": 1655, "total_steps": 7924, "loss": 0.3182, "lr": 3.857835333755709e-05, "epoch": 1.4620141342756183, "percentage": 20.89, "elapsed_time": "4:14:55", "remaining_time": "16:05:37"} +{"current_steps": 1660, "total_steps": 7924, "loss": 0.3236, "lr": 3.856199515450638e-05, "epoch": 1.4664310954063604, "percentage": 20.95, "elapsed_time": "4:15:41", "remaining_time": "16:04:51"} +{"current_steps": 1665, "total_steps": 7924, "loss": 0.3233, "lr": 3.8545546904940285e-05, "epoch": 1.4708480565371025, "percentage": 21.01, "elapsed_time": "4:16:27", "remaining_time": "16:04:03"} +{"current_steps": 1670, "total_steps": 7924, "loss": 0.3243, "lr": 3.8529008668668996e-05, "epoch": 1.4752650176678446, "percentage": 21.08, "elapsed_time": "4:17:13", "remaining_time": "16:03:16"} +{"current_steps": 1675, "total_steps": 7924, "loss": 0.3054, "lr": 3.851238052593935e-05, "epoch": 1.4796819787985867, "percentage": 21.14, "elapsed_time": "4:17:59", "remaining_time": "16:02:29"} +{"current_steps": 1680, "total_steps": 7924, "loss": 0.3252, "lr": 3.849566255743442e-05, "epoch": 1.4840989399293285, "percentage": 21.2, "elapsed_time": "4:18:45", "remaining_time": "16:01:42"} +{"current_steps": 1685, "total_steps": 7924, "loss": 0.3139, "lr": 3.8478854844273134e-05, "epoch": 1.4885159010600706, "percentage": 21.26, "elapsed_time": "4:19:31", "remaining_time": "16:00:56"} +{"current_steps": 1690, "total_steps": 7924, "loss": 0.2846, "lr": 3.846195746800988e-05, "epoch": 1.4929328621908127, "percentage": 21.33, "elapsed_time": "4:20:17", "remaining_time": "16:00:09"} +{"current_steps": 1695, "total_steps": 7924, "loss": 0.3371, "lr": 3.8444970510634124e-05, "epoch": 1.4973498233215548, "percentage": 21.39, "elapsed_time": "4:21:03", "remaining_time": "15:59:22"} +{"current_steps": 1700, "total_steps": 7924, "loss": 0.3295, "lr": 3.842789405456996e-05, "epoch": 1.5017667844522968, "percentage": 21.45, "elapsed_time": "4:21:50", "remaining_time": "15:58:37"} +{"current_steps": 1705, "total_steps": 7924, "loss": 0.3237, "lr": 3.841072818267578e-05, "epoch": 1.506183745583039, "percentage": 21.52, "elapsed_time": "4:22:36", "remaining_time": "15:57:50"} +{"current_steps": 1710, "total_steps": 7924, "loss": 0.3196, "lr": 3.839347297824383e-05, "epoch": 1.510600706713781, "percentage": 21.58, "elapsed_time": "4:23:22", "remaining_time": "15:57:03"} +{"current_steps": 1715, "total_steps": 7924, "loss": 0.352, "lr": 3.837612852499982e-05, "epoch": 1.515017667844523, "percentage": 21.64, "elapsed_time": "4:24:08", "remaining_time": "15:56:17"} +{"current_steps": 1720, "total_steps": 7924, "loss": 0.3625, "lr": 3.8358694907102504e-05, "epoch": 1.5194346289752652, "percentage": 21.71, "elapsed_time": "4:24:54", "remaining_time": "15:55:31"} +{"current_steps": 1725, "total_steps": 7924, "loss": 0.3449, "lr": 3.834117220914328e-05, "epoch": 1.523851590106007, "percentage": 21.77, "elapsed_time": "4:25:41", "remaining_time": "15:54:46"} +{"current_steps": 1730, "total_steps": 7924, "loss": 0.307, "lr": 3.832356051614579e-05, "epoch": 1.528268551236749, "percentage": 21.83, "elapsed_time": "4:26:27", "remaining_time": "15:53:59"} +{"current_steps": 1735, "total_steps": 7924, "loss": 0.3011, "lr": 3.8305859913565505e-05, "epoch": 1.5326855123674912, "percentage": 21.9, "elapsed_time": "4:27:13", "remaining_time": "15:53:12"} +{"current_steps": 1740, "total_steps": 7924, "loss": 0.3087, "lr": 3.8288070487289274e-05, "epoch": 1.5371024734982333, "percentage": 21.96, "elapsed_time": "4:27:59", "remaining_time": "15:52:25"} +{"current_steps": 1745, "total_steps": 7924, "loss": 0.3183, "lr": 3.827019232363496e-05, "epoch": 1.5415194346289751, "percentage": 22.02, "elapsed_time": "4:28:45", "remaining_time": "15:51:38"} +{"current_steps": 1750, "total_steps": 7924, "loss": 0.3252, "lr": 3.8252225509350985e-05, "epoch": 1.5459363957597172, "percentage": 22.08, "elapsed_time": "4:29:31", "remaining_time": "15:50:53"} +{"current_steps": 1755, "total_steps": 7924, "loss": 0.3276, "lr": 3.823417013161594e-05, "epoch": 1.5503533568904593, "percentage": 22.15, "elapsed_time": "4:30:17", "remaining_time": "15:50:07"} +{"current_steps": 1760, "total_steps": 7924, "loss": 0.3399, "lr": 3.821602627803813e-05, "epoch": 1.5547703180212014, "percentage": 22.21, "elapsed_time": "4:31:03", "remaining_time": "15:49:20"} +{"current_steps": 1765, "total_steps": 7924, "loss": 0.3248, "lr": 3.819779403665515e-05, "epoch": 1.5591872791519434, "percentage": 22.27, "elapsed_time": "4:31:49", "remaining_time": "15:48:32"} +{"current_steps": 1770, "total_steps": 7924, "loss": 0.3323, "lr": 3.8179473495933497e-05, "epoch": 1.5636042402826855, "percentage": 22.34, "elapsed_time": "4:32:35", "remaining_time": "15:47:45"} +{"current_steps": 1775, "total_steps": 7924, "loss": 0.2712, "lr": 3.8161064744768096e-05, "epoch": 1.5680212014134276, "percentage": 22.4, "elapsed_time": "4:33:21", "remaining_time": "15:46:58"} +{"current_steps": 1780, "total_steps": 7924, "loss": 0.3833, "lr": 3.814256787248189e-05, "epoch": 1.5724381625441697, "percentage": 22.46, "elapsed_time": "4:34:07", "remaining_time": "15:46:11"} +{"current_steps": 1785, "total_steps": 7924, "loss": 0.3099, "lr": 3.81239829688254e-05, "epoch": 1.5768551236749118, "percentage": 22.53, "elapsed_time": "4:34:53", "remaining_time": "15:45:25"} +{"current_steps": 1790, "total_steps": 7924, "loss": 0.3416, "lr": 3.810531012397632e-05, "epoch": 1.5812720848056538, "percentage": 22.59, "elapsed_time": "4:35:39", "remaining_time": "15:44:38"} +{"current_steps": 1795, "total_steps": 7924, "loss": 0.3393, "lr": 3.8086549428539016e-05, "epoch": 1.585689045936396, "percentage": 22.65, "elapsed_time": "4:36:26", "remaining_time": "15:43:53"} +{"current_steps": 1800, "total_steps": 7924, "loss": 0.2922, "lr": 3.806770097354413e-05, "epoch": 1.5901060070671378, "percentage": 22.72, "elapsed_time": "4:37:12", "remaining_time": "15:43:06"} +{"current_steps": 1805, "total_steps": 7924, "loss": 0.3178, "lr": 3.8048764850448146e-05, "epoch": 1.5945229681978799, "percentage": 22.78, "elapsed_time": "4:37:58", "remaining_time": "15:42:19"} +{"current_steps": 1810, "total_steps": 7924, "loss": 0.3071, "lr": 3.802974115113292e-05, "epoch": 1.598939929328622, "percentage": 22.84, "elapsed_time": "4:38:44", "remaining_time": "15:41:32"} +{"current_steps": 1815, "total_steps": 7924, "loss": 0.3603, "lr": 3.801062996790526e-05, "epoch": 1.6033568904593638, "percentage": 22.91, "elapsed_time": "4:39:30", "remaining_time": "15:40:45"} +{"current_steps": 1820, "total_steps": 7924, "loss": 0.3065, "lr": 3.7991431393496435e-05, "epoch": 1.6077738515901059, "percentage": 22.97, "elapsed_time": "4:40:16", "remaining_time": "15:39:59"} +{"current_steps": 1825, "total_steps": 7924, "loss": 0.2951, "lr": 3.797214552106178e-05, "epoch": 1.612190812720848, "percentage": 23.03, "elapsed_time": "4:41:02", "remaining_time": "15:39:13"} +{"current_steps": 1830, "total_steps": 7924, "loss": 0.3327, "lr": 3.7952772444180205e-05, "epoch": 1.61660777385159, "percentage": 23.09, "elapsed_time": "4:41:49", "remaining_time": "15:38:27"} +{"current_steps": 1835, "total_steps": 7924, "loss": 0.3209, "lr": 3.793331225685376e-05, "epoch": 1.6210247349823321, "percentage": 23.16, "elapsed_time": "4:42:35", "remaining_time": "15:37:41"} +{"current_steps": 1840, "total_steps": 7924, "loss": 0.2859, "lr": 3.791376505350716e-05, "epoch": 1.6254416961130742, "percentage": 23.22, "elapsed_time": "4:43:20", "remaining_time": "15:36:54"} +{"current_steps": 1845, "total_steps": 7924, "loss": 0.2862, "lr": 3.789413092898735e-05, "epoch": 1.6298586572438163, "percentage": 23.28, "elapsed_time": "4:44:07", "remaining_time": "15:36:07"} +{"current_steps": 1850, "total_steps": 7924, "loss": 0.2997, "lr": 3.7874409978563045e-05, "epoch": 1.6342756183745584, "percentage": 23.35, "elapsed_time": "4:44:53", "remaining_time": "15:35:21"} +{"current_steps": 1855, "total_steps": 7924, "loss": 0.2763, "lr": 3.785460229792422e-05, "epoch": 1.6386925795053005, "percentage": 23.41, "elapsed_time": "4:45:39", "remaining_time": "15:34:35"} +{"current_steps": 1860, "total_steps": 7924, "loss": 0.3189, "lr": 3.783470798318173e-05, "epoch": 1.6431095406360425, "percentage": 23.47, "elapsed_time": "4:46:25", "remaining_time": "15:33:47"} +{"current_steps": 1865, "total_steps": 7924, "loss": 0.3393, "lr": 3.7814727130866756e-05, "epoch": 1.6475265017667846, "percentage": 23.54, "elapsed_time": "4:47:11", "remaining_time": "15:33:00"} +{"current_steps": 1870, "total_steps": 7924, "loss": 0.3433, "lr": 3.779465983793039e-05, "epoch": 1.6519434628975265, "percentage": 23.6, "elapsed_time": "4:47:57", "remaining_time": "15:32:14"} +{"current_steps": 1875, "total_steps": 7924, "loss": 0.3252, "lr": 3.7774506201743175e-05, "epoch": 1.6563604240282686, "percentage": 23.66, "elapsed_time": "4:48:43", "remaining_time": "15:31:27"} +{"current_steps": 1880, "total_steps": 7924, "loss": 0.3066, "lr": 3.775426632009456e-05, "epoch": 1.6607773851590106, "percentage": 23.73, "elapsed_time": "4:49:29", "remaining_time": "15:30:41"} +{"current_steps": 1885, "total_steps": 7924, "loss": 0.3205, "lr": 3.7733940291192516e-05, "epoch": 1.6651943462897525, "percentage": 23.79, "elapsed_time": "4:50:15", "remaining_time": "15:29:54"} +{"current_steps": 1890, "total_steps": 7924, "loss": 0.3574, "lr": 3.771352821366301e-05, "epoch": 1.6696113074204946, "percentage": 23.85, "elapsed_time": "4:51:01", "remaining_time": "15:29:07"} +{"current_steps": 1895, "total_steps": 7924, "loss": 0.3129, "lr": 3.769303018654951e-05, "epoch": 1.6740282685512367, "percentage": 23.91, "elapsed_time": "4:51:47", "remaining_time": "15:28:21"} +{"current_steps": 1900, "total_steps": 7924, "loss": 0.3588, "lr": 3.7672446309312554e-05, "epoch": 1.6784452296819787, "percentage": 23.98, "elapsed_time": "4:52:33", "remaining_time": "15:27:34"} +{"current_steps": 1905, "total_steps": 7924, "loss": 0.3609, "lr": 3.765177668182923e-05, "epoch": 1.6828621908127208, "percentage": 24.04, "elapsed_time": "4:53:19", "remaining_time": "15:26:47"} +{"current_steps": 1910, "total_steps": 7924, "loss": 0.3016, "lr": 3.763102140439272e-05, "epoch": 1.687279151943463, "percentage": 24.1, "elapsed_time": "4:54:05", "remaining_time": "15:25:59"} +{"current_steps": 1915, "total_steps": 7924, "loss": 0.2874, "lr": 3.7610180577711774e-05, "epoch": 1.691696113074205, "percentage": 24.17, "elapsed_time": "4:54:51", "remaining_time": "15:25:13"} +{"current_steps": 1920, "total_steps": 7924, "loss": 0.3251, "lr": 3.758925430291025e-05, "epoch": 1.696113074204947, "percentage": 24.23, "elapsed_time": "4:55:37", "remaining_time": "15:24:26"} +{"current_steps": 1925, "total_steps": 7924, "loss": 0.326, "lr": 3.756824268152663e-05, "epoch": 1.7005300353356891, "percentage": 24.29, "elapsed_time": "4:56:23", "remaining_time": "15:23:39"} +{"current_steps": 1930, "total_steps": 7924, "loss": 0.3568, "lr": 3.7547145815513504e-05, "epoch": 1.7049469964664312, "percentage": 24.36, "elapsed_time": "4:57:09", "remaining_time": "15:22:52"} +{"current_steps": 1935, "total_steps": 7924, "loss": 0.3531, "lr": 3.752596380723709e-05, "epoch": 1.7093639575971733, "percentage": 24.42, "elapsed_time": "4:57:55", "remaining_time": "15:22:06"} +{"current_steps": 1940, "total_steps": 7924, "loss": 0.3044, "lr": 3.750469675947672e-05, "epoch": 1.7137809187279152, "percentage": 24.48, "elapsed_time": "4:58:41", "remaining_time": "15:21:19"} +{"current_steps": 1945, "total_steps": 7924, "loss": 0.3225, "lr": 3.7483344775424376e-05, "epoch": 1.7181978798586572, "percentage": 24.55, "elapsed_time": "4:59:27", "remaining_time": "15:20:32"} +{"current_steps": 1950, "total_steps": 7924, "loss": 0.3168, "lr": 3.746190795868416e-05, "epoch": 1.7226148409893993, "percentage": 24.61, "elapsed_time": "5:00:13", "remaining_time": "15:19:45"} +{"current_steps": 1955, "total_steps": 7924, "loss": 0.2932, "lr": 3.7440386413271796e-05, "epoch": 1.7270318021201412, "percentage": 24.67, "elapsed_time": "5:00:59", "remaining_time": "15:18:58"} +{"current_steps": 1960, "total_steps": 7924, "loss": 0.3082, "lr": 3.741878024361412e-05, "epoch": 1.7314487632508833, "percentage": 24.73, "elapsed_time": "5:01:45", "remaining_time": "15:18:11"} +{"current_steps": 1965, "total_steps": 7924, "loss": 0.2994, "lr": 3.7397089554548606e-05, "epoch": 1.7358657243816253, "percentage": 24.8, "elapsed_time": "5:02:31", "remaining_time": "15:17:25"} +{"current_steps": 1970, "total_steps": 7924, "loss": 0.2718, "lr": 3.73753144513228e-05, "epoch": 1.7402826855123674, "percentage": 24.86, "elapsed_time": "5:03:17", "remaining_time": "15:16:38"} +{"current_steps": 1975, "total_steps": 7924, "loss": 0.3195, "lr": 3.735345503959388e-05, "epoch": 1.7446996466431095, "percentage": 24.92, "elapsed_time": "5:04:03", "remaining_time": "15:15:51"} +{"current_steps": 1980, "total_steps": 7924, "loss": 0.307, "lr": 3.7331511425428075e-05, "epoch": 1.7491166077738516, "percentage": 24.99, "elapsed_time": "5:04:49", "remaining_time": "15:15:04"} +{"current_steps": 1985, "total_steps": 7924, "loss": 0.3163, "lr": 3.73094837153002e-05, "epoch": 1.7535335689045937, "percentage": 25.05, "elapsed_time": "5:05:34", "remaining_time": "15:14:17"} +{"current_steps": 1990, "total_steps": 7924, "loss": 0.3476, "lr": 3.7287372016093106e-05, "epoch": 1.7579505300353357, "percentage": 25.11, "elapsed_time": "5:06:20", "remaining_time": "15:13:30"} +{"current_steps": 1995, "total_steps": 7924, "loss": 0.3238, "lr": 3.726517643509718e-05, "epoch": 1.7623674911660778, "percentage": 25.18, "elapsed_time": "5:07:07", "remaining_time": "15:12:43"} +{"current_steps": 2000, "total_steps": 7924, "loss": 0.313, "lr": 3.724289708000984e-05, "epoch": 1.76678445229682, "percentage": 25.24, "elapsed_time": "5:07:53", "remaining_time": "15:11:57"} +{"current_steps": 2005, "total_steps": 7924, "loss": 0.3045, "lr": 3.722053405893495e-05, "epoch": 1.771201413427562, "percentage": 25.3, "elapsed_time": "5:08:39", "remaining_time": "15:11:10"} +{"current_steps": 2010, "total_steps": 7924, "loss": 0.3038, "lr": 3.7198087480382386e-05, "epoch": 1.7756183745583038, "percentage": 25.37, "elapsed_time": "5:09:25", "remaining_time": "15:10:23"} +{"current_steps": 2015, "total_steps": 7924, "loss": 0.3153, "lr": 3.7175557453267435e-05, "epoch": 1.780035335689046, "percentage": 25.43, "elapsed_time": "5:10:10", "remaining_time": "15:09:36"} +{"current_steps": 2020, "total_steps": 7924, "loss": 0.3231, "lr": 3.715294408691029e-05, "epoch": 1.784452296819788, "percentage": 25.49, "elapsed_time": "5:10:57", "remaining_time": "15:08:51"} +{"current_steps": 2025, "total_steps": 7924, "loss": 0.3279, "lr": 3.713024749103554e-05, "epoch": 1.78886925795053, "percentage": 25.56, "elapsed_time": "5:11:43", "remaining_time": "15:08:05"} +{"current_steps": 2030, "total_steps": 7924, "loss": 0.3089, "lr": 3.71074677757716e-05, "epoch": 1.793286219081272, "percentage": 25.62, "elapsed_time": "5:12:29", "remaining_time": "15:07:19"} +{"current_steps": 2035, "total_steps": 7924, "loss": 0.3438, "lr": 3.708460505165021e-05, "epoch": 1.797703180212014, "percentage": 25.68, "elapsed_time": "5:13:15", "remaining_time": "15:06:32"} +{"current_steps": 2040, "total_steps": 7924, "loss": 0.3271, "lr": 3.706165942960589e-05, "epoch": 1.802120141342756, "percentage": 25.74, "elapsed_time": "5:14:01", "remaining_time": "15:05:45"} +{"current_steps": 2045, "total_steps": 7924, "loss": 0.3168, "lr": 3.703863102097538e-05, "epoch": 1.8065371024734982, "percentage": 25.81, "elapsed_time": "5:14:47", "remaining_time": "15:04:57"} +{"current_steps": 2050, "total_steps": 7924, "loss": 0.3165, "lr": 3.701551993749714e-05, "epoch": 1.8109540636042403, "percentage": 25.87, "elapsed_time": "5:15:33", "remaining_time": "15:04:11"} +{"current_steps": 2055, "total_steps": 7924, "loss": 0.3048, "lr": 3.6992326291310764e-05, "epoch": 1.8153710247349824, "percentage": 25.93, "elapsed_time": "5:16:19", "remaining_time": "15:03:24"} +{"current_steps": 2060, "total_steps": 7924, "loss": 0.2975, "lr": 3.696905019495647e-05, "epoch": 1.8197879858657244, "percentage": 26.0, "elapsed_time": "5:17:05", "remaining_time": "15:02:38"} +{"current_steps": 2065, "total_steps": 7924, "loss": 0.3234, "lr": 3.6945691761374535e-05, "epoch": 1.8242049469964665, "percentage": 26.06, "elapsed_time": "5:17:51", "remaining_time": "15:01:51"} +{"current_steps": 2070, "total_steps": 7924, "loss": 0.3236, "lr": 3.692225110390474e-05, "epoch": 1.8286219081272086, "percentage": 26.12, "elapsed_time": "5:18:37", "remaining_time": "15:01:04"} +{"current_steps": 2075, "total_steps": 7924, "loss": 0.3203, "lr": 3.689872833628587e-05, "epoch": 1.8330388692579507, "percentage": 26.19, "elapsed_time": "5:19:23", "remaining_time": "15:00:19"} +{"current_steps": 2080, "total_steps": 7924, "loss": 0.3268, "lr": 3.687512357265509e-05, "epoch": 1.8374558303886925, "percentage": 26.25, "elapsed_time": "5:20:09", "remaining_time": "14:59:32"} +{"current_steps": 2085, "total_steps": 7924, "loss": 0.3141, "lr": 3.685143692754743e-05, "epoch": 1.8418727915194346, "percentage": 26.31, "elapsed_time": "5:20:55", "remaining_time": "14:58:45"} +{"current_steps": 2090, "total_steps": 7924, "loss": 0.3092, "lr": 3.6827668515895234e-05, "epoch": 1.8462897526501767, "percentage": 26.38, "elapsed_time": "5:21:41", "remaining_time": "14:57:59"} +{"current_steps": 2095, "total_steps": 7924, "loss": 0.3024, "lr": 3.68038184530276e-05, "epoch": 1.8507067137809188, "percentage": 26.44, "elapsed_time": "5:22:27", "remaining_time": "14:57:11"} +{"current_steps": 2100, "total_steps": 7924, "loss": 0.2935, "lr": 3.6779886854669815e-05, "epoch": 1.8551236749116606, "percentage": 26.5, "elapsed_time": "5:23:13", "remaining_time": "14:56:25"} +{"current_steps": 2105, "total_steps": 7924, "loss": 0.3282, "lr": 3.6755873836942756e-05, "epoch": 1.8595406360424027, "percentage": 26.56, "elapsed_time": "5:23:59", "remaining_time": "14:55:38"} +{"current_steps": 2110, "total_steps": 7924, "loss": 0.3506, "lr": 3.673177951636242e-05, "epoch": 1.8639575971731448, "percentage": 26.63, "elapsed_time": "5:24:45", "remaining_time": "14:54:52"} +{"current_steps": 2115, "total_steps": 7924, "loss": 0.3433, "lr": 3.670760400983925e-05, "epoch": 1.8683745583038869, "percentage": 26.69, "elapsed_time": "5:25:31", "remaining_time": "14:54:05"} +{"current_steps": 2120, "total_steps": 7924, "loss": 0.3342, "lr": 3.6683347434677654e-05, "epoch": 1.872791519434629, "percentage": 26.75, "elapsed_time": "5:26:17", "remaining_time": "14:53:18"} +{"current_steps": 2125, "total_steps": 7924, "loss": 0.2953, "lr": 3.6659009908575394e-05, "epoch": 1.877208480565371, "percentage": 26.82, "elapsed_time": "5:27:03", "remaining_time": "14:52:32"} +{"current_steps": 2130, "total_steps": 7924, "loss": 0.3263, "lr": 3.663459154962301e-05, "epoch": 1.8816254416961131, "percentage": 26.88, "elapsed_time": "5:27:49", "remaining_time": "14:51:45"} +{"current_steps": 2135, "total_steps": 7924, "loss": 0.3149, "lr": 3.661009247630326e-05, "epoch": 1.8860424028268552, "percentage": 26.94, "elapsed_time": "5:28:35", "remaining_time": "14:50:58"} +{"current_steps": 2140, "total_steps": 7924, "loss": 0.2887, "lr": 3.658551280749055e-05, "epoch": 1.8904593639575973, "percentage": 27.01, "elapsed_time": "5:29:21", "remaining_time": "14:50:11"} +{"current_steps": 2145, "total_steps": 7924, "loss": 0.2892, "lr": 3.656085266245038e-05, "epoch": 1.8948763250883394, "percentage": 27.07, "elapsed_time": "5:30:07", "remaining_time": "14:49:25"} +{"current_steps": 2150, "total_steps": 7924, "loss": 0.3093, "lr": 3.653611216083867e-05, "epoch": 1.8992932862190812, "percentage": 27.13, "elapsed_time": "5:30:53", "remaining_time": "14:48:39"} +{"current_steps": 2155, "total_steps": 7924, "loss": 0.3111, "lr": 3.651129142270132e-05, "epoch": 1.9037102473498233, "percentage": 27.2, "elapsed_time": "5:31:40", "remaining_time": "14:47:53"} +{"current_steps": 2160, "total_steps": 7924, "loss": 0.3236, "lr": 3.6486390568473494e-05, "epoch": 1.9081272084805654, "percentage": 27.26, "elapsed_time": "5:32:25", "remaining_time": "14:47:06"} +{"current_steps": 2165, "total_steps": 7924, "loss": 0.2967, "lr": 3.646140971897914e-05, "epoch": 1.9125441696113075, "percentage": 27.32, "elapsed_time": "5:33:12", "remaining_time": "14:46:20"} +{"current_steps": 2170, "total_steps": 7924, "loss": 0.3371, "lr": 3.6436348995430314e-05, "epoch": 1.9169611307420493, "percentage": 27.39, "elapsed_time": "5:33:58", "remaining_time": "14:45:33"} +{"current_steps": 2175, "total_steps": 7924, "loss": 0.3121, "lr": 3.641120851942669e-05, "epoch": 1.9213780918727914, "percentage": 27.45, "elapsed_time": "5:34:44", "remaining_time": "14:44:47"} +{"current_steps": 2180, "total_steps": 7924, "loss": 0.3264, "lr": 3.638598841295487e-05, "epoch": 1.9257950530035335, "percentage": 27.51, "elapsed_time": "5:35:30", "remaining_time": "14:44:00"} +{"current_steps": 2185, "total_steps": 7924, "loss": 0.3463, "lr": 3.6360688798387865e-05, "epoch": 1.9302120141342756, "percentage": 27.57, "elapsed_time": "5:36:16", "remaining_time": "14:43:14"} +{"current_steps": 2190, "total_steps": 7924, "loss": 0.2941, "lr": 3.633530979848446e-05, "epoch": 1.9346289752650176, "percentage": 27.64, "elapsed_time": "5:37:02", "remaining_time": "14:42:28"} +{"current_steps": 2195, "total_steps": 7924, "loss": 0.3671, "lr": 3.6309851536388664e-05, "epoch": 1.9390459363957597, "percentage": 27.7, "elapsed_time": "5:37:48", "remaining_time": "14:41:42"} +{"current_steps": 2200, "total_steps": 7924, "loss": 0.3231, "lr": 3.6284314135629036e-05, "epoch": 1.9434628975265018, "percentage": 27.76, "elapsed_time": "5:38:34", "remaining_time": "14:40:55"} +{"current_steps": 2205, "total_steps": 7924, "loss": 0.3538, "lr": 3.625869772011816e-05, "epoch": 1.947879858657244, "percentage": 27.83, "elapsed_time": "5:39:20", "remaining_time": "14:40:09"} +{"current_steps": 2210, "total_steps": 7924, "loss": 0.3141, "lr": 3.6233002414152025e-05, "epoch": 1.952296819787986, "percentage": 27.89, "elapsed_time": "5:40:06", "remaining_time": "14:39:22"} +{"current_steps": 2215, "total_steps": 7924, "loss": 0.3353, "lr": 3.620722834240939e-05, "epoch": 1.956713780918728, "percentage": 27.95, "elapsed_time": "5:40:53", "remaining_time": "14:38:36"} +{"current_steps": 2220, "total_steps": 7924, "loss": 0.3328, "lr": 3.61813756299512e-05, "epoch": 1.96113074204947, "percentage": 28.02, "elapsed_time": "5:41:39", "remaining_time": "14:37:49"} +{"current_steps": 2225, "total_steps": 7924, "loss": 0.3315, "lr": 3.6155444402219995e-05, "epoch": 1.965547703180212, "percentage": 28.08, "elapsed_time": "5:42:24", "remaining_time": "14:37:02"} +{"current_steps": 2230, "total_steps": 7924, "loss": 0.313, "lr": 3.612943478503929e-05, "epoch": 1.969964664310954, "percentage": 28.14, "elapsed_time": "5:43:10", "remaining_time": "14:36:16"} +{"current_steps": 2235, "total_steps": 7924, "loss": 0.2988, "lr": 3.610334690461295e-05, "epoch": 1.9743816254416962, "percentage": 28.21, "elapsed_time": "5:43:56", "remaining_time": "14:35:29"} +{"current_steps": 2240, "total_steps": 7924, "loss": 0.3008, "lr": 3.6077180887524584e-05, "epoch": 1.978798586572438, "percentage": 28.27, "elapsed_time": "5:44:42", "remaining_time": "14:34:43"} +{"current_steps": 2245, "total_steps": 7924, "loss": 0.3143, "lr": 3.605093686073694e-05, "epoch": 1.98321554770318, "percentage": 28.33, "elapsed_time": "5:45:28", "remaining_time": "14:33:56"} +{"current_steps": 2250, "total_steps": 7924, "loss": 0.3122, "lr": 3.602461495159131e-05, "epoch": 1.9876325088339222, "percentage": 28.39, "elapsed_time": "5:46:14", "remaining_time": "14:33:09"} +{"current_steps": 2255, "total_steps": 7924, "loss": 0.3075, "lr": 3.5998215287806845e-05, "epoch": 1.9920494699646643, "percentage": 28.46, "elapsed_time": "5:47:01", "remaining_time": "14:32:23"} +{"current_steps": 2260, "total_steps": 7924, "loss": 0.3088, "lr": 3.597173799748001e-05, "epoch": 1.9964664310954063, "percentage": 28.52, "elapsed_time": "5:47:47", "remaining_time": "14:31:37"} +{"current_steps": 2265, "total_steps": 7924, "loss": 0.2974, "lr": 3.594518320908391e-05, "epoch": 2.001766784452297, "percentage": 28.58, "elapsed_time": "5:48:33", "remaining_time": "14:30:51"} +{"current_steps": 2270, "total_steps": 7924, "loss": 0.2677, "lr": 3.591855105146769e-05, "epoch": 2.006183745583039, "percentage": 28.65, "elapsed_time": "5:49:19", "remaining_time": "14:30:04"} +{"current_steps": 2275, "total_steps": 7924, "loss": 0.2922, "lr": 3.589184165385592e-05, "epoch": 2.010600706713781, "percentage": 28.71, "elapsed_time": "5:50:05", "remaining_time": "14:29:18"} +{"current_steps": 2280, "total_steps": 7924, "loss": 0.2791, "lr": 3.586505514584793e-05, "epoch": 2.015017667844523, "percentage": 28.77, "elapsed_time": "5:50:51", "remaining_time": "14:28:31"} +{"current_steps": 2285, "total_steps": 7924, "loss": 0.3277, "lr": 3.583819165741722e-05, "epoch": 2.019434628975265, "percentage": 28.84, "elapsed_time": "5:51:37", "remaining_time": "14:27:45"} +{"current_steps": 2290, "total_steps": 7924, "loss": 0.2556, "lr": 3.581125131891082e-05, "epoch": 2.0238515901060072, "percentage": 28.9, "elapsed_time": "5:52:24", "remaining_time": "14:26:59"} +{"current_steps": 2295, "total_steps": 7924, "loss": 0.3267, "lr": 3.578423426104864e-05, "epoch": 2.0282685512367493, "percentage": 28.96, "elapsed_time": "5:53:10", "remaining_time": "14:26:14"} +{"current_steps": 2300, "total_steps": 7924, "loss": 0.2794, "lr": 3.5757140614922846e-05, "epoch": 2.032685512367491, "percentage": 29.03, "elapsed_time": "5:53:56", "remaining_time": "14:25:27"} +{"current_steps": 2305, "total_steps": 7924, "loss": 0.3039, "lr": 3.572997051199724e-05, "epoch": 2.037102473498233, "percentage": 29.09, "elapsed_time": "5:54:42", "remaining_time": "14:24:41"} +{"current_steps": 2310, "total_steps": 7924, "loss": 0.2865, "lr": 3.5702724084106596e-05, "epoch": 2.041519434628975, "percentage": 29.15, "elapsed_time": "5:55:28", "remaining_time": "14:23:55"} +{"current_steps": 2315, "total_steps": 7924, "loss": 0.3152, "lr": 3.567540146345604e-05, "epoch": 2.045936395759717, "percentage": 29.22, "elapsed_time": "5:56:14", "remaining_time": "14:23:08"} +{"current_steps": 2320, "total_steps": 7924, "loss": 0.3071, "lr": 3.5648002782620375e-05, "epoch": 2.0503533568904593, "percentage": 29.28, "elapsed_time": "5:57:00", "remaining_time": "14:22:22"} +{"current_steps": 2325, "total_steps": 7924, "loss": 0.2835, "lr": 3.562052817454351e-05, "epoch": 2.0547703180212014, "percentage": 29.34, "elapsed_time": "5:57:46", "remaining_time": "14:21:35"} +{"current_steps": 2330, "total_steps": 7924, "loss": 0.2967, "lr": 3.5592977772537734e-05, "epoch": 2.0591872791519434, "percentage": 29.4, "elapsed_time": "5:58:33", "remaining_time": "14:20:50"} +{"current_steps": 2335, "total_steps": 7924, "loss": 0.2975, "lr": 3.55653517102831e-05, "epoch": 2.0636042402826855, "percentage": 29.47, "elapsed_time": "5:59:19", "remaining_time": "14:20:05"} +{"current_steps": 2340, "total_steps": 7924, "loss": 0.2931, "lr": 3.5537650121826804e-05, "epoch": 2.0680212014134276, "percentage": 29.53, "elapsed_time": "6:00:06", "remaining_time": "14:19:19"} +{"current_steps": 2345, "total_steps": 7924, "loss": 0.3258, "lr": 3.550987314158249e-05, "epoch": 2.0724381625441697, "percentage": 29.59, "elapsed_time": "6:00:52", "remaining_time": "14:18:32"} +{"current_steps": 2350, "total_steps": 7924, "loss": 0.2963, "lr": 3.5482020904329635e-05, "epoch": 2.0768551236749118, "percentage": 29.66, "elapsed_time": "6:01:38", "remaining_time": "14:17:46"} +{"current_steps": 2355, "total_steps": 7924, "loss": 0.3224, "lr": 3.545409354521286e-05, "epoch": 2.081272084805654, "percentage": 29.72, "elapsed_time": "6:02:24", "remaining_time": "14:17:00"} +{"current_steps": 2360, "total_steps": 7924, "loss": 0.2875, "lr": 3.542609119974129e-05, "epoch": 2.085689045936396, "percentage": 29.78, "elapsed_time": "6:03:10", "remaining_time": "14:16:13"} +{"current_steps": 2365, "total_steps": 7924, "loss": 0.2737, "lr": 3.539801400378793e-05, "epoch": 2.090106007067138, "percentage": 29.85, "elapsed_time": "6:03:56", "remaining_time": "14:15:27"} +{"current_steps": 2370, "total_steps": 7924, "loss": 0.2733, "lr": 3.5369862093588946e-05, "epoch": 2.0945229681978796, "percentage": 29.91, "elapsed_time": "6:04:42", "remaining_time": "14:14:40"} +{"current_steps": 2375, "total_steps": 7924, "loss": 0.3283, "lr": 3.534163560574304e-05, "epoch": 2.0989399293286217, "percentage": 29.97, "elapsed_time": "6:05:28", "remaining_time": "14:13:53"} +{"current_steps": 2380, "total_steps": 7924, "loss": 0.3206, "lr": 3.531333467721078e-05, "epoch": 2.103356890459364, "percentage": 30.04, "elapsed_time": "6:06:14", "remaining_time": "14:13:07"} +{"current_steps": 2385, "total_steps": 7924, "loss": 0.2959, "lr": 3.5284959445313945e-05, "epoch": 2.107773851590106, "percentage": 30.1, "elapsed_time": "6:07:00", "remaining_time": "14:12:21"} +{"current_steps": 2390, "total_steps": 7924, "loss": 0.3114, "lr": 3.525651004773481e-05, "epoch": 2.112190812720848, "percentage": 30.16, "elapsed_time": "6:07:46", "remaining_time": "14:11:34"} +{"current_steps": 2395, "total_steps": 7924, "loss": 0.3066, "lr": 3.522798662251558e-05, "epoch": 2.11660777385159, "percentage": 30.22, "elapsed_time": "6:08:32", "remaining_time": "14:10:47"} +{"current_steps": 2400, "total_steps": 7924, "loss": 0.288, "lr": 3.51993893080576e-05, "epoch": 2.121024734982332, "percentage": 30.29, "elapsed_time": "6:09:18", "remaining_time": "14:10:01"} +{"current_steps": 2405, "total_steps": 7924, "loss": 0.3052, "lr": 3.517071824312077e-05, "epoch": 2.125441696113074, "percentage": 30.35, "elapsed_time": "6:10:04", "remaining_time": "14:09:14"} +{"current_steps": 2410, "total_steps": 7924, "loss": 0.2777, "lr": 3.5141973566822843e-05, "epoch": 2.1298586572438163, "percentage": 30.41, "elapsed_time": "6:10:50", "remaining_time": "14:08:28"} +{"current_steps": 2415, "total_steps": 7924, "loss": 0.2757, "lr": 3.511315541863873e-05, "epoch": 2.1342756183745584, "percentage": 30.48, "elapsed_time": "6:11:36", "remaining_time": "14:07:41"} +{"current_steps": 2420, "total_steps": 7924, "loss": 0.3008, "lr": 3.508426393839986e-05, "epoch": 2.1386925795053005, "percentage": 30.54, "elapsed_time": "6:12:22", "remaining_time": "14:06:54"} +{"current_steps": 2425, "total_steps": 7924, "loss": 0.2822, "lr": 3.505529926629348e-05, "epoch": 2.1431095406360425, "percentage": 30.6, "elapsed_time": "6:13:08", "remaining_time": "14:06:08"} +{"current_steps": 2430, "total_steps": 7924, "loss": 0.2722, "lr": 3.502626154286196e-05, "epoch": 2.1475265017667846, "percentage": 30.67, "elapsed_time": "6:13:54", "remaining_time": "14:05:21"} +{"current_steps": 2435, "total_steps": 7924, "loss": 0.2772, "lr": 3.4997150909002156e-05, "epoch": 2.1519434628975267, "percentage": 30.73, "elapsed_time": "6:14:40", "remaining_time": "14:04:35"} +{"current_steps": 2440, "total_steps": 7924, "loss": 0.3005, "lr": 3.496796750596469e-05, "epoch": 2.1563604240282688, "percentage": 30.79, "elapsed_time": "6:15:26", "remaining_time": "14:03:48"} +{"current_steps": 2445, "total_steps": 7924, "loss": 0.262, "lr": 3.4938711475353286e-05, "epoch": 2.1607773851590104, "percentage": 30.86, "elapsed_time": "6:16:12", "remaining_time": "14:03:02"} +{"current_steps": 2450, "total_steps": 7924, "loss": 0.3254, "lr": 3.490938295912404e-05, "epoch": 2.1651943462897525, "percentage": 30.92, "elapsed_time": "6:16:58", "remaining_time": "14:02:16"} +{"current_steps": 2455, "total_steps": 7924, "loss": 0.2815, "lr": 3.487998209958479e-05, "epoch": 2.1696113074204946, "percentage": 30.98, "elapsed_time": "6:17:44", "remaining_time": "14:01:29"} +{"current_steps": 2460, "total_steps": 7924, "loss": 0.2918, "lr": 3.485050903939439e-05, "epoch": 2.1740282685512367, "percentage": 31.04, "elapsed_time": "6:18:30", "remaining_time": "14:00:42"} +{"current_steps": 2465, "total_steps": 7924, "loss": 0.3105, "lr": 3.482096392156203e-05, "epoch": 2.1784452296819787, "percentage": 31.11, "elapsed_time": "6:19:16", "remaining_time": "13:59:56"} +{"current_steps": 2470, "total_steps": 7924, "loss": 0.2912, "lr": 3.4791346889446536e-05, "epoch": 2.182862190812721, "percentage": 31.17, "elapsed_time": "6:20:02", "remaining_time": "13:59:09"} +{"current_steps": 2475, "total_steps": 7924, "loss": 0.2811, "lr": 3.476165808675567e-05, "epoch": 2.187279151943463, "percentage": 31.23, "elapsed_time": "6:20:47", "remaining_time": "13:58:22"} +{"current_steps": 2480, "total_steps": 7924, "loss": 0.3342, "lr": 3.473189765754544e-05, "epoch": 2.191696113074205, "percentage": 31.3, "elapsed_time": "6:21:34", "remaining_time": "13:57:36"} +{"current_steps": 2485, "total_steps": 7924, "loss": 0.3031, "lr": 3.4702065746219416e-05, "epoch": 2.196113074204947, "percentage": 31.36, "elapsed_time": "6:22:20", "remaining_time": "13:56:50"} +{"current_steps": 2490, "total_steps": 7924, "loss": 0.3215, "lr": 3.467216249752799e-05, "epoch": 2.200530035335689, "percentage": 31.42, "elapsed_time": "6:23:06", "remaining_time": "13:56:03"} +{"current_steps": 2495, "total_steps": 7924, "loss": 0.2966, "lr": 3.4642188056567726e-05, "epoch": 2.204946996466431, "percentage": 31.49, "elapsed_time": "6:23:52", "remaining_time": "13:55:17"} +{"current_steps": 2500, "total_steps": 7924, "loss": 0.2672, "lr": 3.461214256878059e-05, "epoch": 2.2093639575971733, "percentage": 31.55, "elapsed_time": "6:24:38", "remaining_time": "13:54:31"} +{"current_steps": 2505, "total_steps": 7924, "loss": 0.293, "lr": 3.458202617995332e-05, "epoch": 2.2137809187279154, "percentage": 31.61, "elapsed_time": "6:25:24", "remaining_time": "13:53:45"} +{"current_steps": 2510, "total_steps": 7924, "loss": 0.2677, "lr": 3.4551839036216645e-05, "epoch": 2.218197879858657, "percentage": 31.68, "elapsed_time": "6:26:10", "remaining_time": "13:52:58"} +{"current_steps": 2515, "total_steps": 7924, "loss": 0.3118, "lr": 3.452158128404465e-05, "epoch": 2.222614840989399, "percentage": 31.74, "elapsed_time": "6:26:56", "remaining_time": "13:52:11"} +{"current_steps": 2520, "total_steps": 7924, "loss": 0.2811, "lr": 3.449125307025399e-05, "epoch": 2.227031802120141, "percentage": 31.8, "elapsed_time": "6:27:42", "remaining_time": "13:51:25"} +{"current_steps": 2525, "total_steps": 7924, "loss": 0.2657, "lr": 3.446085454200322e-05, "epoch": 2.2314487632508833, "percentage": 31.87, "elapsed_time": "6:28:28", "remaining_time": "13:50:39"} +{"current_steps": 2530, "total_steps": 7924, "loss": 0.294, "lr": 3.44303858467921e-05, "epoch": 2.2358657243816253, "percentage": 31.93, "elapsed_time": "6:29:14", "remaining_time": "13:49:52"} +{"current_steps": 2535, "total_steps": 7924, "loss": 0.3009, "lr": 3.4399847132460826e-05, "epoch": 2.2402826855123674, "percentage": 31.99, "elapsed_time": "6:30:00", "remaining_time": "13:49:06"} +{"current_steps": 2540, "total_steps": 7924, "loss": 0.2864, "lr": 3.436923854718935e-05, "epoch": 2.2446996466431095, "percentage": 32.05, "elapsed_time": "6:30:46", "remaining_time": "13:48:19"} +{"current_steps": 2545, "total_steps": 7924, "loss": 0.3324, "lr": 3.433856023949666e-05, "epoch": 2.2491166077738516, "percentage": 32.12, "elapsed_time": "6:31:32", "remaining_time": "13:47:32"} +{"current_steps": 2550, "total_steps": 7924, "loss": 0.3372, "lr": 3.430781235824006e-05, "epoch": 2.2535335689045937, "percentage": 32.18, "elapsed_time": "6:32:18", "remaining_time": "13:46:46"} +{"current_steps": 2555, "total_steps": 7924, "loss": 0.276, "lr": 3.427699505261439e-05, "epoch": 2.2579505300353357, "percentage": 32.24, "elapsed_time": "6:33:04", "remaining_time": "13:45:58"} +{"current_steps": 2560, "total_steps": 7924, "loss": 0.3106, "lr": 3.4246108472151404e-05, "epoch": 2.262367491166078, "percentage": 32.31, "elapsed_time": "6:33:50", "remaining_time": "13:45:12"} +{"current_steps": 2565, "total_steps": 7924, "loss": 0.3131, "lr": 3.421515276671897e-05, "epoch": 2.26678445229682, "percentage": 32.37, "elapsed_time": "6:34:36", "remaining_time": "13:44:26"} +{"current_steps": 2570, "total_steps": 7924, "loss": 0.2993, "lr": 3.418412808652037e-05, "epoch": 2.271201413427562, "percentage": 32.43, "elapsed_time": "6:35:22", "remaining_time": "13:43:40"} +{"current_steps": 2575, "total_steps": 7924, "loss": 0.2756, "lr": 3.4153034582093546e-05, "epoch": 2.275618374558304, "percentage": 32.5, "elapsed_time": "6:36:08", "remaining_time": "13:42:54"} +{"current_steps": 2580, "total_steps": 7924, "loss": 0.3148, "lr": 3.412187240431043e-05, "epoch": 2.280035335689046, "percentage": 32.56, "elapsed_time": "6:36:54", "remaining_time": "13:42:07"} +{"current_steps": 2585, "total_steps": 7924, "loss": 0.3066, "lr": 3.409064170437612e-05, "epoch": 2.2844522968197882, "percentage": 32.62, "elapsed_time": "6:37:40", "remaining_time": "13:41:21"} +{"current_steps": 2590, "total_steps": 7924, "loss": 0.2891, "lr": 3.405934263382824e-05, "epoch": 2.28886925795053, "percentage": 32.69, "elapsed_time": "6:38:26", "remaining_time": "13:40:34"} +{"current_steps": 2595, "total_steps": 7924, "loss": 0.2759, "lr": 3.4027975344536125e-05, "epoch": 2.293286219081272, "percentage": 32.75, "elapsed_time": "6:39:12", "remaining_time": "13:39:47"} +{"current_steps": 2600, "total_steps": 7924, "loss": 0.3083, "lr": 3.399653998870016e-05, "epoch": 2.297703180212014, "percentage": 32.81, "elapsed_time": "6:39:58", "remaining_time": "13:39:00"} +{"current_steps": 2605, "total_steps": 7924, "loss": 0.2856, "lr": 3.396503671885098e-05, "epoch": 2.302120141342756, "percentage": 32.87, "elapsed_time": "6:40:43", "remaining_time": "13:38:13"} +{"current_steps": 2610, "total_steps": 7924, "loss": 0.2716, "lr": 3.3933465687848745e-05, "epoch": 2.306537102473498, "percentage": 32.94, "elapsed_time": "6:41:29", "remaining_time": "13:37:27"} +{"current_steps": 2615, "total_steps": 7924, "loss": 0.2747, "lr": 3.390182704888242e-05, "epoch": 2.3109540636042403, "percentage": 33.0, "elapsed_time": "6:42:15", "remaining_time": "13:36:40"} +{"current_steps": 2620, "total_steps": 7924, "loss": 0.2731, "lr": 3.387012095546903e-05, "epoch": 2.3153710247349824, "percentage": 33.06, "elapsed_time": "6:43:01", "remaining_time": "13:35:53"} +{"current_steps": 2625, "total_steps": 7924, "loss": 0.2676, "lr": 3.3838347561452854e-05, "epoch": 2.3197879858657244, "percentage": 33.13, "elapsed_time": "6:43:47", "remaining_time": "13:35:06"} +{"current_steps": 2630, "total_steps": 7924, "loss": 0.3206, "lr": 3.380650702100478e-05, "epoch": 2.3242049469964665, "percentage": 33.19, "elapsed_time": "6:44:33", "remaining_time": "13:34:19"} +{"current_steps": 2635, "total_steps": 7924, "loss": 0.2588, "lr": 3.3774599488621477e-05, "epoch": 2.3286219081272086, "percentage": 33.25, "elapsed_time": "6:45:18", "remaining_time": "13:33:32"} +{"current_steps": 2640, "total_steps": 7924, "loss": 0.287, "lr": 3.374262511912468e-05, "epoch": 2.3330388692579507, "percentage": 33.32, "elapsed_time": "6:46:04", "remaining_time": "13:32:46"} +{"current_steps": 2645, "total_steps": 7924, "loss": 0.3141, "lr": 3.371058406766043e-05, "epoch": 2.3374558303886928, "percentage": 33.38, "elapsed_time": "6:46:50", "remaining_time": "13:32:00"} +{"current_steps": 2650, "total_steps": 7924, "loss": 0.2838, "lr": 3.3678476489698316e-05, "epoch": 2.3418727915194344, "percentage": 33.44, "elapsed_time": "6:47:37", "remaining_time": "13:31:14"} +{"current_steps": 2655, "total_steps": 7924, "loss": 0.2916, "lr": 3.364630254103073e-05, "epoch": 2.3462897526501765, "percentage": 33.51, "elapsed_time": "6:48:23", "remaining_time": "13:30:29"} +{"current_steps": 2660, "total_steps": 7924, "loss": 0.2578, "lr": 3.3614062377772124e-05, "epoch": 2.3507067137809186, "percentage": 33.57, "elapsed_time": "6:49:10", "remaining_time": "13:29:43"} +{"current_steps": 2665, "total_steps": 7924, "loss": 0.2999, "lr": 3.358175615635821e-05, "epoch": 2.3551236749116606, "percentage": 33.63, "elapsed_time": "6:49:56", "remaining_time": "13:28:57"} +{"current_steps": 2670, "total_steps": 7924, "loss": 0.2791, "lr": 3.354938403354524e-05, "epoch": 2.3595406360424027, "percentage": 33.7, "elapsed_time": "6:50:42", "remaining_time": "13:28:11"} +{"current_steps": 2675, "total_steps": 7924, "loss": 0.2736, "lr": 3.351694616640924e-05, "epoch": 2.363957597173145, "percentage": 33.76, "elapsed_time": "6:51:28", "remaining_time": "13:27:25"} +{"current_steps": 2680, "total_steps": 7924, "loss": 0.2929, "lr": 3.348444271234523e-05, "epoch": 2.368374558303887, "percentage": 33.82, "elapsed_time": "6:52:15", "remaining_time": "13:26:41"} +{"current_steps": 2685, "total_steps": 7924, "loss": 0.2975, "lr": 3.3451873829066474e-05, "epoch": 2.372791519434629, "percentage": 33.88, "elapsed_time": "6:53:02", "remaining_time": "13:25:56"} +{"current_steps": 2690, "total_steps": 7924, "loss": 0.2851, "lr": 3.341923967460371e-05, "epoch": 2.377208480565371, "percentage": 33.95, "elapsed_time": "6:53:48", "remaining_time": "13:25:09"} +{"current_steps": 2695, "total_steps": 7924, "loss": 0.3136, "lr": 3.338654040730439e-05, "epoch": 2.381625441696113, "percentage": 34.01, "elapsed_time": "6:54:34", "remaining_time": "13:24:23"} +{"current_steps": 2700, "total_steps": 7924, "loss": 0.3478, "lr": 3.335377618583191e-05, "epoch": 2.386042402826855, "percentage": 34.07, "elapsed_time": "6:55:20", "remaining_time": "13:23:36"} +{"current_steps": 2705, "total_steps": 7924, "loss": 0.2869, "lr": 3.332094716916481e-05, "epoch": 2.3904593639575973, "percentage": 34.14, "elapsed_time": "6:56:07", "remaining_time": "13:22:51"} +{"current_steps": 2710, "total_steps": 7924, "loss": 0.2872, "lr": 3.328805351659606e-05, "epoch": 2.3948763250883394, "percentage": 34.2, "elapsed_time": "6:56:53", "remaining_time": "13:22:04"} +{"current_steps": 2715, "total_steps": 7924, "loss": 0.2787, "lr": 3.3255095387732245e-05, "epoch": 2.3992932862190814, "percentage": 34.26, "elapsed_time": "6:57:38", "remaining_time": "13:21:17"} +{"current_steps": 2720, "total_steps": 7924, "loss": 0.283, "lr": 3.3222072942492807e-05, "epoch": 2.4037102473498235, "percentage": 34.33, "elapsed_time": "6:58:25", "remaining_time": "13:20:32"} +{"current_steps": 2725, "total_steps": 7924, "loss": 0.3262, "lr": 3.318898634110925e-05, "epoch": 2.4081272084805656, "percentage": 34.39, "elapsed_time": "6:59:11", "remaining_time": "13:19:45"} +{"current_steps": 2730, "total_steps": 7924, "loss": 0.2902, "lr": 3.31558357441244e-05, "epoch": 2.4125441696113072, "percentage": 34.45, "elapsed_time": "6:59:56", "remaining_time": "13:18:58"} +{"current_steps": 2735, "total_steps": 7924, "loss": 0.3299, "lr": 3.312262131239157e-05, "epoch": 2.4169611307420493, "percentage": 34.52, "elapsed_time": "7:00:43", "remaining_time": "13:18:14"} +{"current_steps": 2740, "total_steps": 7924, "loss": 0.2914, "lr": 3.308934320707385e-05, "epoch": 2.4213780918727914, "percentage": 34.58, "elapsed_time": "7:01:29", "remaining_time": "13:17:27"} +{"current_steps": 2745, "total_steps": 7924, "loss": 0.3168, "lr": 3.305600158964325e-05, "epoch": 2.4257950530035335, "percentage": 34.64, "elapsed_time": "7:02:18", "remaining_time": "13:16:46"} +{"current_steps": 2750, "total_steps": 7924, "loss": 0.3298, "lr": 3.3022596621879976e-05, "epoch": 2.4302120141342756, "percentage": 34.7, "elapsed_time": "7:03:04", "remaining_time": "13:16:00"} +{"current_steps": 2755, "total_steps": 7924, "loss": 0.2721, "lr": 3.298912846587162e-05, "epoch": 2.4346289752650176, "percentage": 34.77, "elapsed_time": "7:03:50", "remaining_time": "13:15:13"} +{"current_steps": 2760, "total_steps": 7924, "loss": 0.304, "lr": 3.2955597284012375e-05, "epoch": 2.4390459363957597, "percentage": 34.83, "elapsed_time": "7:04:36", "remaining_time": "13:14:27"} +{"current_steps": 2765, "total_steps": 7924, "loss": 0.322, "lr": 3.2922003239002234e-05, "epoch": 2.443462897526502, "percentage": 34.89, "elapsed_time": "7:05:23", "remaining_time": "13:13:42"} +{"current_steps": 2770, "total_steps": 7924, "loss": 0.2833, "lr": 3.288834649384624e-05, "epoch": 2.447879858657244, "percentage": 34.96, "elapsed_time": "7:06:09", "remaining_time": "13:12:56"} +{"current_steps": 2775, "total_steps": 7924, "loss": 0.329, "lr": 3.2854627211853656e-05, "epoch": 2.452296819787986, "percentage": 35.02, "elapsed_time": "7:06:55", "remaining_time": "13:12:10"} +{"current_steps": 2780, "total_steps": 7924, "loss": 0.3262, "lr": 3.2820845556637173e-05, "epoch": 2.456713780918728, "percentage": 35.08, "elapsed_time": "7:07:41", "remaining_time": "13:11:23"} +{"current_steps": 2785, "total_steps": 7924, "loss": 0.2892, "lr": 3.278700169211216e-05, "epoch": 2.46113074204947, "percentage": 35.15, "elapsed_time": "7:08:27", "remaining_time": "13:10:36"} +{"current_steps": 2790, "total_steps": 7924, "loss": 0.2874, "lr": 3.275309578249581e-05, "epoch": 2.4655477031802118, "percentage": 35.21, "elapsed_time": "7:09:13", "remaining_time": "13:09:50"} +{"current_steps": 2795, "total_steps": 7924, "loss": 0.2761, "lr": 3.2719127992306386e-05, "epoch": 2.469964664310954, "percentage": 35.27, "elapsed_time": "7:09:59", "remaining_time": "13:09:04"} +{"current_steps": 2800, "total_steps": 7924, "loss": 0.2861, "lr": 3.26850984863624e-05, "epoch": 2.474381625441696, "percentage": 35.34, "elapsed_time": "7:10:45", "remaining_time": "13:08:17"} +{"current_steps": 2805, "total_steps": 7924, "loss": 0.2931, "lr": 3.265100742978183e-05, "epoch": 2.478798586572438, "percentage": 35.4, "elapsed_time": "7:11:31", "remaining_time": "13:07:31"} +{"current_steps": 2810, "total_steps": 7924, "loss": 0.2993, "lr": 3.261685498798131e-05, "epoch": 2.48321554770318, "percentage": 35.46, "elapsed_time": "7:12:17", "remaining_time": "13:06:44"} +{"current_steps": 2815, "total_steps": 7924, "loss": 0.2439, "lr": 3.258264132667531e-05, "epoch": 2.487632508833922, "percentage": 35.52, "elapsed_time": "7:13:03", "remaining_time": "13:05:58"} +{"current_steps": 2820, "total_steps": 7924, "loss": 0.2679, "lr": 3.254836661187537e-05, "epoch": 2.4920494699646643, "percentage": 35.59, "elapsed_time": "7:13:49", "remaining_time": "13:05:11"} +{"current_steps": 2825, "total_steps": 7924, "loss": 0.2843, "lr": 3.2514031009889264e-05, "epoch": 2.4964664310954063, "percentage": 35.65, "elapsed_time": "7:14:35", "remaining_time": "13:04:25"} +{"current_steps": 2830, "total_steps": 7924, "loss": 0.2926, "lr": 3.247963468732021e-05, "epoch": 2.5008833922261484, "percentage": 35.71, "elapsed_time": "7:15:21", "remaining_time": "13:03:39"} +{"current_steps": 2835, "total_steps": 7924, "loss": 0.2898, "lr": 3.244517781106604e-05, "epoch": 2.5053003533568905, "percentage": 35.78, "elapsed_time": "7:16:08", "remaining_time": "13:02:53"} +{"current_steps": 2840, "total_steps": 7924, "loss": 0.2766, "lr": 3.241066054831842e-05, "epoch": 2.5097173144876326, "percentage": 35.84, "elapsed_time": "7:16:54", "remaining_time": "13:02:07"} +{"current_steps": 2845, "total_steps": 7924, "loss": 0.2677, "lr": 3.237608306656201e-05, "epoch": 2.5141342756183747, "percentage": 35.9, "elapsed_time": "7:17:40", "remaining_time": "13:01:20"} +{"current_steps": 2850, "total_steps": 7924, "loss": 0.3027, "lr": 3.234144553357368e-05, "epoch": 2.5185512367491167, "percentage": 35.97, "elapsed_time": "7:18:26", "remaining_time": "13:00:34"} +{"current_steps": 2855, "total_steps": 7924, "loss": 0.2627, "lr": 3.230674811742167e-05, "epoch": 2.522968197879859, "percentage": 36.03, "elapsed_time": "7:19:12", "remaining_time": "12:59:47"} +{"current_steps": 2860, "total_steps": 7924, "loss": 0.2944, "lr": 3.227199098646479e-05, "epoch": 2.527385159010601, "percentage": 36.09, "elapsed_time": "7:19:58", "remaining_time": "12:59:01"} +{"current_steps": 2865, "total_steps": 7924, "loss": 0.3646, "lr": 3.223717430935158e-05, "epoch": 2.531802120141343, "percentage": 36.16, "elapsed_time": "7:20:44", "remaining_time": "12:58:15"} +{"current_steps": 2870, "total_steps": 7924, "loss": 0.2613, "lr": 3.2202298255019546e-05, "epoch": 2.536219081272085, "percentage": 36.22, "elapsed_time": "7:21:30", "remaining_time": "12:57:28"} +{"current_steps": 2875, "total_steps": 7924, "loss": 0.2772, "lr": 3.216736299269427e-05, "epoch": 2.5406360424028267, "percentage": 36.28, "elapsed_time": "7:22:16", "remaining_time": "12:56:42"} +{"current_steps": 2880, "total_steps": 7924, "loss": 0.3025, "lr": 3.213236869188864e-05, "epoch": 2.545053003533569, "percentage": 36.35, "elapsed_time": "7:23:02", "remaining_time": "12:55:56"} +{"current_steps": 2885, "total_steps": 7924, "loss": 0.2999, "lr": 3.209731552240201e-05, "epoch": 2.549469964664311, "percentage": 36.41, "elapsed_time": "7:23:48", "remaining_time": "12:55:09"} +{"current_steps": 2890, "total_steps": 7924, "loss": 0.3097, "lr": 3.206220365431937e-05, "epoch": 2.553886925795053, "percentage": 36.47, "elapsed_time": "7:24:34", "remaining_time": "12:54:23"} +{"current_steps": 2895, "total_steps": 7924, "loss": 0.2994, "lr": 3.202703325801054e-05, "epoch": 2.558303886925795, "percentage": 36.53, "elapsed_time": "7:25:20", "remaining_time": "12:53:36"} +{"current_steps": 2900, "total_steps": 7924, "loss": 0.3254, "lr": 3.19918045041293e-05, "epoch": 2.562720848056537, "percentage": 36.6, "elapsed_time": "7:26:06", "remaining_time": "12:52:50"} +{"current_steps": 2905, "total_steps": 7924, "loss": 0.3266, "lr": 3.1956517563612645e-05, "epoch": 2.567137809187279, "percentage": 36.66, "elapsed_time": "7:26:52", "remaining_time": "12:52:03"} +{"current_steps": 2910, "total_steps": 7924, "loss": 0.3028, "lr": 3.1921172607679846e-05, "epoch": 2.5715547703180213, "percentage": 36.72, "elapsed_time": "7:27:38", "remaining_time": "12:51:17"} +{"current_steps": 2915, "total_steps": 7924, "loss": 0.29, "lr": 3.1885769807831714e-05, "epoch": 2.5759717314487633, "percentage": 36.79, "elapsed_time": "7:28:24", "remaining_time": "12:50:31"} +{"current_steps": 2920, "total_steps": 7924, "loss": 0.3317, "lr": 3.185030933584972e-05, "epoch": 2.5803886925795054, "percentage": 36.85, "elapsed_time": "7:29:10", "remaining_time": "12:49:44"} +{"current_steps": 2925, "total_steps": 7924, "loss": 0.314, "lr": 3.181479136379518e-05, "epoch": 2.5848056537102475, "percentage": 36.91, "elapsed_time": "7:29:56", "remaining_time": "12:48:59"} +{"current_steps": 2930, "total_steps": 7924, "loss": 0.2583, "lr": 3.177921606400838e-05, "epoch": 2.589222614840989, "percentage": 36.98, "elapsed_time": "7:30:43", "remaining_time": "12:48:13"} +{"current_steps": 2935, "total_steps": 7924, "loss": 0.2935, "lr": 3.1743583609107815e-05, "epoch": 2.5936395759717312, "percentage": 37.04, "elapsed_time": "7:31:29", "remaining_time": "12:47:27"} +{"current_steps": 2940, "total_steps": 7924, "loss": 0.2884, "lr": 3.1707894171989266e-05, "epoch": 2.5980565371024733, "percentage": 37.1, "elapsed_time": "7:32:15", "remaining_time": "12:46:41"} +{"current_steps": 2945, "total_steps": 7924, "loss": 0.3147, "lr": 3.167214792582505e-05, "epoch": 2.6024734982332154, "percentage": 37.17, "elapsed_time": "7:33:02", "remaining_time": "12:45:55"} +{"current_steps": 2950, "total_steps": 7924, "loss": 0.2943, "lr": 3.163634504406309e-05, "epoch": 2.6068904593639575, "percentage": 37.23, "elapsed_time": "7:33:48", "remaining_time": "12:45:09"} +{"current_steps": 2955, "total_steps": 7924, "loss": 0.2724, "lr": 3.160048570042614e-05, "epoch": 2.6113074204946995, "percentage": 37.29, "elapsed_time": "7:34:34", "remaining_time": "12:44:23"} +{"current_steps": 2960, "total_steps": 7924, "loss": 0.2943, "lr": 3.1564570068910905e-05, "epoch": 2.6157243816254416, "percentage": 37.35, "elapsed_time": "7:35:20", "remaining_time": "12:43:37"} +{"current_steps": 2965, "total_steps": 7924, "loss": 0.2963, "lr": 3.152859832378723e-05, "epoch": 2.6201413427561837, "percentage": 37.42, "elapsed_time": "7:36:06", "remaining_time": "12:42:51"} +{"current_steps": 2970, "total_steps": 7924, "loss": 0.2916, "lr": 3.1492570639597216e-05, "epoch": 2.624558303886926, "percentage": 37.48, "elapsed_time": "7:36:52", "remaining_time": "12:42:04"} +{"current_steps": 2975, "total_steps": 7924, "loss": 0.2875, "lr": 3.145648719115439e-05, "epoch": 2.628975265017668, "percentage": 37.54, "elapsed_time": "7:37:38", "remaining_time": "12:41:18"} +{"current_steps": 2980, "total_steps": 7924, "loss": 0.3208, "lr": 3.1420348153542875e-05, "epoch": 2.63339222614841, "percentage": 37.61, "elapsed_time": "7:38:24", "remaining_time": "12:40:32"} +{"current_steps": 2985, "total_steps": 7924, "loss": 0.3028, "lr": 3.138415370211651e-05, "epoch": 2.637809187279152, "percentage": 37.67, "elapsed_time": "7:39:11", "remaining_time": "12:39:46"} +{"current_steps": 2990, "total_steps": 7924, "loss": 0.2762, "lr": 3.1347904012498015e-05, "epoch": 2.642226148409894, "percentage": 37.73, "elapsed_time": "7:39:56", "remaining_time": "12:38:59"} +{"current_steps": 2995, "total_steps": 7924, "loss": 0.2736, "lr": 3.1311599260578144e-05, "epoch": 2.646643109540636, "percentage": 37.8, "elapsed_time": "7:40:42", "remaining_time": "12:38:12"} +{"current_steps": 3000, "total_steps": 7924, "loss": 0.2793, "lr": 3.1275239622514805e-05, "epoch": 2.6510600706713783, "percentage": 37.86, "elapsed_time": "7:41:28", "remaining_time": "12:37:26"} +{"current_steps": 3005, "total_steps": 7924, "loss": 0.2988, "lr": 3.123882527473226e-05, "epoch": 2.6554770318021204, "percentage": 37.92, "elapsed_time": "7:42:46", "remaining_time": "12:37:32"} +{"current_steps": 3010, "total_steps": 7924, "loss": 0.2891, "lr": 3.1202356393920205e-05, "epoch": 2.6598939929328624, "percentage": 37.99, "elapsed_time": "7:43:32", "remaining_time": "12:36:45"} +{"current_steps": 3015, "total_steps": 7924, "loss": 0.3026, "lr": 3.1165833157032945e-05, "epoch": 2.664310954063604, "percentage": 38.05, "elapsed_time": "7:44:18", "remaining_time": "12:35:58"} +{"current_steps": 3020, "total_steps": 7924, "loss": 0.3406, "lr": 3.112925574128853e-05, "epoch": 2.668727915194346, "percentage": 38.11, "elapsed_time": "7:45:04", "remaining_time": "12:35:12"} +{"current_steps": 3025, "total_steps": 7924, "loss": 0.2728, "lr": 3.109262432416791e-05, "epoch": 2.6731448763250882, "percentage": 38.18, "elapsed_time": "7:45:50", "remaining_time": "12:34:25"} +{"current_steps": 3030, "total_steps": 7924, "loss": 0.3298, "lr": 3.105593908341405e-05, "epoch": 2.6775618374558303, "percentage": 38.24, "elapsed_time": "7:46:35", "remaining_time": "12:33:38"} +{"current_steps": 3035, "total_steps": 7924, "loss": 0.2969, "lr": 3.1019200197031074e-05, "epoch": 2.6819787985865724, "percentage": 38.3, "elapsed_time": "7:47:21", "remaining_time": "12:32:51"} +{"current_steps": 3040, "total_steps": 7924, "loss": 0.2909, "lr": 3.098240784328342e-05, "epoch": 2.6863957597173145, "percentage": 38.36, "elapsed_time": "7:48:07", "remaining_time": "12:32:04"} +{"current_steps": 3045, "total_steps": 7924, "loss": 0.296, "lr": 3.094556220069495e-05, "epoch": 2.6908127208480566, "percentage": 38.43, "elapsed_time": "7:48:53", "remaining_time": "12:31:17"} +{"current_steps": 3050, "total_steps": 7924, "loss": 0.3134, "lr": 3.09086634480481e-05, "epoch": 2.6952296819787986, "percentage": 38.49, "elapsed_time": "7:49:39", "remaining_time": "12:30:30"} +{"current_steps": 3055, "total_steps": 7924, "loss": 0.2997, "lr": 3.087171176438299e-05, "epoch": 2.6996466431095407, "percentage": 38.55, "elapsed_time": "7:50:25", "remaining_time": "12:29:44"} +{"current_steps": 3060, "total_steps": 7924, "loss": 0.2636, "lr": 3.083470732899659e-05, "epoch": 2.704063604240283, "percentage": 38.62, "elapsed_time": "7:51:11", "remaining_time": "12:28:59"} +{"current_steps": 3065, "total_steps": 7924, "loss": 0.2955, "lr": 3.0797650321441836e-05, "epoch": 2.708480565371025, "percentage": 38.68, "elapsed_time": "7:51:58", "remaining_time": "12:28:13"} +{"current_steps": 3070, "total_steps": 7924, "loss": 0.3018, "lr": 3.076054092152673e-05, "epoch": 2.7128975265017665, "percentage": 38.74, "elapsed_time": "7:52:44", "remaining_time": "12:27:27"} +{"current_steps": 3075, "total_steps": 7924, "loss": 0.3081, "lr": 3.072337930931351e-05, "epoch": 2.7173144876325086, "percentage": 38.81, "elapsed_time": "7:53:30", "remaining_time": "12:26:40"} +{"current_steps": 3080, "total_steps": 7924, "loss": 0.2835, "lr": 3.068616566511777e-05, "epoch": 2.7217314487632507, "percentage": 38.87, "elapsed_time": "7:54:16", "remaining_time": "12:25:53"} +{"current_steps": 3085, "total_steps": 7924, "loss": 0.3086, "lr": 3.0648900169507546e-05, "epoch": 2.7261484098939928, "percentage": 38.93, "elapsed_time": "7:55:02", "remaining_time": "12:25:07"} +{"current_steps": 3090, "total_steps": 7924, "loss": 0.2973, "lr": 3.0611583003302483e-05, "epoch": 2.730565371024735, "percentage": 39.0, "elapsed_time": "7:55:48", "remaining_time": "12:24:20"} +{"current_steps": 3095, "total_steps": 7924, "loss": 0.2868, "lr": 3.0574214347572944e-05, "epoch": 2.734982332155477, "percentage": 39.06, "elapsed_time": "7:56:34", "remaining_time": "12:23:34"} +{"current_steps": 3100, "total_steps": 7924, "loss": 0.315, "lr": 3.0536794383639124e-05, "epoch": 2.739399293286219, "percentage": 39.12, "elapsed_time": "7:57:20", "remaining_time": "12:22:47"} +{"current_steps": 3105, "total_steps": 7924, "loss": 0.2744, "lr": 3.0499323293070168e-05, "epoch": 2.743816254416961, "percentage": 39.18, "elapsed_time": "7:58:06", "remaining_time": "12:22:01"} +{"current_steps": 3110, "total_steps": 7924, "loss": 0.2514, "lr": 3.0461801257683316e-05, "epoch": 2.748233215547703, "percentage": 39.25, "elapsed_time": "7:58:52", "remaining_time": "12:21:14"} +{"current_steps": 3115, "total_steps": 7924, "loss": 0.3227, "lr": 3.0424228459542996e-05, "epoch": 2.7526501766784452, "percentage": 39.31, "elapsed_time": "7:59:38", "remaining_time": "12:20:28"} +{"current_steps": 3120, "total_steps": 7924, "loss": 0.3368, "lr": 3.0386605080959933e-05, "epoch": 2.7570671378091873, "percentage": 39.37, "elapsed_time": "8:00:24", "remaining_time": "12:19:42"} +{"current_steps": 3125, "total_steps": 7924, "loss": 0.3192, "lr": 3.0348931304490308e-05, "epoch": 2.7614840989399294, "percentage": 39.44, "elapsed_time": "8:01:10", "remaining_time": "12:18:56"} +{"current_steps": 3130, "total_steps": 7924, "loss": 0.3052, "lr": 3.0311207312934802e-05, "epoch": 2.7659010600706715, "percentage": 39.5, "elapsed_time": "8:01:56", "remaining_time": "12:18:09"} +{"current_steps": 3135, "total_steps": 7924, "loss": 0.3438, "lr": 3.0273433289337782e-05, "epoch": 2.7703180212014136, "percentage": 39.56, "elapsed_time": "8:02:42", "remaining_time": "12:17:22"} +{"current_steps": 3140, "total_steps": 7924, "loss": 0.3097, "lr": 3.0235609416986382e-05, "epoch": 2.7747349823321557, "percentage": 39.63, "elapsed_time": "8:03:28", "remaining_time": "12:16:36"} +{"current_steps": 3145, "total_steps": 7924, "loss": 0.2465, "lr": 3.0197735879409582e-05, "epoch": 2.7791519434628977, "percentage": 39.69, "elapsed_time": "8:04:14", "remaining_time": "12:15:49"} +{"current_steps": 3150, "total_steps": 7924, "loss": 0.2401, "lr": 3.015981286037737e-05, "epoch": 2.78356890459364, "percentage": 39.75, "elapsed_time": "8:05:00", "remaining_time": "12:15:03"} +{"current_steps": 3155, "total_steps": 7924, "loss": 0.2884, "lr": 3.0121840543899828e-05, "epoch": 2.787985865724382, "percentage": 39.82, "elapsed_time": "8:05:46", "remaining_time": "12:14:16"} +{"current_steps": 3160, "total_steps": 7924, "loss": 0.3056, "lr": 3.008381911422624e-05, "epoch": 2.7924028268551235, "percentage": 39.88, "elapsed_time": "8:06:32", "remaining_time": "12:13:30"} +{"current_steps": 3165, "total_steps": 7924, "loss": 0.2683, "lr": 3.0045748755844183e-05, "epoch": 2.7968197879858656, "percentage": 39.94, "elapsed_time": "8:07:18", "remaining_time": "12:12:43"} +{"current_steps": 3170, "total_steps": 7924, "loss": 0.3035, "lr": 3.000762965347866e-05, "epoch": 2.8012367491166077, "percentage": 40.01, "elapsed_time": "8:08:03", "remaining_time": "12:11:56"} +{"current_steps": 3175, "total_steps": 7924, "loss": 0.3052, "lr": 2.9969461992091187e-05, "epoch": 2.8056537102473498, "percentage": 40.07, "elapsed_time": "8:08:49", "remaining_time": "12:11:09"} +{"current_steps": 3180, "total_steps": 7924, "loss": 0.2972, "lr": 2.9931245956878892e-05, "epoch": 2.810070671378092, "percentage": 40.13, "elapsed_time": "8:09:35", "remaining_time": "12:10:22"} +{"current_steps": 3185, "total_steps": 7924, "loss": 0.2809, "lr": 2.9892981733273622e-05, "epoch": 2.814487632508834, "percentage": 40.19, "elapsed_time": "8:10:21", "remaining_time": "12:09:36"} +{"current_steps": 3190, "total_steps": 7924, "loss": 0.3045, "lr": 2.9854669506941056e-05, "epoch": 2.818904593639576, "percentage": 40.26, "elapsed_time": "8:11:07", "remaining_time": "12:08:49"} +{"current_steps": 3195, "total_steps": 7924, "loss": 0.3209, "lr": 2.9816309463779777e-05, "epoch": 2.823321554770318, "percentage": 40.32, "elapsed_time": "8:11:52", "remaining_time": "12:08:02"} +{"current_steps": 3200, "total_steps": 7924, "loss": 0.3045, "lr": 2.9777901789920393e-05, "epoch": 2.82773851590106, "percentage": 40.38, "elapsed_time": "8:12:38", "remaining_time": "12:07:15"} +{"current_steps": 3205, "total_steps": 7924, "loss": 0.2956, "lr": 2.9739446671724633e-05, "epoch": 2.8321554770318023, "percentage": 40.45, "elapsed_time": "8:13:24", "remaining_time": "12:06:28"} +{"current_steps": 3210, "total_steps": 7924, "loss": 0.3104, "lr": 2.9700944295784416e-05, "epoch": 2.836572438162544, "percentage": 40.51, "elapsed_time": "8:14:09", "remaining_time": "12:05:42"} +{"current_steps": 3215, "total_steps": 7924, "loss": 0.247, "lr": 2.9662394848920976e-05, "epoch": 2.840989399293286, "percentage": 40.57, "elapsed_time": "8:14:55", "remaining_time": "12:04:55"} +{"current_steps": 3220, "total_steps": 7924, "loss": 0.3235, "lr": 2.962379851818396e-05, "epoch": 2.845406360424028, "percentage": 40.64, "elapsed_time": "8:15:41", "remaining_time": "12:04:08"} +{"current_steps": 3225, "total_steps": 7924, "loss": 0.2588, "lr": 2.9585155490850463e-05, "epoch": 2.84982332155477, "percentage": 40.7, "elapsed_time": "8:16:27", "remaining_time": "12:03:22"} +{"current_steps": 3230, "total_steps": 7924, "loss": 0.2845, "lr": 2.954646595442421e-05, "epoch": 2.854240282685512, "percentage": 40.76, "elapsed_time": "8:17:13", "remaining_time": "12:02:36"} +{"current_steps": 3235, "total_steps": 7924, "loss": 0.2964, "lr": 2.9507730096634558e-05, "epoch": 2.8586572438162543, "percentage": 40.83, "elapsed_time": "8:17:59", "remaining_time": "12:01:49"} +{"current_steps": 3240, "total_steps": 7924, "loss": 0.2745, "lr": 2.9468948105435652e-05, "epoch": 2.8630742049469964, "percentage": 40.89, "elapsed_time": "8:18:45", "remaining_time": "12:01:02"} +{"current_steps": 3245, "total_steps": 7924, "loss": 0.2944, "lr": 2.943012016900548e-05, "epoch": 2.8674911660777385, "percentage": 40.95, "elapsed_time": "8:19:31", "remaining_time": "12:00:16"} +{"current_steps": 3250, "total_steps": 7924, "loss": 0.303, "lr": 2.9391246475744952e-05, "epoch": 2.8719081272084805, "percentage": 41.01, "elapsed_time": "8:20:17", "remaining_time": "11:59:29"} +{"current_steps": 3255, "total_steps": 7924, "loss": 0.3017, "lr": 2.9352327214277002e-05, "epoch": 2.8763250883392226, "percentage": 41.08, "elapsed_time": "8:21:03", "remaining_time": "11:58:43"} +{"current_steps": 3260, "total_steps": 7924, "loss": 0.2971, "lr": 2.931336257344569e-05, "epoch": 2.8807420494699647, "percentage": 41.14, "elapsed_time": "8:21:49", "remaining_time": "11:57:57"} +{"current_steps": 3265, "total_steps": 7924, "loss": 0.2663, "lr": 2.9274352742315234e-05, "epoch": 2.885159010600707, "percentage": 41.2, "elapsed_time": "8:22:35", "remaining_time": "11:57:10"} +{"current_steps": 3270, "total_steps": 7924, "loss": 0.2654, "lr": 2.923529791016916e-05, "epoch": 2.889575971731449, "percentage": 41.27, "elapsed_time": "8:23:21", "remaining_time": "11:56:24"} +{"current_steps": 3275, "total_steps": 7924, "loss": 0.3023, "lr": 2.919619826650932e-05, "epoch": 2.893992932862191, "percentage": 41.33, "elapsed_time": "8:24:07", "remaining_time": "11:55:38"} +{"current_steps": 3280, "total_steps": 7924, "loss": 0.2484, "lr": 2.9157054001055007e-05, "epoch": 2.898409893992933, "percentage": 41.39, "elapsed_time": "8:24:53", "remaining_time": "11:54:51"} +{"current_steps": 3285, "total_steps": 7924, "loss": 0.2768, "lr": 2.9117865303742043e-05, "epoch": 2.902826855123675, "percentage": 41.46, "elapsed_time": "8:25:39", "remaining_time": "11:54:04"} +{"current_steps": 3290, "total_steps": 7924, "loss": 0.2925, "lr": 2.9078632364721813e-05, "epoch": 2.907243816254417, "percentage": 41.52, "elapsed_time": "8:26:25", "remaining_time": "11:53:18"} +{"current_steps": 3295, "total_steps": 7924, "loss": 0.3046, "lr": 2.903935537436041e-05, "epoch": 2.9116607773851593, "percentage": 41.58, "elapsed_time": "8:27:11", "remaining_time": "11:52:31"} +{"current_steps": 3300, "total_steps": 7924, "loss": 0.2693, "lr": 2.900003452323764e-05, "epoch": 2.916077738515901, "percentage": 41.65, "elapsed_time": "8:27:57", "remaining_time": "11:51:45"} +{"current_steps": 3305, "total_steps": 7924, "loss": 0.2469, "lr": 2.8960670002146138e-05, "epoch": 2.920494699646643, "percentage": 41.71, "elapsed_time": "8:28:43", "remaining_time": "11:50:58"} +{"current_steps": 3310, "total_steps": 7924, "loss": 0.2897, "lr": 2.8921262002090443e-05, "epoch": 2.924911660777385, "percentage": 41.77, "elapsed_time": "8:29:29", "remaining_time": "11:50:12"} +{"current_steps": 3315, "total_steps": 7924, "loss": 0.2631, "lr": 2.888181071428607e-05, "epoch": 2.929328621908127, "percentage": 41.83, "elapsed_time": "8:30:15", "remaining_time": "11:49:25"} +{"current_steps": 3320, "total_steps": 7924, "loss": 0.3335, "lr": 2.884231633015854e-05, "epoch": 2.9337455830388692, "percentage": 41.9, "elapsed_time": "8:31:00", "remaining_time": "11:48:38"} +{"current_steps": 3325, "total_steps": 7924, "loss": 0.2989, "lr": 2.8802779041342527e-05, "epoch": 2.9381625441696113, "percentage": 41.96, "elapsed_time": "8:31:46", "remaining_time": "11:47:52"} +{"current_steps": 3330, "total_steps": 7924, "loss": 0.2627, "lr": 2.876319903968086e-05, "epoch": 2.9425795053003534, "percentage": 42.02, "elapsed_time": "8:32:32", "remaining_time": "11:47:05"} +{"current_steps": 3335, "total_steps": 7924, "loss": 0.27, "lr": 2.8723576517223635e-05, "epoch": 2.9469964664310955, "percentage": 42.09, "elapsed_time": "8:33:18", "remaining_time": "11:46:18"} +{"current_steps": 3340, "total_steps": 7924, "loss": 0.2604, "lr": 2.8683911666227254e-05, "epoch": 2.9514134275618376, "percentage": 42.15, "elapsed_time": "8:34:04", "remaining_time": "11:45:32"} +{"current_steps": 3345, "total_steps": 7924, "loss": 0.2799, "lr": 2.864420467915352e-05, "epoch": 2.9558303886925796, "percentage": 42.21, "elapsed_time": "8:34:50", "remaining_time": "11:44:46"} +{"current_steps": 3350, "total_steps": 7924, "loss": 0.2745, "lr": 2.8604455748668675e-05, "epoch": 2.9602473498233217, "percentage": 42.28, "elapsed_time": "8:35:36", "remaining_time": "11:43:59"} +{"current_steps": 3355, "total_steps": 7924, "loss": 0.3111, "lr": 2.8564665067642485e-05, "epoch": 2.9646643109540634, "percentage": 42.34, "elapsed_time": "8:36:22", "remaining_time": "11:43:12"} +{"current_steps": 3360, "total_steps": 7924, "loss": 0.3157, "lr": 2.8524832829147297e-05, "epoch": 2.9690812720848054, "percentage": 42.4, "elapsed_time": "8:37:08", "remaining_time": "11:42:26"} +{"current_steps": 3365, "total_steps": 7924, "loss": 0.2835, "lr": 2.8484959226457115e-05, "epoch": 2.9734982332155475, "percentage": 42.47, "elapsed_time": "8:37:54", "remaining_time": "11:41:39"} +{"current_steps": 3370, "total_steps": 7924, "loss": 0.2942, "lr": 2.8445044453046624e-05, "epoch": 2.9779151943462896, "percentage": 42.53, "elapsed_time": "8:38:39", "remaining_time": "11:40:53"} +{"current_steps": 3375, "total_steps": 7924, "loss": 0.2498, "lr": 2.8405088702590296e-05, "epoch": 2.9823321554770317, "percentage": 42.59, "elapsed_time": "8:39:25", "remaining_time": "11:40:06"} +{"current_steps": 3380, "total_steps": 7924, "loss": 0.2906, "lr": 2.8365092168961442e-05, "epoch": 2.9867491166077738, "percentage": 42.66, "elapsed_time": "8:40:11", "remaining_time": "11:39:20"} +{"current_steps": 3385, "total_steps": 7924, "loss": 0.2954, "lr": 2.8325055046231232e-05, "epoch": 2.991166077738516, "percentage": 42.72, "elapsed_time": "8:40:57", "remaining_time": "11:38:33"} +{"current_steps": 3390, "total_steps": 7924, "loss": 0.3104, "lr": 2.8284977528667806e-05, "epoch": 2.995583038869258, "percentage": 42.78, "elapsed_time": "8:41:43", "remaining_time": "11:37:47"} +{"current_steps": 3395, "total_steps": 7924, "loss": 0.2734, "lr": 2.8244859810735304e-05, "epoch": 3.0008833922261484, "percentage": 42.84, "elapsed_time": "8:42:29", "remaining_time": "11:37:00"} +{"current_steps": 3400, "total_steps": 7924, "loss": 0.2752, "lr": 2.8204702087092907e-05, "epoch": 3.0053003533568905, "percentage": 42.91, "elapsed_time": "8:43:15", "remaining_time": "11:36:14"} +{"current_steps": 3405, "total_steps": 7924, "loss": 0.2768, "lr": 2.8164504552593946e-05, "epoch": 3.0097173144876326, "percentage": 42.97, "elapsed_time": "8:44:01", "remaining_time": "11:35:27"} +{"current_steps": 3410, "total_steps": 7924, "loss": 0.2868, "lr": 2.8124267402284892e-05, "epoch": 3.0141342756183747, "percentage": 43.03, "elapsed_time": "8:44:47", "remaining_time": "11:34:41"} +{"current_steps": 3415, "total_steps": 7924, "loss": 0.2472, "lr": 2.808399083140445e-05, "epoch": 3.0185512367491167, "percentage": 43.1, "elapsed_time": "8:45:33", "remaining_time": "11:33:55"} +{"current_steps": 3420, "total_steps": 7924, "loss": 0.2434, "lr": 2.804367503538261e-05, "epoch": 3.022968197879859, "percentage": 43.16, "elapsed_time": "8:46:19", "remaining_time": "11:33:08"} +{"current_steps": 3425, "total_steps": 7924, "loss": 0.297, "lr": 2.800332020983968e-05, "epoch": 3.027385159010601, "percentage": 43.22, "elapsed_time": "8:47:05", "remaining_time": "11:32:22"} +{"current_steps": 3430, "total_steps": 7924, "loss": 0.2712, "lr": 2.796292655058535e-05, "epoch": 3.0318021201413425, "percentage": 43.29, "elapsed_time": "8:47:51", "remaining_time": "11:31:36"} +{"current_steps": 3435, "total_steps": 7924, "loss": 0.2873, "lr": 2.792249425361773e-05, "epoch": 3.0362190812720846, "percentage": 43.35, "elapsed_time": "8:48:37", "remaining_time": "11:30:49"} +{"current_steps": 3440, "total_steps": 7924, "loss": 0.2605, "lr": 2.788202351512243e-05, "epoch": 3.0406360424028267, "percentage": 43.41, "elapsed_time": "8:49:23", "remaining_time": "11:30:03"} +{"current_steps": 3445, "total_steps": 7924, "loss": 0.2623, "lr": 2.7841514531471574e-05, "epoch": 3.045053003533569, "percentage": 43.48, "elapsed_time": "8:50:09", "remaining_time": "11:29:16"} +{"current_steps": 3450, "total_steps": 7924, "loss": 0.262, "lr": 2.7800967499222845e-05, "epoch": 3.049469964664311, "percentage": 43.54, "elapsed_time": "8:50:55", "remaining_time": "11:28:29"} +{"current_steps": 3455, "total_steps": 7924, "loss": 0.25, "lr": 2.7760382615118562e-05, "epoch": 3.053886925795053, "percentage": 43.6, "elapsed_time": "8:51:40", "remaining_time": "11:27:43"} +{"current_steps": 3460, "total_steps": 7924, "loss": 0.2938, "lr": 2.7719760076084713e-05, "epoch": 3.058303886925795, "percentage": 43.66, "elapsed_time": "8:52:26", "remaining_time": "11:26:56"} +{"current_steps": 3465, "total_steps": 7924, "loss": 0.2674, "lr": 2.7679100079229982e-05, "epoch": 3.062720848056537, "percentage": 43.73, "elapsed_time": "8:53:12", "remaining_time": "11:26:10"} +{"current_steps": 3470, "total_steps": 7924, "loss": 0.281, "lr": 2.7638402821844808e-05, "epoch": 3.067137809187279, "percentage": 43.79, "elapsed_time": "8:53:58", "remaining_time": "11:25:23"} +{"current_steps": 3475, "total_steps": 7924, "loss": 0.2591, "lr": 2.7597668501400436e-05, "epoch": 3.0715547703180213, "percentage": 43.85, "elapsed_time": "8:54:44", "remaining_time": "11:24:37"} +{"current_steps": 3480, "total_steps": 7924, "loss": 0.29, "lr": 2.7556897315547934e-05, "epoch": 3.0759717314487633, "percentage": 43.92, "elapsed_time": "8:55:30", "remaining_time": "11:23:50"} +{"current_steps": 3485, "total_steps": 7924, "loss": 0.2386, "lr": 2.7516089462117265e-05, "epoch": 3.0803886925795054, "percentage": 43.98, "elapsed_time": "8:56:16", "remaining_time": "11:23:04"} +{"current_steps": 3490, "total_steps": 7924, "loss": 0.259, "lr": 2.747524513911629e-05, "epoch": 3.0848056537102475, "percentage": 44.04, "elapsed_time": "8:57:01", "remaining_time": "11:22:17"} +{"current_steps": 3495, "total_steps": 7924, "loss": 0.2747, "lr": 2.7434364544729844e-05, "epoch": 3.0892226148409896, "percentage": 44.11, "elapsed_time": "8:57:47", "remaining_time": "11:21:30"} +{"current_steps": 3500, "total_steps": 7924, "loss": 0.2907, "lr": 2.7393447877318756e-05, "epoch": 3.0936395759717312, "percentage": 44.17, "elapsed_time": "8:58:33", "remaining_time": "11:20:43"} +{"current_steps": 3505, "total_steps": 7924, "loss": 0.2578, "lr": 2.735249533541888e-05, "epoch": 3.0980565371024733, "percentage": 44.23, "elapsed_time": "8:59:18", "remaining_time": "11:19:57"} +{"current_steps": 3510, "total_steps": 7924, "loss": 0.261, "lr": 2.7311507117740138e-05, "epoch": 3.1024734982332154, "percentage": 44.3, "elapsed_time": "9:00:04", "remaining_time": "11:19:10"} +{"current_steps": 3515, "total_steps": 7924, "loss": 0.2604, "lr": 2.7270483423165578e-05, "epoch": 3.1068904593639575, "percentage": 44.36, "elapsed_time": "9:00:50", "remaining_time": "11:18:23"} +{"current_steps": 3520, "total_steps": 7924, "loss": 0.2418, "lr": 2.7229424450750378e-05, "epoch": 3.1113074204946995, "percentage": 44.42, "elapsed_time": "9:01:36", "remaining_time": "11:17:37"} +{"current_steps": 3525, "total_steps": 7924, "loss": 0.2593, "lr": 2.7188330399720883e-05, "epoch": 3.1157243816254416, "percentage": 44.49, "elapsed_time": "9:02:22", "remaining_time": "11:16:50"} +{"current_steps": 3530, "total_steps": 7924, "loss": 0.2697, "lr": 2.7147201469473645e-05, "epoch": 3.1201413427561837, "percentage": 44.55, "elapsed_time": "9:03:08", "remaining_time": "11:16:04"} +{"current_steps": 3535, "total_steps": 7924, "loss": 0.27, "lr": 2.7106037859574482e-05, "epoch": 3.124558303886926, "percentage": 44.61, "elapsed_time": "9:03:54", "remaining_time": "11:15:18"} +{"current_steps": 3540, "total_steps": 7924, "loss": 0.2552, "lr": 2.706483976975746e-05, "epoch": 3.128975265017668, "percentage": 44.67, "elapsed_time": "9:04:40", "remaining_time": "11:14:31"} +{"current_steps": 3545, "total_steps": 7924, "loss": 0.2812, "lr": 2.702360739992395e-05, "epoch": 3.13339222614841, "percentage": 44.74, "elapsed_time": "9:05:26", "remaining_time": "11:13:45"} +{"current_steps": 3550, "total_steps": 7924, "loss": 0.2648, "lr": 2.698234095014167e-05, "epoch": 3.137809187279152, "percentage": 44.8, "elapsed_time": "9:06:12", "remaining_time": "11:12:59"} +{"current_steps": 3555, "total_steps": 7924, "loss": 0.2887, "lr": 2.6941040620643685e-05, "epoch": 3.142226148409894, "percentage": 44.86, "elapsed_time": "9:06:58", "remaining_time": "11:12:12"} +{"current_steps": 3560, "total_steps": 7924, "loss": 0.2926, "lr": 2.689970661182747e-05, "epoch": 3.146643109540636, "percentage": 44.93, "elapsed_time": "9:07:44", "remaining_time": "11:11:26"} +{"current_steps": 3565, "total_steps": 7924, "loss": 0.288, "lr": 2.6858339124253902e-05, "epoch": 3.1510600706713783, "percentage": 44.99, "elapsed_time": "9:08:30", "remaining_time": "11:10:40"} +{"current_steps": 3570, "total_steps": 7924, "loss": 0.2707, "lr": 2.681693835864631e-05, "epoch": 3.1554770318021204, "percentage": 45.05, "elapsed_time": "9:09:16", "remaining_time": "11:09:54"} +{"current_steps": 3575, "total_steps": 7924, "loss": 0.2665, "lr": 2.6775504515889498e-05, "epoch": 3.159893992932862, "percentage": 45.12, "elapsed_time": "9:10:02", "remaining_time": "11:09:07"} +{"current_steps": 3580, "total_steps": 7924, "loss": 0.298, "lr": 2.6734037797028764e-05, "epoch": 3.164310954063604, "percentage": 45.18, "elapsed_time": "9:10:48", "remaining_time": "11:08:21"} +{"current_steps": 3585, "total_steps": 7924, "loss": 0.2867, "lr": 2.6692538403268916e-05, "epoch": 3.168727915194346, "percentage": 45.24, "elapsed_time": "9:11:35", "remaining_time": "11:07:35"} +{"current_steps": 3590, "total_steps": 7924, "loss": 0.3015, "lr": 2.6651006535973327e-05, "epoch": 3.1731448763250882, "percentage": 45.31, "elapsed_time": "9:12:21", "remaining_time": "11:06:49"} +{"current_steps": 3595, "total_steps": 7924, "loss": 0.2832, "lr": 2.660944239666293e-05, "epoch": 3.1775618374558303, "percentage": 45.37, "elapsed_time": "9:13:07", "remaining_time": "11:06:02"} +{"current_steps": 3600, "total_steps": 7924, "loss": 0.2804, "lr": 2.6567846187015245e-05, "epoch": 3.1819787985865724, "percentage": 45.43, "elapsed_time": "9:13:53", "remaining_time": "11:05:16"} +{"current_steps": 3605, "total_steps": 7924, "loss": 0.3103, "lr": 2.6526218108863408e-05, "epoch": 3.1863957597173145, "percentage": 45.49, "elapsed_time": "9:14:39", "remaining_time": "11:04:30"} +{"current_steps": 3610, "total_steps": 7924, "loss": 0.262, "lr": 2.648455836419518e-05, "epoch": 3.1908127208480566, "percentage": 45.56, "elapsed_time": "9:15:25", "remaining_time": "11:03:44"} +{"current_steps": 3615, "total_steps": 7924, "loss": 0.2611, "lr": 2.6442867155151984e-05, "epoch": 3.1952296819787986, "percentage": 45.62, "elapsed_time": "9:16:11", "remaining_time": "11:02:58"} +{"current_steps": 3620, "total_steps": 7924, "loss": 0.2458, "lr": 2.6401144684027915e-05, "epoch": 3.1996466431095407, "percentage": 45.68, "elapsed_time": "9:16:57", "remaining_time": "11:02:11"} +{"current_steps": 3625, "total_steps": 7924, "loss": 0.2722, "lr": 2.635939115326874e-05, "epoch": 3.204063604240283, "percentage": 45.75, "elapsed_time": "9:17:43", "remaining_time": "11:01:25"} +{"current_steps": 3630, "total_steps": 7924, "loss": 0.3019, "lr": 2.631760676547096e-05, "epoch": 3.208480565371025, "percentage": 45.81, "elapsed_time": "9:18:29", "remaining_time": "11:00:38"} +{"current_steps": 3635, "total_steps": 7924, "loss": 0.2825, "lr": 2.6275791723380772e-05, "epoch": 3.212897526501767, "percentage": 45.87, "elapsed_time": "9:19:14", "remaining_time": "10:59:52"} +{"current_steps": 3640, "total_steps": 7924, "loss": 0.2657, "lr": 2.6233946229893147e-05, "epoch": 3.2173144876325086, "percentage": 45.94, "elapsed_time": "9:20:00", "remaining_time": "10:59:05"} +{"current_steps": 3645, "total_steps": 7924, "loss": 0.2425, "lr": 2.6192070488050783e-05, "epoch": 3.2217314487632507, "percentage": 46.0, "elapsed_time": "9:20:46", "remaining_time": "10:58:19"} +{"current_steps": 3650, "total_steps": 7924, "loss": 0.2519, "lr": 2.615016470104316e-05, "epoch": 3.2261484098939928, "percentage": 46.06, "elapsed_time": "9:21:32", "remaining_time": "10:57:32"} +{"current_steps": 3655, "total_steps": 7924, "loss": 0.31, "lr": 2.6108229072205545e-05, "epoch": 3.230565371024735, "percentage": 46.13, "elapsed_time": "9:22:18", "remaining_time": "10:56:46"} +{"current_steps": 3660, "total_steps": 7924, "loss": 0.2921, "lr": 2.606626380501801e-05, "epoch": 3.234982332155477, "percentage": 46.19, "elapsed_time": "9:23:04", "remaining_time": "10:55:59"} +{"current_steps": 3665, "total_steps": 7924, "loss": 0.2716, "lr": 2.6024269103104417e-05, "epoch": 3.239399293286219, "percentage": 46.25, "elapsed_time": "9:23:50", "remaining_time": "10:55:13"} +{"current_steps": 3670, "total_steps": 7924, "loss": 0.264, "lr": 2.5982245170231467e-05, "epoch": 3.243816254416961, "percentage": 46.31, "elapsed_time": "9:24:36", "remaining_time": "10:54:27"} +{"current_steps": 3675, "total_steps": 7924, "loss": 0.2544, "lr": 2.5940192210307697e-05, "epoch": 3.248233215547703, "percentage": 46.38, "elapsed_time": "9:25:22", "remaining_time": "10:53:40"} +{"current_steps": 3680, "total_steps": 7924, "loss": 0.2543, "lr": 2.5898110427382487e-05, "epoch": 3.2526501766784452, "percentage": 46.44, "elapsed_time": "9:26:08", "remaining_time": "10:52:54"} +{"current_steps": 3685, "total_steps": 7924, "loss": 0.2803, "lr": 2.5856000025645065e-05, "epoch": 3.2570671378091873, "percentage": 46.5, "elapsed_time": "9:26:54", "remaining_time": "10:52:08"} +{"current_steps": 3690, "total_steps": 7924, "loss": 0.3275, "lr": 2.581386120942353e-05, "epoch": 3.2614840989399294, "percentage": 46.57, "elapsed_time": "9:27:40", "remaining_time": "10:51:22"} +{"current_steps": 3695, "total_steps": 7924, "loss": 0.309, "lr": 2.577169418318385e-05, "epoch": 3.2659010600706715, "percentage": 46.63, "elapsed_time": "9:28:26", "remaining_time": "10:50:35"} +{"current_steps": 3700, "total_steps": 7924, "loss": 0.2956, "lr": 2.5729499151528877e-05, "epoch": 3.2703180212014136, "percentage": 46.69, "elapsed_time": "9:29:12", "remaining_time": "10:49:49"} +{"current_steps": 3705, "total_steps": 7924, "loss": 0.3056, "lr": 2.568727631919735e-05, "epoch": 3.2747349823321557, "percentage": 46.76, "elapsed_time": "9:29:58", "remaining_time": "10:49:03"} +{"current_steps": 3710, "total_steps": 7924, "loss": 0.2645, "lr": 2.5645025891062897e-05, "epoch": 3.2791519434628977, "percentage": 46.82, "elapsed_time": "9:30:44", "remaining_time": "10:48:16"} +{"current_steps": 3715, "total_steps": 7924, "loss": 0.2863, "lr": 2.5602748072133054e-05, "epoch": 3.28356890459364, "percentage": 46.88, "elapsed_time": "9:31:31", "remaining_time": "10:47:31"} +{"current_steps": 3720, "total_steps": 7924, "loss": 0.2814, "lr": 2.5560443067548263e-05, "epoch": 3.2879858657243815, "percentage": 46.95, "elapsed_time": "9:32:17", "remaining_time": "10:46:44"} +{"current_steps": 3725, "total_steps": 7924, "loss": 0.251, "lr": 2.5518111082580873e-05, "epoch": 3.2924028268551235, "percentage": 47.01, "elapsed_time": "9:33:02", "remaining_time": "10:45:57"} +{"current_steps": 3730, "total_steps": 7924, "loss": 0.2566, "lr": 2.547575232263414e-05, "epoch": 3.2968197879858656, "percentage": 47.07, "elapsed_time": "9:33:48", "remaining_time": "10:45:11"} +{"current_steps": 3735, "total_steps": 7924, "loss": 0.2739, "lr": 2.5433366993241252e-05, "epoch": 3.3012367491166077, "percentage": 47.14, "elapsed_time": "9:34:34", "remaining_time": "10:44:25"} +{"current_steps": 3740, "total_steps": 7924, "loss": 0.2552, "lr": 2.5390955300064306e-05, "epoch": 3.3056537102473498, "percentage": 47.2, "elapsed_time": "9:35:20", "remaining_time": "10:43:39"} +{"current_steps": 3745, "total_steps": 7924, "loss": 0.283, "lr": 2.5348517448893323e-05, "epoch": 3.310070671378092, "percentage": 47.26, "elapsed_time": "9:36:07", "remaining_time": "10:42:52"} +{"current_steps": 3750, "total_steps": 7924, "loss": 0.306, "lr": 2.530605364564526e-05, "epoch": 3.314487632508834, "percentage": 47.32, "elapsed_time": "9:36:52", "remaining_time": "10:42:06"} +{"current_steps": 3755, "total_steps": 7924, "loss": 0.2711, "lr": 2.5263564096362972e-05, "epoch": 3.318904593639576, "percentage": 47.39, "elapsed_time": "9:37:38", "remaining_time": "10:41:19"} +{"current_steps": 3760, "total_steps": 7924, "loss": 0.2561, "lr": 2.5221049007214276e-05, "epoch": 3.323321554770318, "percentage": 47.45, "elapsed_time": "9:38:24", "remaining_time": "10:40:33"} +{"current_steps": 3765, "total_steps": 7924, "loss": 0.2672, "lr": 2.5178508584490882e-05, "epoch": 3.32773851590106, "percentage": 47.51, "elapsed_time": "9:39:10", "remaining_time": "10:39:46"} +{"current_steps": 3770, "total_steps": 7924, "loss": 0.3056, "lr": 2.5135943034607434e-05, "epoch": 3.3321554770318023, "percentage": 47.58, "elapsed_time": "9:39:56", "remaining_time": "10:39:00"} +{"current_steps": 3775, "total_steps": 7924, "loss": 0.2076, "lr": 2.50933525641005e-05, "epoch": 3.3365724381625443, "percentage": 47.64, "elapsed_time": "9:40:42", "remaining_time": "10:38:13"} +{"current_steps": 3780, "total_steps": 7924, "loss": 0.2822, "lr": 2.5050737379627575e-05, "epoch": 3.340989399293286, "percentage": 47.7, "elapsed_time": "9:41:28", "remaining_time": "10:37:27"} +{"current_steps": 3785, "total_steps": 7924, "loss": 0.2573, "lr": 2.5008097687966052e-05, "epoch": 3.345406360424028, "percentage": 47.77, "elapsed_time": "9:42:14", "remaining_time": "10:36:41"} +{"current_steps": 3790, "total_steps": 7924, "loss": 0.3068, "lr": 2.4965433696012255e-05, "epoch": 3.34982332155477, "percentage": 47.83, "elapsed_time": "9:43:00", "remaining_time": "10:35:55"} +{"current_steps": 3795, "total_steps": 7924, "loss": 0.3033, "lr": 2.49227456107804e-05, "epoch": 3.354240282685512, "percentage": 47.89, "elapsed_time": "9:43:46", "remaining_time": "10:35:09"} +{"current_steps": 3800, "total_steps": 7924, "loss": 0.2727, "lr": 2.488003363940163e-05, "epoch": 3.3586572438162543, "percentage": 47.96, "elapsed_time": "9:44:32", "remaining_time": "10:34:23"} +{"current_steps": 3805, "total_steps": 7924, "loss": 0.2831, "lr": 2.4837297989122987e-05, "epoch": 3.3630742049469964, "percentage": 48.02, "elapsed_time": "9:45:18", "remaining_time": "10:33:36"} +{"current_steps": 3810, "total_steps": 7924, "loss": 0.2837, "lr": 2.4794538867306385e-05, "epoch": 3.3674911660777385, "percentage": 48.08, "elapsed_time": "9:46:04", "remaining_time": "10:32:50"} +{"current_steps": 3815, "total_steps": 7924, "loss": 0.2699, "lr": 2.4751756481427637e-05, "epoch": 3.3719081272084805, "percentage": 48.14, "elapsed_time": "9:46:50", "remaining_time": "10:32:03"} +{"current_steps": 3820, "total_steps": 7924, "loss": 0.2375, "lr": 2.4708951039075462e-05, "epoch": 3.3763250883392226, "percentage": 48.21, "elapsed_time": "9:47:36", "remaining_time": "10:31:17"} +{"current_steps": 3825, "total_steps": 7924, "loss": 0.257, "lr": 2.4666122747950416e-05, "epoch": 3.3807420494699647, "percentage": 48.27, "elapsed_time": "9:48:22", "remaining_time": "10:30:31"} +{"current_steps": 3830, "total_steps": 7924, "loss": 0.2552, "lr": 2.4623271815863943e-05, "epoch": 3.385159010600707, "percentage": 48.33, "elapsed_time": "9:49:08", "remaining_time": "10:29:45"} +{"current_steps": 3835, "total_steps": 7924, "loss": 0.3036, "lr": 2.4580398450737338e-05, "epoch": 3.389575971731449, "percentage": 48.4, "elapsed_time": "9:49:54", "remaining_time": "10:28:59"} +{"current_steps": 3840, "total_steps": 7924, "loss": 0.2842, "lr": 2.4537502860600754e-05, "epoch": 3.393992932862191, "percentage": 48.46, "elapsed_time": "9:50:40", "remaining_time": "10:28:12"} +{"current_steps": 3845, "total_steps": 7924, "loss": 0.2717, "lr": 2.4494585253592184e-05, "epoch": 3.398409893992933, "percentage": 48.52, "elapsed_time": "9:51:26", "remaining_time": "10:27:26"} +{"current_steps": 3850, "total_steps": 7924, "loss": 0.2567, "lr": 2.445164583795643e-05, "epoch": 3.402826855123675, "percentage": 48.59, "elapsed_time": "9:52:12", "remaining_time": "10:26:39"} +{"current_steps": 3855, "total_steps": 7924, "loss": 0.2485, "lr": 2.4408684822044152e-05, "epoch": 3.407243816254417, "percentage": 48.65, "elapsed_time": "9:52:58", "remaining_time": "10:25:53"} +{"current_steps": 3860, "total_steps": 7924, "loss": 0.2891, "lr": 2.4365702414310786e-05, "epoch": 3.411660777385159, "percentage": 48.71, "elapsed_time": "9:53:44", "remaining_time": "10:25:07"} +{"current_steps": 3865, "total_steps": 7924, "loss": 0.2936, "lr": 2.4322698823315572e-05, "epoch": 3.416077738515901, "percentage": 48.78, "elapsed_time": "9:54:30", "remaining_time": "10:24:20"} +{"current_steps": 3870, "total_steps": 7924, "loss": 0.2531, "lr": 2.4279674257720548e-05, "epoch": 3.420494699646643, "percentage": 48.84, "elapsed_time": "9:55:15", "remaining_time": "10:23:33"} +{"current_steps": 3875, "total_steps": 7924, "loss": 0.2879, "lr": 2.4236628926289506e-05, "epoch": 3.424911660777385, "percentage": 48.9, "elapsed_time": "9:56:01", "remaining_time": "10:22:47"} +{"current_steps": 3880, "total_steps": 7924, "loss": 0.2488, "lr": 2.4193563037887025e-05, "epoch": 3.429328621908127, "percentage": 48.97, "elapsed_time": "9:56:47", "remaining_time": "10:22:01"} +{"current_steps": 3885, "total_steps": 7924, "loss": 0.2452, "lr": 2.4150476801477404e-05, "epoch": 3.4337455830388692, "percentage": 49.03, "elapsed_time": "9:57:33", "remaining_time": "10:21:14"} +{"current_steps": 3890, "total_steps": 7924, "loss": 0.2405, "lr": 2.4107370426123685e-05, "epoch": 3.4381625441696113, "percentage": 49.09, "elapsed_time": "9:58:19", "remaining_time": "10:20:28"} +{"current_steps": 3895, "total_steps": 7924, "loss": 0.2926, "lr": 2.406424412098664e-05, "epoch": 3.4425795053003534, "percentage": 49.15, "elapsed_time": "9:59:05", "remaining_time": "10:19:41"} +{"current_steps": 3900, "total_steps": 7924, "loss": 0.2537, "lr": 2.4021098095323713e-05, "epoch": 3.4469964664310955, "percentage": 49.22, "elapsed_time": "9:59:51", "remaining_time": "10:18:55"} +{"current_steps": 3905, "total_steps": 7924, "loss": 0.2725, "lr": 2.3977932558488074e-05, "epoch": 3.4514134275618376, "percentage": 49.28, "elapsed_time": "10:00:37", "remaining_time": "10:18:09"} +{"current_steps": 3910, "total_steps": 7924, "loss": 0.2794, "lr": 2.3934747719927534e-05, "epoch": 3.4558303886925796, "percentage": 49.34, "elapsed_time": "10:01:23", "remaining_time": "10:17:23"} +{"current_steps": 3915, "total_steps": 7924, "loss": 0.2999, "lr": 2.3891543789183573e-05, "epoch": 3.4602473498233217, "percentage": 49.41, "elapsed_time": "10:02:09", "remaining_time": "10:16:37"} +{"current_steps": 3920, "total_steps": 7924, "loss": 0.2683, "lr": 2.3848320975890316e-05, "epoch": 3.464664310954064, "percentage": 49.47, "elapsed_time": "10:02:55", "remaining_time": "10:15:50"} +{"current_steps": 3925, "total_steps": 7924, "loss": 0.2442, "lr": 2.3805079489773508e-05, "epoch": 3.4690812720848054, "percentage": 49.53, "elapsed_time": "10:03:40", "remaining_time": "10:15:03"} +{"current_steps": 3930, "total_steps": 7924, "loss": 0.2982, "lr": 2.376181954064948e-05, "epoch": 3.4734982332155475, "percentage": 49.6, "elapsed_time": "10:04:27", "remaining_time": "10:14:17"} +{"current_steps": 3935, "total_steps": 7924, "loss": 0.2994, "lr": 2.3718541338424176e-05, "epoch": 3.4779151943462896, "percentage": 49.66, "elapsed_time": "10:05:12", "remaining_time": "10:13:31"} +{"current_steps": 3940, "total_steps": 7924, "loss": 0.2486, "lr": 2.3675245093092082e-05, "epoch": 3.4823321554770317, "percentage": 49.72, "elapsed_time": "10:05:58", "remaining_time": "10:12:44"} +{"current_steps": 3945, "total_steps": 7924, "loss": 0.2812, "lr": 2.3631931014735258e-05, "epoch": 3.4867491166077738, "percentage": 49.79, "elapsed_time": "10:06:44", "remaining_time": "10:11:58"} +{"current_steps": 3950, "total_steps": 7924, "loss": 0.2562, "lr": 2.358859931352227e-05, "epoch": 3.491166077738516, "percentage": 49.85, "elapsed_time": "10:07:30", "remaining_time": "10:11:12"} +{"current_steps": 3955, "total_steps": 7924, "loss": 0.2416, "lr": 2.3545250199707207e-05, "epoch": 3.495583038869258, "percentage": 49.91, "elapsed_time": "10:08:16", "remaining_time": "10:10:25"} +{"current_steps": 3960, "total_steps": 7924, "loss": 0.2578, "lr": 2.350188388362865e-05, "epoch": 3.5, "percentage": 49.97, "elapsed_time": "10:09:02", "remaining_time": "10:09:39"} +{"current_steps": 3965, "total_steps": 7924, "loss": 0.2683, "lr": 2.3458500575708642e-05, "epoch": 3.504416961130742, "percentage": 50.04, "elapsed_time": "10:09:48", "remaining_time": "10:08:53"} +{"current_steps": 3970, "total_steps": 7924, "loss": 0.2803, "lr": 2.341510048645167e-05, "epoch": 3.508833922261484, "percentage": 50.1, "elapsed_time": "10:10:34", "remaining_time": "10:08:07"} +{"current_steps": 3975, "total_steps": 7924, "loss": 0.3018, "lr": 2.337168382644367e-05, "epoch": 3.5132508833922262, "percentage": 50.16, "elapsed_time": "10:11:20", "remaining_time": "10:07:20"} +{"current_steps": 3980, "total_steps": 7924, "loss": 0.3153, "lr": 2.332825080635094e-05, "epoch": 3.5176678445229683, "percentage": 50.23, "elapsed_time": "10:12:06", "remaining_time": "10:06:34"} +{"current_steps": 3985, "total_steps": 7924, "loss": 0.2874, "lr": 2.3284801636919205e-05, "epoch": 3.5220848056537104, "percentage": 50.29, "elapsed_time": "10:12:52", "remaining_time": "10:05:48"} +{"current_steps": 3990, "total_steps": 7924, "loss": 0.2659, "lr": 2.3241336528972522e-05, "epoch": 3.5265017667844525, "percentage": 50.35, "elapsed_time": "10:13:38", "remaining_time": "10:05:01"} +{"current_steps": 3995, "total_steps": 7924, "loss": 0.2901, "lr": 2.3197855693412295e-05, "epoch": 3.5309187279151946, "percentage": 50.42, "elapsed_time": "10:14:24", "remaining_time": "10:04:15"} +{"current_steps": 4000, "total_steps": 7924, "loss": 0.2733, "lr": 2.3154359341216243e-05, "epoch": 3.5353356890459366, "percentage": 50.48, "elapsed_time": "10:15:10", "remaining_time": "10:03:29"} +{"current_steps": 4005, "total_steps": 7924, "loss": 0.2487, "lr": 2.311084768343737e-05, "epoch": 3.5397526501766783, "percentage": 50.54, "elapsed_time": "10:15:56", "remaining_time": "10:02:42"} +{"current_steps": 4010, "total_steps": 7924, "loss": 0.2892, "lr": 2.306732093120295e-05, "epoch": 3.5441696113074204, "percentage": 50.61, "elapsed_time": "10:16:42", "remaining_time": "10:01:56"} +{"current_steps": 4015, "total_steps": 7924, "loss": 0.2833, "lr": 2.3023779295713497e-05, "epoch": 3.5485865724381624, "percentage": 50.67, "elapsed_time": "10:17:28", "remaining_time": "10:01:10"} +{"current_steps": 4020, "total_steps": 7924, "loss": 0.2633, "lr": 2.2980222988241733e-05, "epoch": 3.5530035335689045, "percentage": 50.73, "elapsed_time": "10:18:14", "remaining_time": "10:00:24"} +{"current_steps": 4025, "total_steps": 7924, "loss": 0.2422, "lr": 2.293665222013158e-05, "epoch": 3.5574204946996466, "percentage": 50.8, "elapsed_time": "10:19:00", "remaining_time": "9:59:38"} +{"current_steps": 4030, "total_steps": 7924, "loss": 0.2314, "lr": 2.2893067202797136e-05, "epoch": 3.5618374558303887, "percentage": 50.86, "elapsed_time": "10:19:46", "remaining_time": "9:58:51"} +{"current_steps": 4035, "total_steps": 7924, "loss": 0.27, "lr": 2.2849468147721615e-05, "epoch": 3.5662544169611308, "percentage": 50.92, "elapsed_time": "10:20:32", "remaining_time": "9:58:05"} +{"current_steps": 4040, "total_steps": 7924, "loss": 0.272, "lr": 2.280585526645637e-05, "epoch": 3.570671378091873, "percentage": 50.98, "elapsed_time": "10:21:19", "remaining_time": "9:57:19"} +{"current_steps": 4045, "total_steps": 7924, "loss": 0.2872, "lr": 2.2762228770619815e-05, "epoch": 3.575088339222615, "percentage": 51.05, "elapsed_time": "10:22:05", "remaining_time": "9:56:33"} +{"current_steps": 4050, "total_steps": 7924, "loss": 0.2657, "lr": 2.2718588871896454e-05, "epoch": 3.579505300353357, "percentage": 51.11, "elapsed_time": "10:22:51", "remaining_time": "9:55:47"} +{"current_steps": 4055, "total_steps": 7924, "loss": 0.2521, "lr": 2.2674935782035804e-05, "epoch": 3.583922261484099, "percentage": 51.17, "elapsed_time": "10:23:37", "remaining_time": "9:55:01"} +{"current_steps": 4060, "total_steps": 7924, "loss": 0.2349, "lr": 2.2631269712851385e-05, "epoch": 3.5883392226148407, "percentage": 51.24, "elapsed_time": "10:24:23", "remaining_time": "9:54:15"} +{"current_steps": 4065, "total_steps": 7924, "loss": 0.26, "lr": 2.258759087621971e-05, "epoch": 3.592756183745583, "percentage": 51.3, "elapsed_time": "10:25:10", "remaining_time": "9:53:29"} +{"current_steps": 4070, "total_steps": 7924, "loss": 0.299, "lr": 2.2543899484079245e-05, "epoch": 3.597173144876325, "percentage": 51.36, "elapsed_time": "10:25:56", "remaining_time": "9:52:43"} +{"current_steps": 4075, "total_steps": 7924, "loss": 0.2602, "lr": 2.2500195748429352e-05, "epoch": 3.601590106007067, "percentage": 51.43, "elapsed_time": "10:26:42", "remaining_time": "9:51:56"} +{"current_steps": 4080, "total_steps": 7924, "loss": 0.2457, "lr": 2.2456479881329315e-05, "epoch": 3.606007067137809, "percentage": 51.49, "elapsed_time": "10:27:28", "remaining_time": "9:51:11"} +{"current_steps": 4085, "total_steps": 7924, "loss": 0.2718, "lr": 2.2412752094897267e-05, "epoch": 3.610424028268551, "percentage": 51.55, "elapsed_time": "10:28:15", "remaining_time": "9:50:25"} +{"current_steps": 4090, "total_steps": 7924, "loss": 0.2614, "lr": 2.236901260130918e-05, "epoch": 3.614840989399293, "percentage": 51.62, "elapsed_time": "10:29:01", "remaining_time": "9:49:38"} +{"current_steps": 4095, "total_steps": 7924, "loss": 0.2518, "lr": 2.2325261612797832e-05, "epoch": 3.6192579505300353, "percentage": 51.68, "elapsed_time": "10:29:46", "remaining_time": "9:48:52"} +{"current_steps": 4100, "total_steps": 7924, "loss": 0.268, "lr": 2.2281499341651767e-05, "epoch": 3.6236749116607774, "percentage": 51.74, "elapsed_time": "10:30:33", "remaining_time": "9:48:06"} +{"current_steps": 4105, "total_steps": 7924, "loss": 0.2728, "lr": 2.223772600021429e-05, "epoch": 3.6280918727915195, "percentage": 51.8, "elapsed_time": "10:31:19", "remaining_time": "9:47:20"} +{"current_steps": 4110, "total_steps": 7924, "loss": 0.3153, "lr": 2.2193941800882418e-05, "epoch": 3.6325088339222615, "percentage": 51.87, "elapsed_time": "10:32:06", "remaining_time": "9:46:35"} +{"current_steps": 4115, "total_steps": 7924, "loss": 0.3003, "lr": 2.2150146956105836e-05, "epoch": 3.6369257950530036, "percentage": 51.93, "elapsed_time": "10:32:53", "remaining_time": "9:45:49"} +{"current_steps": 4120, "total_steps": 7924, "loss": 0.2801, "lr": 2.210634167838591e-05, "epoch": 3.6413427561837457, "percentage": 51.99, "elapsed_time": "10:33:39", "remaining_time": "9:45:03"} +{"current_steps": 4125, "total_steps": 7924, "loss": 0.2378, "lr": 2.2062526180274607e-05, "epoch": 3.645759717314488, "percentage": 52.06, "elapsed_time": "10:34:26", "remaining_time": "9:44:18"} +{"current_steps": 4130, "total_steps": 7924, "loss": 0.2642, "lr": 2.2018700674373487e-05, "epoch": 3.65017667844523, "percentage": 52.12, "elapsed_time": "10:35:12", "remaining_time": "9:43:31"} +{"current_steps": 4135, "total_steps": 7924, "loss": 0.281, "lr": 2.1974865373332695e-05, "epoch": 3.654593639575972, "percentage": 52.18, "elapsed_time": "10:35:58", "remaining_time": "9:42:45"} +{"current_steps": 4140, "total_steps": 7924, "loss": 0.2649, "lr": 2.1931020489849865e-05, "epoch": 3.659010600706714, "percentage": 52.25, "elapsed_time": "10:36:44", "remaining_time": "9:41:59"} +{"current_steps": 4145, "total_steps": 7924, "loss": 0.2716, "lr": 2.1887166236669154e-05, "epoch": 3.663427561837456, "percentage": 52.31, "elapsed_time": "10:37:31", "remaining_time": "9:41:13"} +{"current_steps": 4150, "total_steps": 7924, "loss": 0.2425, "lr": 2.184330282658018e-05, "epoch": 3.6678445229681977, "percentage": 52.37, "elapsed_time": "10:38:18", "remaining_time": "9:40:28"} +{"current_steps": 4155, "total_steps": 7924, "loss": 0.31, "lr": 2.1799430472416975e-05, "epoch": 3.67226148409894, "percentage": 52.44, "elapsed_time": "10:39:04", "remaining_time": "9:39:42"} +{"current_steps": 4160, "total_steps": 7924, "loss": 0.2795, "lr": 2.1755549387056997e-05, "epoch": 3.676678445229682, "percentage": 52.5, "elapsed_time": "10:39:50", "remaining_time": "9:38:56"} +{"current_steps": 4165, "total_steps": 7924, "loss": 0.2469, "lr": 2.1711659783420043e-05, "epoch": 3.681095406360424, "percentage": 52.56, "elapsed_time": "10:40:36", "remaining_time": "9:38:09"} +{"current_steps": 4170, "total_steps": 7924, "loss": 0.282, "lr": 2.1667761874467256e-05, "epoch": 3.685512367491166, "percentage": 52.62, "elapsed_time": "10:41:22", "remaining_time": "9:37:23"} +{"current_steps": 4175, "total_steps": 7924, "loss": 0.2547, "lr": 2.162385587320008e-05, "epoch": 3.689929328621908, "percentage": 52.69, "elapsed_time": "10:42:07", "remaining_time": "9:36:36"} +{"current_steps": 4180, "total_steps": 7924, "loss": 0.2515, "lr": 2.1579941992659214e-05, "epoch": 3.6943462897526502, "percentage": 52.75, "elapsed_time": "10:42:54", "remaining_time": "9:35:51"} +{"current_steps": 4185, "total_steps": 7924, "loss": 0.2546, "lr": 2.1536020445923595e-05, "epoch": 3.6987632508833923, "percentage": 52.81, "elapsed_time": "10:43:41", "remaining_time": "9:35:05"} +{"current_steps": 4190, "total_steps": 7924, "loss": 0.2705, "lr": 2.1492091446109372e-05, "epoch": 3.7031802120141344, "percentage": 52.88, "elapsed_time": "10:44:27", "remaining_time": "9:34:19"} +{"current_steps": 4195, "total_steps": 7924, "loss": 0.2806, "lr": 2.1448155206368823e-05, "epoch": 3.7075971731448765, "percentage": 52.94, "elapsed_time": "10:45:12", "remaining_time": "9:33:32"} +{"current_steps": 4200, "total_steps": 7924, "loss": 0.2435, "lr": 2.1404211939889392e-05, "epoch": 3.712014134275618, "percentage": 53.0, "elapsed_time": "10:45:58", "remaining_time": "9:32:45"} +{"current_steps": 4205, "total_steps": 7924, "loss": 0.3053, "lr": 2.1360261859892594e-05, "epoch": 3.71643109540636, "percentage": 53.07, "elapsed_time": "10:46:44", "remaining_time": "9:31:59"} +{"current_steps": 4210, "total_steps": 7924, "loss": 0.3006, "lr": 2.1316305179633016e-05, "epoch": 3.7208480565371023, "percentage": 53.13, "elapsed_time": "10:47:30", "remaining_time": "9:31:13"} +{"current_steps": 4215, "total_steps": 7924, "loss": 0.2419, "lr": 2.1272342112397272e-05, "epoch": 3.7252650176678443, "percentage": 53.19, "elapsed_time": "10:48:16", "remaining_time": "9:30:27"} +{"current_steps": 4220, "total_steps": 7924, "loss": 0.2842, "lr": 2.1228372871502955e-05, "epoch": 3.7296819787985864, "percentage": 53.26, "elapsed_time": "10:49:02", "remaining_time": "9:29:40"} +{"current_steps": 4225, "total_steps": 7924, "loss": 0.2336, "lr": 2.1184397670297624e-05, "epoch": 3.7340989399293285, "percentage": 53.32, "elapsed_time": "10:49:48", "remaining_time": "9:28:54"} +{"current_steps": 4230, "total_steps": 7924, "loss": 0.2854, "lr": 2.1140416722157765e-05, "epoch": 3.7385159010600706, "percentage": 53.38, "elapsed_time": "10:50:34", "remaining_time": "9:28:08"} +{"current_steps": 4235, "total_steps": 7924, "loss": 0.253, "lr": 2.1096430240487723e-05, "epoch": 3.7429328621908127, "percentage": 53.45, "elapsed_time": "10:51:20", "remaining_time": "9:27:22"} +{"current_steps": 4240, "total_steps": 7924, "loss": 0.2336, "lr": 2.105243843871873e-05, "epoch": 3.7473498233215548, "percentage": 53.51, "elapsed_time": "10:52:07", "remaining_time": "9:26:36"} +{"current_steps": 4245, "total_steps": 7924, "loss": 0.2863, "lr": 2.100844153030779e-05, "epoch": 3.751766784452297, "percentage": 53.57, "elapsed_time": "10:52:52", "remaining_time": "9:25:49"} +{"current_steps": 4250, "total_steps": 7924, "loss": 0.2202, "lr": 2.096443972873673e-05, "epoch": 3.756183745583039, "percentage": 53.63, "elapsed_time": "10:53:38", "remaining_time": "9:25:03"} +{"current_steps": 4255, "total_steps": 7924, "loss": 0.2904, "lr": 2.0920433247511092e-05, "epoch": 3.760600706713781, "percentage": 53.7, "elapsed_time": "10:54:24", "remaining_time": "9:24:16"} +{"current_steps": 4260, "total_steps": 7924, "loss": 0.2641, "lr": 2.087642230015912e-05, "epoch": 3.765017667844523, "percentage": 53.76, "elapsed_time": "10:55:10", "remaining_time": "9:23:30"} +{"current_steps": 4265, "total_steps": 7924, "loss": 0.2684, "lr": 2.0832407100230747e-05, "epoch": 3.769434628975265, "percentage": 53.82, "elapsed_time": "10:55:55", "remaining_time": "9:22:43"} +{"current_steps": 4270, "total_steps": 7924, "loss": 0.276, "lr": 2.078838786129653e-05, "epoch": 3.7738515901060072, "percentage": 53.89, "elapsed_time": "10:56:41", "remaining_time": "9:21:57"} +{"current_steps": 4275, "total_steps": 7924, "loss": 0.2752, "lr": 2.0744364796946624e-05, "epoch": 3.7782685512367493, "percentage": 53.95, "elapsed_time": "10:57:27", "remaining_time": "9:21:11"} +{"current_steps": 4280, "total_steps": 7924, "loss": 0.2855, "lr": 2.0700338120789754e-05, "epoch": 3.7826855123674914, "percentage": 54.01, "elapsed_time": "10:58:13", "remaining_time": "9:20:24"} +{"current_steps": 4285, "total_steps": 7924, "loss": 0.2542, "lr": 2.0656308046452157e-05, "epoch": 3.7871024734982335, "percentage": 54.08, "elapsed_time": "10:58:59", "remaining_time": "9:19:38"} +{"current_steps": 4290, "total_steps": 7924, "loss": 0.2917, "lr": 2.0612274787576565e-05, "epoch": 3.791519434628975, "percentage": 54.14, "elapsed_time": "10:59:45", "remaining_time": "9:18:52"} +{"current_steps": 4295, "total_steps": 7924, "loss": 0.2617, "lr": 2.0568238557821175e-05, "epoch": 3.795936395759717, "percentage": 54.2, "elapsed_time": "11:00:31", "remaining_time": "9:18:06"} +{"current_steps": 4300, "total_steps": 7924, "loss": 0.2591, "lr": 2.0524199570858573e-05, "epoch": 3.8003533568904593, "percentage": 54.27, "elapsed_time": "11:01:17", "remaining_time": "9:17:20"} +{"current_steps": 4305, "total_steps": 7924, "loss": 0.2536, "lr": 2.048015804037474e-05, "epoch": 3.8047703180212014, "percentage": 54.33, "elapsed_time": "11:02:03", "remaining_time": "9:16:33"} +{"current_steps": 4310, "total_steps": 7924, "loss": 0.2946, "lr": 2.0436114180068008e-05, "epoch": 3.8091872791519434, "percentage": 54.39, "elapsed_time": "11:02:49", "remaining_time": "9:15:47"} +{"current_steps": 4315, "total_steps": 7924, "loss": 0.2882, "lr": 2.039206820364798e-05, "epoch": 3.8136042402826855, "percentage": 54.45, "elapsed_time": "11:03:35", "remaining_time": "9:15:01"} +{"current_steps": 4320, "total_steps": 7924, "loss": 0.2684, "lr": 2.034802032483457e-05, "epoch": 3.8180212014134276, "percentage": 54.52, "elapsed_time": "11:04:21", "remaining_time": "9:14:14"} +{"current_steps": 4325, "total_steps": 7924, "loss": 0.251, "lr": 2.0303970757356894e-05, "epoch": 3.8224381625441697, "percentage": 54.58, "elapsed_time": "11:05:07", "remaining_time": "9:13:28"} +{"current_steps": 4330, "total_steps": 7924, "loss": 0.2544, "lr": 2.025991971495226e-05, "epoch": 3.8268551236749118, "percentage": 54.64, "elapsed_time": "11:05:53", "remaining_time": "9:12:42"} +{"current_steps": 4335, "total_steps": 7924, "loss": 0.2919, "lr": 2.021586741136516e-05, "epoch": 3.831272084805654, "percentage": 54.71, "elapsed_time": "11:06:39", "remaining_time": "9:11:55"} +{"current_steps": 4340, "total_steps": 7924, "loss": 0.2375, "lr": 2.017181406034617e-05, "epoch": 3.835689045936396, "percentage": 54.77, "elapsed_time": "11:07:24", "remaining_time": "9:11:09"} +{"current_steps": 4345, "total_steps": 7924, "loss": 0.2724, "lr": 2.0127759875650974e-05, "epoch": 3.8401060070671376, "percentage": 54.83, "elapsed_time": "11:08:10", "remaining_time": "9:10:22"} +{"current_steps": 4350, "total_steps": 7924, "loss": 0.2649, "lr": 2.0083705071039297e-05, "epoch": 3.8445229681978796, "percentage": 54.9, "elapsed_time": "11:08:56", "remaining_time": "9:09:36"} +{"current_steps": 4355, "total_steps": 7924, "loss": 0.2563, "lr": 2.0039649860273855e-05, "epoch": 3.8489399293286217, "percentage": 54.96, "elapsed_time": "11:09:42", "remaining_time": "9:08:50"} +{"current_steps": 4360, "total_steps": 7924, "loss": 0.2609, "lr": 1.9995594457119364e-05, "epoch": 3.853356890459364, "percentage": 55.02, "elapsed_time": "11:10:28", "remaining_time": "9:08:04"} +{"current_steps": 4365, "total_steps": 7924, "loss": 0.3256, "lr": 1.995153907534145e-05, "epoch": 3.857773851590106, "percentage": 55.09, "elapsed_time": "11:11:14", "remaining_time": "9:07:17"} +{"current_steps": 4370, "total_steps": 7924, "loss": 0.2733, "lr": 1.990748392870563e-05, "epoch": 3.862190812720848, "percentage": 55.15, "elapsed_time": "11:12:00", "remaining_time": "9:06:31"} +{"current_steps": 4375, "total_steps": 7924, "loss": 0.3132, "lr": 1.986342923097631e-05, "epoch": 3.86660777385159, "percentage": 55.21, "elapsed_time": "11:12:46", "remaining_time": "9:05:45"} +{"current_steps": 4380, "total_steps": 7924, "loss": 0.2478, "lr": 1.98193751959157e-05, "epoch": 3.871024734982332, "percentage": 55.28, "elapsed_time": "11:13:32", "remaining_time": "9:04:59"} +{"current_steps": 4385, "total_steps": 7924, "loss": 0.3042, "lr": 1.977532203728278e-05, "epoch": 3.875441696113074, "percentage": 55.34, "elapsed_time": "11:14:18", "remaining_time": "9:04:13"} +{"current_steps": 4390, "total_steps": 7924, "loss": 0.2595, "lr": 1.9731269968832305e-05, "epoch": 3.8798586572438163, "percentage": 55.4, "elapsed_time": "11:15:04", "remaining_time": "9:03:26"} +{"current_steps": 4395, "total_steps": 7924, "loss": 0.2887, "lr": 1.9687219204313717e-05, "epoch": 3.8842756183745584, "percentage": 55.46, "elapsed_time": "11:15:50", "remaining_time": "9:02:40"} +{"current_steps": 4400, "total_steps": 7924, "loss": 0.2878, "lr": 1.9643169957470157e-05, "epoch": 3.8886925795053005, "percentage": 55.53, "elapsed_time": "11:16:36", "remaining_time": "9:01:54"} +{"current_steps": 4405, "total_steps": 7924, "loss": 0.256, "lr": 1.959912244203737e-05, "epoch": 3.8931095406360425, "percentage": 55.59, "elapsed_time": "11:17:22", "remaining_time": "9:01:07"} +{"current_steps": 4410, "total_steps": 7924, "loss": 0.2952, "lr": 1.9555076871742734e-05, "epoch": 3.8975265017667846, "percentage": 55.65, "elapsed_time": "11:18:08", "remaining_time": "9:00:21"} +{"current_steps": 4415, "total_steps": 7924, "loss": 0.2534, "lr": 1.951103346030415e-05, "epoch": 3.9019434628975267, "percentage": 55.72, "elapsed_time": "11:18:54", "remaining_time": "8:59:35"} +{"current_steps": 4420, "total_steps": 7924, "loss": 0.2328, "lr": 1.9466992421429076e-05, "epoch": 3.9063604240282688, "percentage": 55.78, "elapsed_time": "11:19:40", "remaining_time": "8:58:48"} +{"current_steps": 4425, "total_steps": 7924, "loss": 0.2697, "lr": 1.9422953968813454e-05, "epoch": 3.910777385159011, "percentage": 55.84, "elapsed_time": "11:20:26", "remaining_time": "8:58:02"} +{"current_steps": 4430, "total_steps": 7924, "loss": 0.256, "lr": 1.937891831614066e-05, "epoch": 3.9151943462897525, "percentage": 55.91, "elapsed_time": "11:21:11", "remaining_time": "8:57:16"} +{"current_steps": 4435, "total_steps": 7924, "loss": 0.2937, "lr": 1.93348856770805e-05, "epoch": 3.9196113074204946, "percentage": 55.97, "elapsed_time": "11:21:57", "remaining_time": "8:56:29"} +{"current_steps": 4440, "total_steps": 7924, "loss": 0.2579, "lr": 1.929085626528814e-05, "epoch": 3.9240282685512367, "percentage": 56.03, "elapsed_time": "11:22:43", "remaining_time": "8:55:43"} +{"current_steps": 4445, "total_steps": 7924, "loss": 0.2978, "lr": 1.9246830294403108e-05, "epoch": 3.9284452296819787, "percentage": 56.1, "elapsed_time": "11:23:29", "remaining_time": "8:54:57"} +{"current_steps": 4450, "total_steps": 7924, "loss": 0.2748, "lr": 1.920280797804822e-05, "epoch": 3.932862190812721, "percentage": 56.16, "elapsed_time": "11:24:15", "remaining_time": "8:54:11"} +{"current_steps": 4455, "total_steps": 7924, "loss": 0.3056, "lr": 1.915878952982857e-05, "epoch": 3.937279151943463, "percentage": 56.22, "elapsed_time": "11:25:01", "remaining_time": "8:53:24"} +{"current_steps": 4460, "total_steps": 7924, "loss": 0.2758, "lr": 1.911477516333048e-05, "epoch": 3.941696113074205, "percentage": 56.28, "elapsed_time": "11:25:47", "remaining_time": "8:52:38"} +{"current_steps": 4465, "total_steps": 7924, "loss": 0.2521, "lr": 1.907076509212046e-05, "epoch": 3.946113074204947, "percentage": 56.35, "elapsed_time": "11:26:33", "remaining_time": "8:51:52"} +{"current_steps": 4470, "total_steps": 7924, "loss": 0.3113, "lr": 1.9026759529744187e-05, "epoch": 3.950530035335689, "percentage": 56.41, "elapsed_time": "11:27:19", "remaining_time": "8:51:06"} +{"current_steps": 4475, "total_steps": 7924, "loss": 0.2627, "lr": 1.8982758689725447e-05, "epoch": 3.954946996466431, "percentage": 56.47, "elapsed_time": "11:28:05", "remaining_time": "8:50:20"} +{"current_steps": 4480, "total_steps": 7924, "loss": 0.2416, "lr": 1.8938762785565137e-05, "epoch": 3.9593639575971733, "percentage": 56.54, "elapsed_time": "11:28:51", "remaining_time": "8:49:33"} +{"current_steps": 4485, "total_steps": 7924, "loss": 0.2648, "lr": 1.8894772030740182e-05, "epoch": 3.963780918727915, "percentage": 56.6, "elapsed_time": "11:29:37", "remaining_time": "8:48:47"} +{"current_steps": 4490, "total_steps": 7924, "loss": 0.29, "lr": 1.8850786638702528e-05, "epoch": 3.968197879858657, "percentage": 56.66, "elapsed_time": "11:30:23", "remaining_time": "8:48:01"} +{"current_steps": 4495, "total_steps": 7924, "loss": 0.2798, "lr": 1.88068068228781e-05, "epoch": 3.972614840989399, "percentage": 56.73, "elapsed_time": "11:31:09", "remaining_time": "8:47:15"} +{"current_steps": 4500, "total_steps": 7924, "loss": 0.2907, "lr": 1.876283279666576e-05, "epoch": 3.977031802120141, "percentage": 56.79, "elapsed_time": "11:31:55", "remaining_time": "8:46:28"} +{"current_steps": 4505, "total_steps": 7924, "loss": 0.2882, "lr": 1.87188647734363e-05, "epoch": 3.9814487632508833, "percentage": 56.85, "elapsed_time": "11:33:13", "remaining_time": "8:46:07"} +{"current_steps": 4510, "total_steps": 7924, "loss": 0.2813, "lr": 1.8674902966531354e-05, "epoch": 3.9858657243816253, "percentage": 56.92, "elapsed_time": "11:34:00", "remaining_time": "8:45:20"} +{"current_steps": 4515, "total_steps": 7924, "loss": 0.2905, "lr": 1.8630947589262417e-05, "epoch": 3.9902826855123674, "percentage": 56.98, "elapsed_time": "11:34:45", "remaining_time": "8:44:34"} +{"current_steps": 4520, "total_steps": 7924, "loss": 0.2682, "lr": 1.858699885490977e-05, "epoch": 3.9946996466431095, "percentage": 57.04, "elapsed_time": "11:35:31", "remaining_time": "8:43:48"} +{"current_steps": 4525, "total_steps": 7924, "loss": 0.2526, "lr": 1.8543056976721472e-05, "epoch": 3.9991166077738516, "percentage": 57.1, "elapsed_time": "11:36:17", "remaining_time": "8:43:01"} +{"current_steps": 4530, "total_steps": 7924, "loss": 0.2219, "lr": 1.84991221679123e-05, "epoch": 4.004416961130742, "percentage": 57.17, "elapsed_time": "11:37:03", "remaining_time": "8:42:15"} +{"current_steps": 4535, "total_steps": 7924, "loss": 0.2414, "lr": 1.845519464166275e-05, "epoch": 4.008833922261484, "percentage": 57.23, "elapsed_time": "11:37:49", "remaining_time": "8:41:29"} +{"current_steps": 4540, "total_steps": 7924, "loss": 0.2617, "lr": 1.8411274611117974e-05, "epoch": 4.013250883392226, "percentage": 57.29, "elapsed_time": "11:38:35", "remaining_time": "8:40:42"} +{"current_steps": 4545, "total_steps": 7924, "loss": 0.2354, "lr": 1.836736228938674e-05, "epoch": 4.017667844522968, "percentage": 57.36, "elapsed_time": "11:39:21", "remaining_time": "8:39:56"} +{"current_steps": 4550, "total_steps": 7924, "loss": 0.2476, "lr": 1.832345788954043e-05, "epoch": 4.02208480565371, "percentage": 57.42, "elapsed_time": "11:40:07", "remaining_time": "8:39:10"} +{"current_steps": 4555, "total_steps": 7924, "loss": 0.2612, "lr": 1.8279561624611962e-05, "epoch": 4.0265017667844525, "percentage": 57.48, "elapsed_time": "11:40:54", "remaining_time": "8:38:24"} +{"current_steps": 4560, "total_steps": 7924, "loss": 0.2545, "lr": 1.8235673707594822e-05, "epoch": 4.030918727915195, "percentage": 57.55, "elapsed_time": "11:41:39", "remaining_time": "8:37:37"} +{"current_steps": 4565, "total_steps": 7924, "loss": 0.2485, "lr": 1.819179435144195e-05, "epoch": 4.035335689045937, "percentage": 57.61, "elapsed_time": "11:42:25", "remaining_time": "8:36:51"} +{"current_steps": 4570, "total_steps": 7924, "loss": 0.2517, "lr": 1.8147923769064776e-05, "epoch": 4.039752650176679, "percentage": 57.67, "elapsed_time": "11:43:11", "remaining_time": "8:36:05"} +{"current_steps": 4575, "total_steps": 7924, "loss": 0.242, "lr": 1.8104062173332134e-05, "epoch": 4.044169611307421, "percentage": 57.74, "elapsed_time": "11:43:57", "remaining_time": "8:35:18"} +{"current_steps": 4580, "total_steps": 7924, "loss": 0.2391, "lr": 1.8060209777069267e-05, "epoch": 4.048586572438163, "percentage": 57.8, "elapsed_time": "11:44:43", "remaining_time": "8:34:32"} +{"current_steps": 4585, "total_steps": 7924, "loss": 0.2537, "lr": 1.801636679305679e-05, "epoch": 4.053003533568905, "percentage": 57.86, "elapsed_time": "11:45:29", "remaining_time": "8:33:46"} +{"current_steps": 4590, "total_steps": 7924, "loss": 0.2101, "lr": 1.797253343402962e-05, "epoch": 4.057420494699647, "percentage": 57.93, "elapsed_time": "11:46:15", "remaining_time": "8:33:00"} +{"current_steps": 4595, "total_steps": 7924, "loss": 0.2666, "lr": 1.7928709912676e-05, "epoch": 4.061837455830389, "percentage": 57.99, "elapsed_time": "11:47:02", "remaining_time": "8:32:14"} +{"current_steps": 4600, "total_steps": 7924, "loss": 0.2344, "lr": 1.788489644163642e-05, "epoch": 4.06625441696113, "percentage": 58.05, "elapsed_time": "11:47:48", "remaining_time": "8:31:27"} +{"current_steps": 4605, "total_steps": 7924, "loss": 0.2724, "lr": 1.784109323350261e-05, "epoch": 4.070671378091872, "percentage": 58.11, "elapsed_time": "11:48:33", "remaining_time": "8:30:41"} +{"current_steps": 4610, "total_steps": 7924, "loss": 0.2666, "lr": 1.77973005008165e-05, "epoch": 4.0750883392226145, "percentage": 58.18, "elapsed_time": "11:49:19", "remaining_time": "8:29:55"} +{"current_steps": 4615, "total_steps": 7924, "loss": 0.2325, "lr": 1.7753518456069198e-05, "epoch": 4.079505300353357, "percentage": 58.24, "elapsed_time": "11:50:05", "remaining_time": "8:29:08"} +{"current_steps": 4620, "total_steps": 7924, "loss": 0.2456, "lr": 1.770974731169995e-05, "epoch": 4.083922261484099, "percentage": 58.3, "elapsed_time": "11:50:51", "remaining_time": "8:28:22"} +{"current_steps": 4625, "total_steps": 7924, "loss": 0.2454, "lr": 1.76659872800951e-05, "epoch": 4.088339222614841, "percentage": 58.37, "elapsed_time": "11:51:37", "remaining_time": "8:27:36"} +{"current_steps": 4630, "total_steps": 7924, "loss": 0.2482, "lr": 1.7622238573587093e-05, "epoch": 4.092756183745583, "percentage": 58.43, "elapsed_time": "11:52:24", "remaining_time": "8:26:50"} +{"current_steps": 4635, "total_steps": 7924, "loss": 0.281, "lr": 1.7578501404453388e-05, "epoch": 4.097173144876325, "percentage": 58.49, "elapsed_time": "11:53:10", "remaining_time": "8:26:04"} +{"current_steps": 4640, "total_steps": 7924, "loss": 0.2383, "lr": 1.7534775984915503e-05, "epoch": 4.101590106007067, "percentage": 58.56, "elapsed_time": "11:53:56", "remaining_time": "8:25:18"} +{"current_steps": 4645, "total_steps": 7924, "loss": 0.2795, "lr": 1.7491062527137912e-05, "epoch": 4.106007067137809, "percentage": 58.62, "elapsed_time": "11:54:42", "remaining_time": "8:24:31"} +{"current_steps": 4650, "total_steps": 7924, "loss": 0.2497, "lr": 1.744736124322707e-05, "epoch": 4.110424028268551, "percentage": 58.68, "elapsed_time": "11:55:28", "remaining_time": "8:23:45"} +{"current_steps": 4655, "total_steps": 7924, "loss": 0.2322, "lr": 1.7403672345230342e-05, "epoch": 4.114840989399293, "percentage": 58.75, "elapsed_time": "11:56:14", "remaining_time": "8:22:58"} +{"current_steps": 4660, "total_steps": 7924, "loss": 0.2192, "lr": 1.7359996045135007e-05, "epoch": 4.119257950530035, "percentage": 58.81, "elapsed_time": "11:57:00", "remaining_time": "8:22:12"} +{"current_steps": 4665, "total_steps": 7924, "loss": 0.2665, "lr": 1.7316332554867224e-05, "epoch": 4.123674911660777, "percentage": 58.87, "elapsed_time": "11:57:45", "remaining_time": "8:21:26"} +{"current_steps": 4670, "total_steps": 7924, "loss": 0.2602, "lr": 1.7272682086290982e-05, "epoch": 4.1280918727915195, "percentage": 58.93, "elapsed_time": "11:58:31", "remaining_time": "8:20:39"} +{"current_steps": 4675, "total_steps": 7924, "loss": 0.2498, "lr": 1.722904485120709e-05, "epoch": 4.1325088339222615, "percentage": 59.0, "elapsed_time": "11:59:17", "remaining_time": "8:19:53"} +{"current_steps": 4680, "total_steps": 7924, "loss": 0.2403, "lr": 1.7185421061352135e-05, "epoch": 4.136925795053004, "percentage": 59.06, "elapsed_time": "12:00:03", "remaining_time": "8:19:07"} +{"current_steps": 4685, "total_steps": 7924, "loss": 0.2512, "lr": 1.7141810928397495e-05, "epoch": 4.141342756183746, "percentage": 59.12, "elapsed_time": "12:00:50", "remaining_time": "8:18:21"} +{"current_steps": 4690, "total_steps": 7924, "loss": 0.2467, "lr": 1.7098214663948243e-05, "epoch": 4.145759717314488, "percentage": 59.19, "elapsed_time": "12:01:36", "remaining_time": "8:17:34"} +{"current_steps": 4695, "total_steps": 7924, "loss": 0.2392, "lr": 1.7054632479542196e-05, "epoch": 4.15017667844523, "percentage": 59.25, "elapsed_time": "12:02:21", "remaining_time": "8:16:48"} +{"current_steps": 4700, "total_steps": 7924, "loss": 0.2549, "lr": 1.7011064586648828e-05, "epoch": 4.154593639575972, "percentage": 59.31, "elapsed_time": "12:03:07", "remaining_time": "8:16:02"} +{"current_steps": 4705, "total_steps": 7924, "loss": 0.2433, "lr": 1.6967511196668277e-05, "epoch": 4.159010600706714, "percentage": 59.38, "elapsed_time": "12:03:53", "remaining_time": "8:15:15"} +{"current_steps": 4710, "total_steps": 7924, "loss": 0.2595, "lr": 1.6923972520930307e-05, "epoch": 4.163427561837456, "percentage": 59.44, "elapsed_time": "12:04:39", "remaining_time": "8:14:29"} +{"current_steps": 4715, "total_steps": 7924, "loss": 0.2606, "lr": 1.688044877069328e-05, "epoch": 4.167844522968198, "percentage": 59.5, "elapsed_time": "12:05:25", "remaining_time": "8:13:43"} +{"current_steps": 4720, "total_steps": 7924, "loss": 0.2477, "lr": 1.6836940157143152e-05, "epoch": 4.17226148409894, "percentage": 59.57, "elapsed_time": "12:06:11", "remaining_time": "8:12:57"} +{"current_steps": 4725, "total_steps": 7924, "loss": 0.2758, "lr": 1.6793446891392422e-05, "epoch": 4.176678445229682, "percentage": 59.63, "elapsed_time": "12:06:57", "remaining_time": "8:12:10"} +{"current_steps": 4730, "total_steps": 7924, "loss": 0.2304, "lr": 1.6749969184479116e-05, "epoch": 4.181095406360424, "percentage": 59.69, "elapsed_time": "12:07:43", "remaining_time": "8:11:24"} +{"current_steps": 4735, "total_steps": 7924, "loss": 0.2323, "lr": 1.670650724736577e-05, "epoch": 4.1855123674911665, "percentage": 59.76, "elapsed_time": "12:08:29", "remaining_time": "8:10:38"} +{"current_steps": 4740, "total_steps": 7924, "loss": 0.2365, "lr": 1.66630612909384e-05, "epoch": 4.189929328621908, "percentage": 59.82, "elapsed_time": "12:09:15", "remaining_time": "8:09:51"} +{"current_steps": 4745, "total_steps": 7924, "loss": 0.2397, "lr": 1.661963152600549e-05, "epoch": 4.19434628975265, "percentage": 59.88, "elapsed_time": "12:10:01", "remaining_time": "8:09:05"} +{"current_steps": 4750, "total_steps": 7924, "loss": 0.2502, "lr": 1.657621816329694e-05, "epoch": 4.198763250883392, "percentage": 59.94, "elapsed_time": "12:10:47", "remaining_time": "8:08:19"} +{"current_steps": 4755, "total_steps": 7924, "loss": 0.2476, "lr": 1.6532821413463083e-05, "epoch": 4.203180212014134, "percentage": 60.01, "elapsed_time": "12:11:33", "remaining_time": "8:07:33"} +{"current_steps": 4760, "total_steps": 7924, "loss": 0.2452, "lr": 1.648944148707363e-05, "epoch": 4.207597173144876, "percentage": 60.07, "elapsed_time": "12:12:19", "remaining_time": "8:06:46"} +{"current_steps": 4765, "total_steps": 7924, "loss": 0.2463, "lr": 1.6446078594616666e-05, "epoch": 4.212014134275618, "percentage": 60.13, "elapsed_time": "12:13:04", "remaining_time": "8:06:00"} +{"current_steps": 4770, "total_steps": 7924, "loss": 0.2593, "lr": 1.640273294649762e-05, "epoch": 4.21643109540636, "percentage": 60.2, "elapsed_time": "12:13:50", "remaining_time": "8:05:13"} +{"current_steps": 4775, "total_steps": 7924, "loss": 0.2516, "lr": 1.635940475303826e-05, "epoch": 4.220848056537102, "percentage": 60.26, "elapsed_time": "12:14:36", "remaining_time": "8:04:27"} +{"current_steps": 4780, "total_steps": 7924, "loss": 0.2362, "lr": 1.631609422447565e-05, "epoch": 4.225265017667844, "percentage": 60.32, "elapsed_time": "12:15:22", "remaining_time": "8:03:41"} +{"current_steps": 4785, "total_steps": 7924, "loss": 0.2179, "lr": 1.6272801570961136e-05, "epoch": 4.229681978798586, "percentage": 60.39, "elapsed_time": "12:16:09", "remaining_time": "8:02:55"} +{"current_steps": 4790, "total_steps": 7924, "loss": 0.2669, "lr": 1.6229527002559346e-05, "epoch": 4.2340989399293285, "percentage": 60.45, "elapsed_time": "12:16:55", "remaining_time": "8:02:08"} +{"current_steps": 4795, "total_steps": 7924, "loss": 0.2395, "lr": 1.6186270729247137e-05, "epoch": 4.238515901060071, "percentage": 60.51, "elapsed_time": "12:17:40", "remaining_time": "8:01:22"} +{"current_steps": 4800, "total_steps": 7924, "loss": 0.2536, "lr": 1.614303296091262e-05, "epoch": 4.242932862190813, "percentage": 60.58, "elapsed_time": "12:18:27", "remaining_time": "8:00:36"} +{"current_steps": 4805, "total_steps": 7924, "loss": 0.2925, "lr": 1.6099813907354077e-05, "epoch": 4.247349823321555, "percentage": 60.64, "elapsed_time": "12:19:13", "remaining_time": "7:59:50"} +{"current_steps": 4810, "total_steps": 7924, "loss": 0.253, "lr": 1.6056613778279026e-05, "epoch": 4.251766784452297, "percentage": 60.7, "elapsed_time": "12:19:58", "remaining_time": "7:59:03"} +{"current_steps": 4815, "total_steps": 7924, "loss": 0.2903, "lr": 1.6013432783303133e-05, "epoch": 4.256183745583039, "percentage": 60.76, "elapsed_time": "12:20:44", "remaining_time": "7:58:17"} +{"current_steps": 4820, "total_steps": 7924, "loss": 0.2959, "lr": 1.5970271131949213e-05, "epoch": 4.260600706713781, "percentage": 60.83, "elapsed_time": "12:21:31", "remaining_time": "7:57:31"} +{"current_steps": 4825, "total_steps": 7924, "loss": 0.2509, "lr": 1.5927129033646264e-05, "epoch": 4.265017667844523, "percentage": 60.89, "elapsed_time": "12:22:17", "remaining_time": "7:56:45"} +{"current_steps": 4830, "total_steps": 7924, "loss": 0.2623, "lr": 1.588400669772836e-05, "epoch": 4.269434628975265, "percentage": 60.95, "elapsed_time": "12:23:03", "remaining_time": "7:55:59"} +{"current_steps": 4835, "total_steps": 7924, "loss": 0.2439, "lr": 1.5840904333433717e-05, "epoch": 4.273851590106007, "percentage": 61.02, "elapsed_time": "12:23:49", "remaining_time": "7:55:12"} +{"current_steps": 4840, "total_steps": 7924, "loss": 0.2262, "lr": 1.5797822149903625e-05, "epoch": 4.278268551236749, "percentage": 61.08, "elapsed_time": "12:24:35", "remaining_time": "7:54:26"} +{"current_steps": 4845, "total_steps": 7924, "loss": 0.253, "lr": 1.575476035618147e-05, "epoch": 4.282685512367491, "percentage": 61.14, "elapsed_time": "12:25:21", "remaining_time": "7:53:40"} +{"current_steps": 4850, "total_steps": 7924, "loss": 0.2378, "lr": 1.5711719161211674e-05, "epoch": 4.2871024734982335, "percentage": 61.21, "elapsed_time": "12:26:07", "remaining_time": "7:52:53"} +{"current_steps": 4855, "total_steps": 7924, "loss": 0.2877, "lr": 1.5668698773838746e-05, "epoch": 4.291519434628976, "percentage": 61.27, "elapsed_time": "12:26:52", "remaining_time": "7:52:07"} +{"current_steps": 4860, "total_steps": 7924, "loss": 0.229, "lr": 1.562569940280622e-05, "epoch": 4.295936395759718, "percentage": 61.33, "elapsed_time": "12:27:38", "remaining_time": "7:51:21"} +{"current_steps": 4865, "total_steps": 7924, "loss": 0.243, "lr": 1.5582721256755632e-05, "epoch": 4.30035335689046, "percentage": 61.4, "elapsed_time": "12:28:24", "remaining_time": "7:50:34"} +{"current_steps": 4870, "total_steps": 7924, "loss": 0.2499, "lr": 1.5539764544225565e-05, "epoch": 4.304770318021202, "percentage": 61.46, "elapsed_time": "12:29:10", "remaining_time": "7:49:48"} +{"current_steps": 4875, "total_steps": 7924, "loss": 0.2427, "lr": 1.5496829473650568e-05, "epoch": 4.309187279151944, "percentage": 61.52, "elapsed_time": "12:29:56", "remaining_time": "7:49:02"} +{"current_steps": 4880, "total_steps": 7924, "loss": 0.2614, "lr": 1.5453916253360218e-05, "epoch": 4.313604240282686, "percentage": 61.59, "elapsed_time": "12:30:42", "remaining_time": "7:48:16"} +{"current_steps": 4885, "total_steps": 7924, "loss": 0.2386, "lr": 1.5411025091578025e-05, "epoch": 4.318021201413427, "percentage": 61.65, "elapsed_time": "12:31:28", "remaining_time": "7:47:30"} +{"current_steps": 4890, "total_steps": 7924, "loss": 0.3025, "lr": 1.5368156196420506e-05, "epoch": 4.322438162544169, "percentage": 61.71, "elapsed_time": "12:32:14", "remaining_time": "7:46:43"} +{"current_steps": 4895, "total_steps": 7924, "loss": 0.2698, "lr": 1.5325309775896117e-05, "epoch": 4.326855123674911, "percentage": 61.77, "elapsed_time": "12:33:00", "remaining_time": "7:45:57"} +{"current_steps": 4900, "total_steps": 7924, "loss": 0.2487, "lr": 1.5282486037904253e-05, "epoch": 4.331272084805653, "percentage": 61.84, "elapsed_time": "12:33:46", "remaining_time": "7:45:11"} +{"current_steps": 4905, "total_steps": 7924, "loss": 0.2564, "lr": 1.5239685190234287e-05, "epoch": 4.3356890459363955, "percentage": 61.9, "elapsed_time": "12:34:32", "remaining_time": "7:44:24"} +{"current_steps": 4910, "total_steps": 7924, "loss": 0.2617, "lr": 1.519690744056447e-05, "epoch": 4.340106007067138, "percentage": 61.96, "elapsed_time": "12:35:18", "remaining_time": "7:43:38"} +{"current_steps": 4915, "total_steps": 7924, "loss": 0.2334, "lr": 1.5154152996461026e-05, "epoch": 4.34452296819788, "percentage": 62.03, "elapsed_time": "12:36:04", "remaining_time": "7:42:52"} +{"current_steps": 4920, "total_steps": 7924, "loss": 0.2756, "lr": 1.5111422065377062e-05, "epoch": 4.348939929328622, "percentage": 62.09, "elapsed_time": "12:36:50", "remaining_time": "7:42:06"} +{"current_steps": 4925, "total_steps": 7924, "loss": 0.2544, "lr": 1.5068714854651614e-05, "epoch": 4.353356890459364, "percentage": 62.15, "elapsed_time": "12:37:36", "remaining_time": "7:41:20"} +{"current_steps": 4930, "total_steps": 7924, "loss": 0.2548, "lr": 1.5026031571508606e-05, "epoch": 4.357773851590106, "percentage": 62.22, "elapsed_time": "12:38:22", "remaining_time": "7:40:33"} +{"current_steps": 4935, "total_steps": 7924, "loss": 0.239, "lr": 1.498337242305588e-05, "epoch": 4.362190812720848, "percentage": 62.28, "elapsed_time": "12:39:08", "remaining_time": "7:39:47"} +{"current_steps": 4940, "total_steps": 7924, "loss": 0.2697, "lr": 1.4940737616284163e-05, "epoch": 4.36660777385159, "percentage": 62.34, "elapsed_time": "12:39:54", "remaining_time": "7:39:01"} +{"current_steps": 4945, "total_steps": 7924, "loss": 0.2612, "lr": 1.4898127358066061e-05, "epoch": 4.371024734982332, "percentage": 62.41, "elapsed_time": "12:40:40", "remaining_time": "7:38:15"} +{"current_steps": 4950, "total_steps": 7924, "loss": 0.2499, "lr": 1.4855541855155086e-05, "epoch": 4.375441696113074, "percentage": 62.47, "elapsed_time": "12:41:26", "remaining_time": "7:37:28"} +{"current_steps": 4955, "total_steps": 7924, "loss": 0.2344, "lr": 1.4812981314184607e-05, "epoch": 4.379858657243816, "percentage": 62.53, "elapsed_time": "12:42:12", "remaining_time": "7:36:42"} +{"current_steps": 4960, "total_steps": 7924, "loss": 0.2311, "lr": 1.4770445941666905e-05, "epoch": 4.384275618374558, "percentage": 62.59, "elapsed_time": "12:42:58", "remaining_time": "7:35:56"} +{"current_steps": 4965, "total_steps": 7924, "loss": 0.244, "lr": 1.4727935943992098e-05, "epoch": 4.3886925795053005, "percentage": 62.66, "elapsed_time": "12:43:44", "remaining_time": "7:35:09"} +{"current_steps": 4970, "total_steps": 7924, "loss": 0.2287, "lr": 1.4685451527427224e-05, "epoch": 4.3931095406360425, "percentage": 62.72, "elapsed_time": "12:44:30", "remaining_time": "7:34:23"} +{"current_steps": 4975, "total_steps": 7924, "loss": 0.246, "lr": 1.4642992898115158e-05, "epoch": 4.397526501766785, "percentage": 62.78, "elapsed_time": "12:45:16", "remaining_time": "7:33:37"} +{"current_steps": 4980, "total_steps": 7924, "loss": 0.2758, "lr": 1.460056026207367e-05, "epoch": 4.401943462897527, "percentage": 62.85, "elapsed_time": "12:46:02", "remaining_time": "7:32:51"} +{"current_steps": 4985, "total_steps": 7924, "loss": 0.2615, "lr": 1.4558153825194419e-05, "epoch": 4.406360424028269, "percentage": 62.91, "elapsed_time": "12:46:48", "remaining_time": "7:32:04"} +{"current_steps": 4990, "total_steps": 7924, "loss": 0.2407, "lr": 1.4515773793241898e-05, "epoch": 4.410777385159011, "percentage": 62.97, "elapsed_time": "12:47:34", "remaining_time": "7:31:18"} +{"current_steps": 4995, "total_steps": 7924, "loss": 0.2543, "lr": 1.4473420371852526e-05, "epoch": 4.415194346289753, "percentage": 63.04, "elapsed_time": "12:48:20", "remaining_time": "7:30:32"} +{"current_steps": 5000, "total_steps": 7924, "loss": 0.2606, "lr": 1.4431093766533567e-05, "epoch": 4.419611307420495, "percentage": 63.1, "elapsed_time": "12:49:06", "remaining_time": "7:29:46"} +{"current_steps": 5005, "total_steps": 7924, "loss": 0.2842, "lr": 1.4388794182662186e-05, "epoch": 4.424028268551237, "percentage": 63.16, "elapsed_time": "12:49:52", "remaining_time": "7:28:59"} +{"current_steps": 5010, "total_steps": 7924, "loss": 0.2327, "lr": 1.4346521825484424e-05, "epoch": 4.428445229681979, "percentage": 63.23, "elapsed_time": "12:50:37", "remaining_time": "7:28:13"} +{"current_steps": 5015, "total_steps": 7924, "loss": 0.248, "lr": 1.4304276900114222e-05, "epoch": 4.432862190812721, "percentage": 63.29, "elapsed_time": "12:51:23", "remaining_time": "7:27:27"} +{"current_steps": 5020, "total_steps": 7924, "loss": 0.2224, "lr": 1.4262059611532419e-05, "epoch": 4.4372791519434625, "percentage": 63.35, "elapsed_time": "12:52:09", "remaining_time": "7:26:41"} +{"current_steps": 5025, "total_steps": 7924, "loss": 0.2735, "lr": 1.4219870164585739e-05, "epoch": 4.4416961130742045, "percentage": 63.41, "elapsed_time": "12:52:55", "remaining_time": "7:25:54"} +{"current_steps": 5030, "total_steps": 7924, "loss": 0.2832, "lr": 1.417770876398583e-05, "epoch": 4.446113074204947, "percentage": 63.48, "elapsed_time": "12:53:41", "remaining_time": "7:25:08"} +{"current_steps": 5035, "total_steps": 7924, "loss": 0.2552, "lr": 1.4135575614308232e-05, "epoch": 4.450530035335689, "percentage": 63.54, "elapsed_time": "12:54:27", "remaining_time": "7:24:22"} +{"current_steps": 5040, "total_steps": 7924, "loss": 0.2892, "lr": 1.4093470919991442e-05, "epoch": 4.454946996466431, "percentage": 63.6, "elapsed_time": "12:55:13", "remaining_time": "7:23:35"} +{"current_steps": 5045, "total_steps": 7924, "loss": 0.2772, "lr": 1.4051394885335836e-05, "epoch": 4.459363957597173, "percentage": 63.67, "elapsed_time": "12:55:59", "remaining_time": "7:22:49"} +{"current_steps": 5050, "total_steps": 7924, "loss": 0.2243, "lr": 1.4009347714502778e-05, "epoch": 4.463780918727915, "percentage": 63.73, "elapsed_time": "12:56:45", "remaining_time": "7:22:03"} +{"current_steps": 5055, "total_steps": 7924, "loss": 0.243, "lr": 1.3967329611513543e-05, "epoch": 4.468197879858657, "percentage": 63.79, "elapsed_time": "12:57:31", "remaining_time": "7:21:17"} +{"current_steps": 5060, "total_steps": 7924, "loss": 0.2629, "lr": 1.3925340780248373e-05, "epoch": 4.472614840989399, "percentage": 63.86, "elapsed_time": "12:58:17", "remaining_time": "7:20:30"} +{"current_steps": 5065, "total_steps": 7924, "loss": 0.2168, "lr": 1.3883381424445506e-05, "epoch": 4.477031802120141, "percentage": 63.92, "elapsed_time": "12:59:03", "remaining_time": "7:19:44"} +{"current_steps": 5070, "total_steps": 7924, "loss": 0.2603, "lr": 1.3841451747700098e-05, "epoch": 4.481448763250883, "percentage": 63.98, "elapsed_time": "12:59:48", "remaining_time": "7:18:58"} +{"current_steps": 5075, "total_steps": 7924, "loss": 0.2619, "lr": 1.3799551953463362e-05, "epoch": 4.485865724381625, "percentage": 64.05, "elapsed_time": "13:00:34", "remaining_time": "7:18:12"} +{"current_steps": 5080, "total_steps": 7924, "loss": 0.2375, "lr": 1.3757682245041466e-05, "epoch": 4.490282685512367, "percentage": 64.11, "elapsed_time": "13:01:20", "remaining_time": "7:17:25"} +{"current_steps": 5085, "total_steps": 7924, "loss": 0.2427, "lr": 1.3715842825594628e-05, "epoch": 4.4946996466431095, "percentage": 64.17, "elapsed_time": "13:02:06", "remaining_time": "7:16:39"} +{"current_steps": 5090, "total_steps": 7924, "loss": 0.278, "lr": 1.3674033898136071e-05, "epoch": 4.499116607773852, "percentage": 64.24, "elapsed_time": "13:02:52", "remaining_time": "7:15:53"} +{"current_steps": 5095, "total_steps": 7924, "loss": 0.2619, "lr": 1.3632255665531088e-05, "epoch": 4.503533568904594, "percentage": 64.3, "elapsed_time": "13:03:38", "remaining_time": "7:15:07"} +{"current_steps": 5100, "total_steps": 7924, "loss": 0.2318, "lr": 1.3590508330496027e-05, "epoch": 4.507950530035336, "percentage": 64.36, "elapsed_time": "13:04:24", "remaining_time": "7:14:20"} +{"current_steps": 5105, "total_steps": 7924, "loss": 0.2235, "lr": 1.3548792095597305e-05, "epoch": 4.512367491166078, "percentage": 64.42, "elapsed_time": "13:05:10", "remaining_time": "7:13:34"} +{"current_steps": 5110, "total_steps": 7924, "loss": 0.2419, "lr": 1.3507107163250453e-05, "epoch": 4.51678445229682, "percentage": 64.49, "elapsed_time": "13:05:56", "remaining_time": "7:12:48"} +{"current_steps": 5115, "total_steps": 7924, "loss": 0.2439, "lr": 1.3465453735719087e-05, "epoch": 4.521201413427562, "percentage": 64.55, "elapsed_time": "13:06:42", "remaining_time": "7:12:02"} +{"current_steps": 5120, "total_steps": 7924, "loss": 0.2832, "lr": 1.3423832015114e-05, "epoch": 4.525618374558304, "percentage": 64.61, "elapsed_time": "13:07:28", "remaining_time": "7:11:15"} +{"current_steps": 5125, "total_steps": 7924, "loss": 0.2112, "lr": 1.3382242203392083e-05, "epoch": 4.530035335689046, "percentage": 64.68, "elapsed_time": "13:08:14", "remaining_time": "7:10:29"} +{"current_steps": 5130, "total_steps": 7924, "loss": 0.2808, "lr": 1.3340684502355443e-05, "epoch": 4.534452296819788, "percentage": 64.74, "elapsed_time": "13:09:00", "remaining_time": "7:09:43"} +{"current_steps": 5135, "total_steps": 7924, "loss": 0.2751, "lr": 1.3299159113650357e-05, "epoch": 4.53886925795053, "percentage": 64.8, "elapsed_time": "13:09:45", "remaining_time": "7:08:56"} +{"current_steps": 5140, "total_steps": 7924, "loss": 0.2733, "lr": 1.325766623876632e-05, "epoch": 4.543286219081272, "percentage": 64.87, "elapsed_time": "13:10:31", "remaining_time": "7:08:10"} +{"current_steps": 5145, "total_steps": 7924, "loss": 0.261, "lr": 1.321620607903508e-05, "epoch": 4.5477031802120145, "percentage": 64.93, "elapsed_time": "13:11:17", "remaining_time": "7:07:24"} +{"current_steps": 5150, "total_steps": 7924, "loss": 0.2636, "lr": 1.3174778835629605e-05, "epoch": 4.5521201413427566, "percentage": 64.99, "elapsed_time": "13:12:03", "remaining_time": "7:06:38"} +{"current_steps": 5155, "total_steps": 7924, "loss": 0.2509, "lr": 1.3133384709563188e-05, "epoch": 4.556537102473499, "percentage": 65.06, "elapsed_time": "13:12:49", "remaining_time": "7:05:51"} +{"current_steps": 5160, "total_steps": 7924, "loss": 0.2404, "lr": 1.309202390168841e-05, "epoch": 4.560954063604241, "percentage": 65.12, "elapsed_time": "13:13:35", "remaining_time": "7:05:05"} +{"current_steps": 5165, "total_steps": 7924, "loss": 0.2432, "lr": 1.3050696612696188e-05, "epoch": 4.565371024734983, "percentage": 65.18, "elapsed_time": "13:14:21", "remaining_time": "7:04:19"} +{"current_steps": 5170, "total_steps": 7924, "loss": 0.26, "lr": 1.3009403043114796e-05, "epoch": 4.569787985865725, "percentage": 65.24, "elapsed_time": "13:15:06", "remaining_time": "7:03:32"} +{"current_steps": 5175, "total_steps": 7924, "loss": 0.2324, "lr": 1.2968143393308897e-05, "epoch": 4.574204946996466, "percentage": 65.31, "elapsed_time": "13:15:52", "remaining_time": "7:02:46"} +{"current_steps": 5180, "total_steps": 7924, "loss": 0.2566, "lr": 1.2926917863478581e-05, "epoch": 4.578621908127208, "percentage": 65.37, "elapsed_time": "13:16:39", "remaining_time": "7:02:00"} +{"current_steps": 5185, "total_steps": 7924, "loss": 0.2738, "lr": 1.2885726653658355e-05, "epoch": 4.58303886925795, "percentage": 65.43, "elapsed_time": "13:17:24", "remaining_time": "7:01:14"} +{"current_steps": 5190, "total_steps": 7924, "loss": 0.2545, "lr": 1.2844569963716222e-05, "epoch": 4.587455830388692, "percentage": 65.5, "elapsed_time": "13:18:11", "remaining_time": "7:00:28"} +{"current_steps": 5195, "total_steps": 7924, "loss": 0.2543, "lr": 1.280344799335267e-05, "epoch": 4.591872791519434, "percentage": 65.56, "elapsed_time": "13:18:57", "remaining_time": "6:59:42"} +{"current_steps": 5200, "total_steps": 7924, "loss": 0.2415, "lr": 1.2762360942099745e-05, "epoch": 4.5962897526501765, "percentage": 65.62, "elapsed_time": "13:19:43", "remaining_time": "6:58:56"} +{"current_steps": 5205, "total_steps": 7924, "loss": 0.2502, "lr": 1.2721309009320021e-05, "epoch": 4.6007067137809186, "percentage": 65.69, "elapsed_time": "13:20:30", "remaining_time": "6:58:10"} +{"current_steps": 5210, "total_steps": 7924, "loss": 0.295, "lr": 1.268029239420571e-05, "epoch": 4.605123674911661, "percentage": 65.75, "elapsed_time": "13:21:16", "remaining_time": "6:57:23"} +{"current_steps": 5215, "total_steps": 7924, "loss": 0.2495, "lr": 1.2639311295777632e-05, "epoch": 4.609540636042403, "percentage": 65.81, "elapsed_time": "13:22:02", "remaining_time": "6:56:37"} +{"current_steps": 5220, "total_steps": 7924, "loss": 0.2556, "lr": 1.2598365912884267e-05, "epoch": 4.613957597173145, "percentage": 65.88, "elapsed_time": "13:22:47", "remaining_time": "6:55:51"} +{"current_steps": 5225, "total_steps": 7924, "loss": 0.296, "lr": 1.2557456444200831e-05, "epoch": 4.618374558303887, "percentage": 65.94, "elapsed_time": "13:23:34", "remaining_time": "6:55:05"} +{"current_steps": 5230, "total_steps": 7924, "loss": 0.2777, "lr": 1.2516583088228224e-05, "epoch": 4.622791519434629, "percentage": 66.0, "elapsed_time": "13:24:20", "remaining_time": "6:54:19"} +{"current_steps": 5235, "total_steps": 7924, "loss": 0.2595, "lr": 1.2475746043292176e-05, "epoch": 4.627208480565371, "percentage": 66.07, "elapsed_time": "13:25:05", "remaining_time": "6:53:32"} +{"current_steps": 5240, "total_steps": 7924, "loss": 0.2763, "lr": 1.243494550754219e-05, "epoch": 4.631625441696113, "percentage": 66.13, "elapsed_time": "13:25:51", "remaining_time": "6:52:46"} +{"current_steps": 5245, "total_steps": 7924, "loss": 0.2777, "lr": 1.239418167895063e-05, "epoch": 4.636042402826855, "percentage": 66.19, "elapsed_time": "13:26:37", "remaining_time": "6:52:00"} +{"current_steps": 5250, "total_steps": 7924, "loss": 0.2444, "lr": 1.2353454755311751e-05, "epoch": 4.640459363957597, "percentage": 66.25, "elapsed_time": "13:27:23", "remaining_time": "6:51:13"} +{"current_steps": 5255, "total_steps": 7924, "loss": 0.2706, "lr": 1.2312764934240735e-05, "epoch": 4.644876325088339, "percentage": 66.32, "elapsed_time": "13:28:09", "remaining_time": "6:50:27"} +{"current_steps": 5260, "total_steps": 7924, "loss": 0.2523, "lr": 1.227211241317275e-05, "epoch": 4.6492932862190814, "percentage": 66.38, "elapsed_time": "13:28:55", "remaining_time": "6:49:41"} +{"current_steps": 5265, "total_steps": 7924, "loss": 0.2539, "lr": 1.223149738936195e-05, "epoch": 4.6537102473498235, "percentage": 66.44, "elapsed_time": "13:29:41", "remaining_time": "6:48:55"} +{"current_steps": 5270, "total_steps": 7924, "loss": 0.2527, "lr": 1.219092005988057e-05, "epoch": 4.658127208480566, "percentage": 66.51, "elapsed_time": "13:30:27", "remaining_time": "6:48:09"} +{"current_steps": 5275, "total_steps": 7924, "loss": 0.2383, "lr": 1.215038062161792e-05, "epoch": 4.662544169611308, "percentage": 66.57, "elapsed_time": "13:31:13", "remaining_time": "6:47:22"} +{"current_steps": 5280, "total_steps": 7924, "loss": 0.2567, "lr": 1.2109879271279486e-05, "epoch": 4.66696113074205, "percentage": 66.63, "elapsed_time": "13:31:59", "remaining_time": "6:46:36"} +{"current_steps": 5285, "total_steps": 7924, "loss": 0.2271, "lr": 1.2069416205385902e-05, "epoch": 4.671378091872792, "percentage": 66.7, "elapsed_time": "13:32:45", "remaining_time": "6:45:50"} +{"current_steps": 5290, "total_steps": 7924, "loss": 0.2139, "lr": 1.2028991620272081e-05, "epoch": 4.675795053003534, "percentage": 66.76, "elapsed_time": "13:33:31", "remaining_time": "6:45:04"} +{"current_steps": 5295, "total_steps": 7924, "loss": 0.2503, "lr": 1.1988605712086199e-05, "epoch": 4.680212014134275, "percentage": 66.82, "elapsed_time": "13:34:17", "remaining_time": "6:44:18"} +{"current_steps": 5300, "total_steps": 7924, "loss": 0.269, "lr": 1.1948258676788751e-05, "epoch": 4.684628975265017, "percentage": 66.89, "elapsed_time": "13:35:03", "remaining_time": "6:43:31"} +{"current_steps": 5305, "total_steps": 7924, "loss": 0.2719, "lr": 1.190795071015165e-05, "epoch": 4.689045936395759, "percentage": 66.95, "elapsed_time": "13:35:49", "remaining_time": "6:42:45"} +{"current_steps": 5310, "total_steps": 7924, "loss": 0.2423, "lr": 1.1867682007757191e-05, "epoch": 4.693462897526501, "percentage": 67.01, "elapsed_time": "13:36:35", "remaining_time": "6:41:59"} +{"current_steps": 5315, "total_steps": 7924, "loss": 0.2419, "lr": 1.1827452764997198e-05, "epoch": 4.6978798586572434, "percentage": 67.07, "elapsed_time": "13:37:21", "remaining_time": "6:41:13"} +{"current_steps": 5320, "total_steps": 7924, "loss": 0.2423, "lr": 1.1787263177071997e-05, "epoch": 4.7022968197879855, "percentage": 67.14, "elapsed_time": "13:38:07", "remaining_time": "6:40:27"} +{"current_steps": 5325, "total_steps": 7924, "loss": 0.234, "lr": 1.174711343898952e-05, "epoch": 4.706713780918728, "percentage": 67.2, "elapsed_time": "13:38:53", "remaining_time": "6:39:40"} +{"current_steps": 5330, "total_steps": 7924, "loss": 0.232, "lr": 1.1707003745564319e-05, "epoch": 4.71113074204947, "percentage": 67.26, "elapsed_time": "13:39:39", "remaining_time": "6:38:54"} +{"current_steps": 5335, "total_steps": 7924, "loss": 0.2505, "lr": 1.1666934291416666e-05, "epoch": 4.715547703180212, "percentage": 67.33, "elapsed_time": "13:40:25", "remaining_time": "6:38:08"} +{"current_steps": 5340, "total_steps": 7924, "loss": 0.2342, "lr": 1.1626905270971563e-05, "epoch": 4.719964664310954, "percentage": 67.39, "elapsed_time": "13:41:11", "remaining_time": "6:37:22"} +{"current_steps": 5345, "total_steps": 7924, "loss": 0.235, "lr": 1.1586916878457837e-05, "epoch": 4.724381625441696, "percentage": 67.45, "elapsed_time": "13:41:57", "remaining_time": "6:36:35"} +{"current_steps": 5350, "total_steps": 7924, "loss": 0.2824, "lr": 1.1546969307907162e-05, "epoch": 4.728798586572438, "percentage": 67.52, "elapsed_time": "13:42:43", "remaining_time": "6:35:49"} +{"current_steps": 5355, "total_steps": 7924, "loss": 0.2466, "lr": 1.1507062753153155e-05, "epoch": 4.73321554770318, "percentage": 67.58, "elapsed_time": "13:43:28", "remaining_time": "6:35:03"} +{"current_steps": 5360, "total_steps": 7924, "loss": 0.2835, "lr": 1.1467197407830409e-05, "epoch": 4.737632508833922, "percentage": 67.64, "elapsed_time": "13:44:14", "remaining_time": "6:34:17"} +{"current_steps": 5365, "total_steps": 7924, "loss": 0.2764, "lr": 1.1427373465373541e-05, "epoch": 4.742049469964664, "percentage": 67.71, "elapsed_time": "13:45:00", "remaining_time": "6:33:30"} +{"current_steps": 5370, "total_steps": 7924, "loss": 0.2267, "lr": 1.1387591119016292e-05, "epoch": 4.746466431095406, "percentage": 67.77, "elapsed_time": "13:45:46", "remaining_time": "6:32:44"} +{"current_steps": 5375, "total_steps": 7924, "loss": 0.2895, "lr": 1.1347850561790594e-05, "epoch": 4.750883392226148, "percentage": 67.83, "elapsed_time": "13:46:32", "remaining_time": "6:31:58"} +{"current_steps": 5380, "total_steps": 7924, "loss": 0.2552, "lr": 1.1308151986525557e-05, "epoch": 4.7553003533568905, "percentage": 67.9, "elapsed_time": "13:47:18", "remaining_time": "6:31:12"} +{"current_steps": 5385, "total_steps": 7924, "loss": 0.2509, "lr": 1.1268495585846621e-05, "epoch": 4.759717314487633, "percentage": 67.96, "elapsed_time": "13:48:04", "remaining_time": "6:30:25"} +{"current_steps": 5390, "total_steps": 7924, "loss": 0.2416, "lr": 1.1228881552174585e-05, "epoch": 4.764134275618375, "percentage": 68.02, "elapsed_time": "13:48:50", "remaining_time": "6:29:39"} +{"current_steps": 5395, "total_steps": 7924, "loss": 0.2682, "lr": 1.1189310077724667e-05, "epoch": 4.768551236749117, "percentage": 68.08, "elapsed_time": "13:49:35", "remaining_time": "6:28:53"} +{"current_steps": 5400, "total_steps": 7924, "loss": 0.2789, "lr": 1.1149781354505565e-05, "epoch": 4.772968197879859, "percentage": 68.15, "elapsed_time": "13:50:21", "remaining_time": "6:28:07"} +{"current_steps": 5405, "total_steps": 7924, "loss": 0.2106, "lr": 1.111029557431858e-05, "epoch": 4.777385159010601, "percentage": 68.21, "elapsed_time": "13:51:07", "remaining_time": "6:27:20"} +{"current_steps": 5410, "total_steps": 7924, "loss": 0.2423, "lr": 1.1070852928756598e-05, "epoch": 4.781802120141343, "percentage": 68.27, "elapsed_time": "13:51:53", "remaining_time": "6:26:34"} +{"current_steps": 5415, "total_steps": 7924, "loss": 0.2544, "lr": 1.1031453609203244e-05, "epoch": 4.786219081272085, "percentage": 68.34, "elapsed_time": "13:52:39", "remaining_time": "6:25:48"} +{"current_steps": 5420, "total_steps": 7924, "loss": 0.2405, "lr": 1.0992097806831894e-05, "epoch": 4.790636042402827, "percentage": 68.4, "elapsed_time": "13:53:25", "remaining_time": "6:25:02"} +{"current_steps": 5425, "total_steps": 7924, "loss": 0.2846, "lr": 1.0952785712604777e-05, "epoch": 4.795053003533569, "percentage": 68.46, "elapsed_time": "13:54:11", "remaining_time": "6:24:16"} +{"current_steps": 5430, "total_steps": 7924, "loss": 0.2523, "lr": 1.0913517517272057e-05, "epoch": 4.799469964664311, "percentage": 68.53, "elapsed_time": "13:54:57", "remaining_time": "6:23:29"} +{"current_steps": 5435, "total_steps": 7924, "loss": 0.251, "lr": 1.0874293411370847e-05, "epoch": 4.803886925795053, "percentage": 68.59, "elapsed_time": "13:55:43", "remaining_time": "6:22:43"} +{"current_steps": 5440, "total_steps": 7924, "loss": 0.2611, "lr": 1.083511358522439e-05, "epoch": 4.8083038869257955, "percentage": 68.65, "elapsed_time": "13:56:29", "remaining_time": "6:21:57"} +{"current_steps": 5445, "total_steps": 7924, "loss": 0.2497, "lr": 1.0795978228941025e-05, "epoch": 4.8127208480565375, "percentage": 68.72, "elapsed_time": "13:57:14", "remaining_time": "6:21:10"} +{"current_steps": 5450, "total_steps": 7924, "loss": 0.2686, "lr": 1.0756887532413328e-05, "epoch": 4.81713780918728, "percentage": 68.78, "elapsed_time": "13:58:00", "remaining_time": "6:20:24"} +{"current_steps": 5455, "total_steps": 7924, "loss": 0.258, "lr": 1.0717841685317207e-05, "epoch": 4.821554770318021, "percentage": 68.84, "elapsed_time": "13:58:46", "remaining_time": "6:19:38"} +{"current_steps": 5460, "total_steps": 7924, "loss": 0.2883, "lr": 1.0678840877110906e-05, "epoch": 4.825971731448763, "percentage": 68.9, "elapsed_time": "13:59:33", "remaining_time": "6:18:52"} +{"current_steps": 5465, "total_steps": 7924, "loss": 0.2302, "lr": 1.0639885297034157e-05, "epoch": 4.830388692579505, "percentage": 68.97, "elapsed_time": "14:00:18", "remaining_time": "6:18:06"} +{"current_steps": 5470, "total_steps": 7924, "loss": 0.2783, "lr": 1.060097513410723e-05, "epoch": 4.834805653710247, "percentage": 69.03, "elapsed_time": "14:01:05", "remaining_time": "6:17:20"} +{"current_steps": 5475, "total_steps": 7924, "loss": 0.2754, "lr": 1.0562110577130031e-05, "epoch": 4.839222614840989, "percentage": 69.09, "elapsed_time": "14:01:51", "remaining_time": "6:16:34"} +{"current_steps": 5480, "total_steps": 7924, "loss": 0.2622, "lr": 1.0523291814681149e-05, "epoch": 4.843639575971731, "percentage": 69.16, "elapsed_time": "14:02:37", "remaining_time": "6:15:47"} +{"current_steps": 5485, "total_steps": 7924, "loss": 0.2484, "lr": 1.0484519035117015e-05, "epoch": 4.848056537102473, "percentage": 69.22, "elapsed_time": "14:03:25", "remaining_time": "6:15:02"} +{"current_steps": 5490, "total_steps": 7924, "loss": 0.2689, "lr": 1.0445792426570894e-05, "epoch": 4.852473498233215, "percentage": 69.28, "elapsed_time": "14:04:12", "remaining_time": "6:14:16"} +{"current_steps": 5495, "total_steps": 7924, "loss": 0.2246, "lr": 1.040711217695205e-05, "epoch": 4.8568904593639575, "percentage": 69.35, "elapsed_time": "14:04:58", "remaining_time": "6:13:30"} +{"current_steps": 5500, "total_steps": 7924, "loss": 0.2404, "lr": 1.0368478473944792e-05, "epoch": 4.8613074204946995, "percentage": 69.41, "elapsed_time": "14:05:44", "remaining_time": "6:12:44"} +{"current_steps": 5505, "total_steps": 7924, "loss": 0.242, "lr": 1.0329891505007582e-05, "epoch": 4.865724381625442, "percentage": 69.47, "elapsed_time": "14:06:31", "remaining_time": "6:11:58"} +{"current_steps": 5510, "total_steps": 7924, "loss": 0.2418, "lr": 1.029135145737212e-05, "epoch": 4.870141342756184, "percentage": 69.54, "elapsed_time": "14:07:17", "remaining_time": "6:11:12"} +{"current_steps": 5515, "total_steps": 7924, "loss": 0.253, "lr": 1.0252858518042413e-05, "epoch": 4.874558303886926, "percentage": 69.6, "elapsed_time": "14:08:04", "remaining_time": "6:10:26"} +{"current_steps": 5520, "total_steps": 7924, "loss": 0.2393, "lr": 1.0214412873793931e-05, "epoch": 4.878975265017668, "percentage": 69.66, "elapsed_time": "14:08:50", "remaining_time": "6:09:40"} +{"current_steps": 5525, "total_steps": 7924, "loss": 0.2694, "lr": 1.0176014711172615e-05, "epoch": 4.88339222614841, "percentage": 69.72, "elapsed_time": "14:09:37", "remaining_time": "6:08:54"} +{"current_steps": 5530, "total_steps": 7924, "loss": 0.2504, "lr": 1.0137664216494035e-05, "epoch": 4.887809187279152, "percentage": 69.79, "elapsed_time": "14:10:23", "remaining_time": "6:08:08"} +{"current_steps": 5535, "total_steps": 7924, "loss": 0.2173, "lr": 1.0099361575842486e-05, "epoch": 4.892226148409894, "percentage": 69.85, "elapsed_time": "14:11:10", "remaining_time": "6:07:22"} +{"current_steps": 5540, "total_steps": 7924, "loss": 0.2604, "lr": 1.0061106975070025e-05, "epoch": 4.896643109540636, "percentage": 69.91, "elapsed_time": "14:11:56", "remaining_time": "6:06:36"} +{"current_steps": 5545, "total_steps": 7924, "loss": 0.3287, "lr": 1.0022900599795641e-05, "epoch": 4.901060070671378, "percentage": 69.98, "elapsed_time": "14:12:41", "remaining_time": "6:05:50"} +{"current_steps": 5550, "total_steps": 7924, "loss": 0.2694, "lr": 9.984742635404313e-06, "epoch": 4.90547703180212, "percentage": 70.04, "elapsed_time": "14:13:28", "remaining_time": "6:05:04"} +{"current_steps": 5555, "total_steps": 7924, "loss": 0.2607, "lr": 9.946633267046125e-06, "epoch": 4.909893992932862, "percentage": 70.1, "elapsed_time": "14:14:15", "remaining_time": "6:04:18"} +{"current_steps": 5560, "total_steps": 7924, "loss": 0.2684, "lr": 9.908572679635337e-06, "epoch": 4.9143109540636045, "percentage": 70.17, "elapsed_time": "14:15:01", "remaining_time": "6:03:32"} +{"current_steps": 5565, "total_steps": 7924, "loss": 0.2712, "lr": 9.87056105784957e-06, "epoch": 4.918727915194347, "percentage": 70.23, "elapsed_time": "14:15:47", "remaining_time": "6:02:46"} +{"current_steps": 5570, "total_steps": 7924, "loss": 0.3051, "lr": 9.832598586128796e-06, "epoch": 4.923144876325089, "percentage": 70.29, "elapsed_time": "14:16:33", "remaining_time": "6:02:00"} +{"current_steps": 5575, "total_steps": 7924, "loss": 0.2447, "lr": 9.794685448674533e-06, "epoch": 4.927561837455831, "percentage": 70.36, "elapsed_time": "14:17:20", "remaining_time": "6:01:14"} +{"current_steps": 5580, "total_steps": 7924, "loss": 0.2278, "lr": 9.756821829448911e-06, "epoch": 4.931978798586572, "percentage": 70.42, "elapsed_time": "14:18:06", "remaining_time": "6:00:28"} +{"current_steps": 5585, "total_steps": 7924, "loss": 0.2551, "lr": 9.719007912173786e-06, "epoch": 4.936395759717314, "percentage": 70.48, "elapsed_time": "14:18:53", "remaining_time": "5:59:42"} +{"current_steps": 5590, "total_steps": 7924, "loss": 0.2973, "lr": 9.681243880329864e-06, "epoch": 4.940812720848056, "percentage": 70.55, "elapsed_time": "14:19:39", "remaining_time": "5:58:55"} +{"current_steps": 5595, "total_steps": 7924, "loss": 0.2431, "lr": 9.643529917155765e-06, "epoch": 4.945229681978798, "percentage": 70.61, "elapsed_time": "14:20:25", "remaining_time": "5:58:10"} +{"current_steps": 5600, "total_steps": 7924, "loss": 0.2326, "lr": 9.60586620564721e-06, "epoch": 4.94964664310954, "percentage": 70.67, "elapsed_time": "14:21:12", "remaining_time": "5:57:24"} +{"current_steps": 5605, "total_steps": 7924, "loss": 0.2577, "lr": 9.568252928556045e-06, "epoch": 4.954063604240282, "percentage": 70.73, "elapsed_time": "14:21:58", "remaining_time": "5:56:37"} +{"current_steps": 5610, "total_steps": 7924, "loss": 0.2536, "lr": 9.530690268389419e-06, "epoch": 4.958480565371024, "percentage": 70.8, "elapsed_time": "14:22:44", "remaining_time": "5:55:51"} +{"current_steps": 5615, "total_steps": 7924, "loss": 0.2502, "lr": 9.493178407408898e-06, "epoch": 4.9628975265017665, "percentage": 70.86, "elapsed_time": "14:23:30", "remaining_time": "5:55:05"} +{"current_steps": 5620, "total_steps": 7924, "loss": 0.2612, "lr": 9.45571752762952e-06, "epoch": 4.967314487632509, "percentage": 70.92, "elapsed_time": "14:24:16", "remaining_time": "5:54:19"} +{"current_steps": 5625, "total_steps": 7924, "loss": 0.2224, "lr": 9.418307810818974e-06, "epoch": 4.971731448763251, "percentage": 70.99, "elapsed_time": "14:25:02", "remaining_time": "5:53:33"} +{"current_steps": 5630, "total_steps": 7924, "loss": 0.2443, "lr": 9.380949438496694e-06, "epoch": 4.976148409893993, "percentage": 71.05, "elapsed_time": "14:25:48", "remaining_time": "5:52:46"} +{"current_steps": 5635, "total_steps": 7924, "loss": 0.2481, "lr": 9.343642591932986e-06, "epoch": 4.980565371024735, "percentage": 71.11, "elapsed_time": "14:26:34", "remaining_time": "5:52:00"} +{"current_steps": 5640, "total_steps": 7924, "loss": 0.2555, "lr": 9.306387452148117e-06, "epoch": 4.984982332155477, "percentage": 71.18, "elapsed_time": "14:27:20", "remaining_time": "5:51:14"} +{"current_steps": 5645, "total_steps": 7924, "loss": 0.2758, "lr": 9.269184199911507e-06, "epoch": 4.989399293286219, "percentage": 71.24, "elapsed_time": "14:28:05", "remaining_time": "5:50:28"} +{"current_steps": 5650, "total_steps": 7924, "loss": 0.283, "lr": 9.232033015740765e-06, "epoch": 4.993816254416961, "percentage": 71.3, "elapsed_time": "14:28:52", "remaining_time": "5:49:42"} +{"current_steps": 5655, "total_steps": 7924, "loss": 0.2749, "lr": 9.19493407990087e-06, "epoch": 4.998233215547703, "percentage": 71.37, "elapsed_time": "14:29:39", "remaining_time": "5:48:56"} +{"current_steps": 5660, "total_steps": 7924, "loss": 0.2212, "lr": 9.157887572403292e-06, "epoch": 5.003533568904594, "percentage": 71.43, "elapsed_time": "14:30:25", "remaining_time": "5:48:10"} +{"current_steps": 5665, "total_steps": 7924, "loss": 0.2174, "lr": 9.120893673005095e-06, "epoch": 5.007950530035336, "percentage": 71.49, "elapsed_time": "14:31:11", "remaining_time": "5:47:23"} +{"current_steps": 5670, "total_steps": 7924, "loss": 0.2525, "lr": 9.083952561208093e-06, "epoch": 5.012367491166078, "percentage": 71.55, "elapsed_time": "14:31:57", "remaining_time": "5:46:37"} +{"current_steps": 5675, "total_steps": 7924, "loss": 0.2458, "lr": 9.04706441625793e-06, "epoch": 5.01678445229682, "percentage": 71.62, "elapsed_time": "14:32:43", "remaining_time": "5:45:51"} +{"current_steps": 5680, "total_steps": 7924, "loss": 0.2348, "lr": 9.010229417143298e-06, "epoch": 5.021201413427562, "percentage": 71.68, "elapsed_time": "14:33:29", "remaining_time": "5:45:05"} +{"current_steps": 5685, "total_steps": 7924, "loss": 0.2264, "lr": 8.973447742594959e-06, "epoch": 5.025618374558304, "percentage": 71.74, "elapsed_time": "14:34:15", "remaining_time": "5:44:19"} +{"current_steps": 5690, "total_steps": 7924, "loss": 0.2468, "lr": 8.936719571084964e-06, "epoch": 5.030035335689046, "percentage": 71.81, "elapsed_time": "14:35:01", "remaining_time": "5:43:33"} +{"current_steps": 5695, "total_steps": 7924, "loss": 0.1942, "lr": 8.900045080825772e-06, "epoch": 5.034452296819788, "percentage": 71.87, "elapsed_time": "14:35:47", "remaining_time": "5:42:46"} +{"current_steps": 5700, "total_steps": 7924, "loss": 0.2063, "lr": 8.863424449769326e-06, "epoch": 5.03886925795053, "percentage": 71.93, "elapsed_time": "14:36:33", "remaining_time": "5:42:00"} +{"current_steps": 5705, "total_steps": 7924, "loss": 0.2236, "lr": 8.826857855606268e-06, "epoch": 5.043286219081272, "percentage": 72.0, "elapsed_time": "14:37:19", "remaining_time": "5:41:14"} +{"current_steps": 5710, "total_steps": 7924, "loss": 0.2325, "lr": 8.790345475765028e-06, "epoch": 5.0477031802120145, "percentage": 72.06, "elapsed_time": "14:38:05", "remaining_time": "5:40:28"} +{"current_steps": 5715, "total_steps": 7924, "loss": 0.244, "lr": 8.753887487410988e-06, "epoch": 5.0521201413427566, "percentage": 72.12, "elapsed_time": "14:38:51", "remaining_time": "5:39:42"} +{"current_steps": 5720, "total_steps": 7924, "loss": 0.2635, "lr": 8.71748406744559e-06, "epoch": 5.056537102473499, "percentage": 72.19, "elapsed_time": "14:39:37", "remaining_time": "5:38:55"} +{"current_steps": 5725, "total_steps": 7924, "loss": 0.2934, "lr": 8.681135392505521e-06, "epoch": 5.060954063604241, "percentage": 72.25, "elapsed_time": "14:40:23", "remaining_time": "5:38:09"} +{"current_steps": 5730, "total_steps": 7924, "loss": 0.2103, "lr": 8.644841638961827e-06, "epoch": 5.065371024734982, "percentage": 72.31, "elapsed_time": "14:41:09", "remaining_time": "5:37:23"} +{"current_steps": 5735, "total_steps": 7924, "loss": 0.2306, "lr": 8.608602982919061e-06, "epoch": 5.069787985865724, "percentage": 72.38, "elapsed_time": "14:41:55", "remaining_time": "5:36:37"} +{"current_steps": 5740, "total_steps": 7924, "loss": 0.2622, "lr": 8.57241960021444e-06, "epoch": 5.074204946996466, "percentage": 72.44, "elapsed_time": "14:42:41", "remaining_time": "5:35:51"} +{"current_steps": 5745, "total_steps": 7924, "loss": 0.2268, "lr": 8.536291666416971e-06, "epoch": 5.078621908127208, "percentage": 72.5, "elapsed_time": "14:43:26", "remaining_time": "5:35:04"} +{"current_steps": 5750, "total_steps": 7924, "loss": 0.2813, "lr": 8.500219356826633e-06, "epoch": 5.08303886925795, "percentage": 72.56, "elapsed_time": "14:44:12", "remaining_time": "5:34:18"} +{"current_steps": 5755, "total_steps": 7924, "loss": 0.2059, "lr": 8.464202846473467e-06, "epoch": 5.087455830388692, "percentage": 72.63, "elapsed_time": "14:44:59", "remaining_time": "5:33:32"} +{"current_steps": 5760, "total_steps": 7924, "loss": 0.2579, "lr": 8.428242310116817e-06, "epoch": 5.091872791519434, "percentage": 72.69, "elapsed_time": "14:45:45", "remaining_time": "5:32:46"} +{"current_steps": 5765, "total_steps": 7924, "loss": 0.2401, "lr": 8.392337922244383e-06, "epoch": 5.0962897526501765, "percentage": 72.75, "elapsed_time": "14:46:30", "remaining_time": "5:32:00"} +{"current_steps": 5770, "total_steps": 7924, "loss": 0.2129, "lr": 8.35648985707144e-06, "epoch": 5.1007067137809186, "percentage": 72.82, "elapsed_time": "14:47:16", "remaining_time": "5:31:13"} +{"current_steps": 5775, "total_steps": 7924, "loss": 0.2755, "lr": 8.320698288539997e-06, "epoch": 5.105123674911661, "percentage": 72.88, "elapsed_time": "14:48:02", "remaining_time": "5:30:27"} +{"current_steps": 5780, "total_steps": 7924, "loss": 0.1896, "lr": 8.284963390317885e-06, "epoch": 5.109540636042403, "percentage": 72.94, "elapsed_time": "14:48:48", "remaining_time": "5:29:41"} +{"current_steps": 5785, "total_steps": 7924, "loss": 0.2427, "lr": 8.24928533579799e-06, "epoch": 5.113957597173145, "percentage": 73.01, "elapsed_time": "14:49:34", "remaining_time": "5:28:55"} +{"current_steps": 5790, "total_steps": 7924, "loss": 0.2294, "lr": 8.21366429809737e-06, "epoch": 5.118374558303887, "percentage": 73.07, "elapsed_time": "14:50:20", "remaining_time": "5:28:09"} +{"current_steps": 5795, "total_steps": 7924, "loss": 0.2844, "lr": 8.17810045005644e-06, "epoch": 5.122791519434629, "percentage": 73.13, "elapsed_time": "14:51:06", "remaining_time": "5:27:22"} +{"current_steps": 5800, "total_steps": 7924, "loss": 0.2243, "lr": 8.142593964238092e-06, "epoch": 5.127208480565371, "percentage": 73.2, "elapsed_time": "14:51:52", "remaining_time": "5:26:36"} +{"current_steps": 5805, "total_steps": 7924, "loss": 0.2261, "lr": 8.107145012926909e-06, "epoch": 5.131625441696113, "percentage": 73.26, "elapsed_time": "14:52:38", "remaining_time": "5:25:50"} +{"current_steps": 5810, "total_steps": 7924, "loss": 0.2198, "lr": 8.071753768128299e-06, "epoch": 5.136042402826855, "percentage": 73.32, "elapsed_time": "14:53:24", "remaining_time": "5:25:04"} +{"current_steps": 5815, "total_steps": 7924, "loss": 0.236, "lr": 8.036420401567662e-06, "epoch": 5.140459363957597, "percentage": 73.38, "elapsed_time": "14:54:10", "remaining_time": "5:24:18"} +{"current_steps": 5820, "total_steps": 7924, "loss": 0.2842, "lr": 8.001145084689563e-06, "epoch": 5.144876325088339, "percentage": 73.45, "elapsed_time": "14:54:56", "remaining_time": "5:23:31"} +{"current_steps": 5825, "total_steps": 7924, "loss": 0.2422, "lr": 7.965927988656903e-06, "epoch": 5.1492932862190814, "percentage": 73.51, "elapsed_time": "14:55:42", "remaining_time": "5:22:45"} +{"current_steps": 5830, "total_steps": 7924, "loss": 0.2294, "lr": 7.930769284350084e-06, "epoch": 5.1537102473498235, "percentage": 73.57, "elapsed_time": "14:56:28", "remaining_time": "5:21:59"} +{"current_steps": 5835, "total_steps": 7924, "loss": 0.2181, "lr": 7.895669142366159e-06, "epoch": 5.158127208480566, "percentage": 73.64, "elapsed_time": "14:57:14", "remaining_time": "5:21:13"} +{"current_steps": 5840, "total_steps": 7924, "loss": 0.2292, "lr": 7.860627733018065e-06, "epoch": 5.162544169611308, "percentage": 73.7, "elapsed_time": "14:57:59", "remaining_time": "5:20:26"} +{"current_steps": 5845, "total_steps": 7924, "loss": 0.219, "lr": 7.825645226333714e-06, "epoch": 5.16696113074205, "percentage": 73.76, "elapsed_time": "14:58:45", "remaining_time": "5:19:40"} +{"current_steps": 5850, "total_steps": 7924, "loss": 0.2321, "lr": 7.79072179205523e-06, "epoch": 5.171378091872792, "percentage": 73.83, "elapsed_time": "14:59:31", "remaining_time": "5:18:54"} +{"current_steps": 5855, "total_steps": 7924, "loss": 0.2032, "lr": 7.755857599638124e-06, "epoch": 5.175795053003534, "percentage": 73.89, "elapsed_time": "15:00:17", "remaining_time": "5:18:08"} +{"current_steps": 5860, "total_steps": 7924, "loss": 0.2759, "lr": 7.721052818250419e-06, "epoch": 5.180212014134276, "percentage": 73.95, "elapsed_time": "15:01:03", "remaining_time": "5:17:22"} +{"current_steps": 5865, "total_steps": 7924, "loss": 0.2458, "lr": 7.686307616771883e-06, "epoch": 5.184628975265018, "percentage": 74.02, "elapsed_time": "15:01:49", "remaining_time": "5:16:36"} +{"current_steps": 5870, "total_steps": 7924, "loss": 0.247, "lr": 7.651622163793189e-06, "epoch": 5.189045936395759, "percentage": 74.08, "elapsed_time": "15:02:36", "remaining_time": "5:15:50"} +{"current_steps": 5875, "total_steps": 7924, "loss": 0.2295, "lr": 7.616996627615103e-06, "epoch": 5.193462897526501, "percentage": 74.14, "elapsed_time": "15:03:22", "remaining_time": "5:15:03"} +{"current_steps": 5880, "total_steps": 7924, "loss": 0.2714, "lr": 7.582431176247642e-06, "epoch": 5.1978798586572434, "percentage": 74.2, "elapsed_time": "15:04:08", "remaining_time": "5:14:17"} +{"current_steps": 5885, "total_steps": 7924, "loss": 0.2119, "lr": 7.547925977409301e-06, "epoch": 5.2022968197879855, "percentage": 74.27, "elapsed_time": "15:04:53", "remaining_time": "5:13:31"} +{"current_steps": 5890, "total_steps": 7924, "loss": 0.2842, "lr": 7.5134811985262115e-06, "epoch": 5.206713780918728, "percentage": 74.33, "elapsed_time": "15:05:39", "remaining_time": "5:12:45"} +{"current_steps": 5895, "total_steps": 7924, "loss": 0.2569, "lr": 7.479097006731333e-06, "epoch": 5.21113074204947, "percentage": 74.39, "elapsed_time": "15:06:25", "remaining_time": "5:11:59"} +{"current_steps": 5900, "total_steps": 7924, "loss": 0.2368, "lr": 7.444773568863646e-06, "epoch": 5.215547703180212, "percentage": 74.46, "elapsed_time": "15:07:11", "remaining_time": "5:11:12"} +{"current_steps": 5905, "total_steps": 7924, "loss": 0.247, "lr": 7.410511051467339e-06, "epoch": 5.219964664310954, "percentage": 74.52, "elapsed_time": "15:07:57", "remaining_time": "5:10:26"} +{"current_steps": 5910, "total_steps": 7924, "loss": 0.1984, "lr": 7.376309620791016e-06, "epoch": 5.224381625441696, "percentage": 74.58, "elapsed_time": "15:08:43", "remaining_time": "5:09:40"} +{"current_steps": 5915, "total_steps": 7924, "loss": 0.2352, "lr": 7.342169442786835e-06, "epoch": 5.228798586572438, "percentage": 74.65, "elapsed_time": "15:09:29", "remaining_time": "5:08:54"} +{"current_steps": 5920, "total_steps": 7924, "loss": 0.2557, "lr": 7.308090683109803e-06, "epoch": 5.23321554770318, "percentage": 74.71, "elapsed_time": "15:10:15", "remaining_time": "5:08:07"} +{"current_steps": 5925, "total_steps": 7924, "loss": 0.2537, "lr": 7.274073507116865e-06, "epoch": 5.237632508833922, "percentage": 74.77, "elapsed_time": "15:11:00", "remaining_time": "5:07:21"} +{"current_steps": 5930, "total_steps": 7924, "loss": 0.2208, "lr": 7.240118079866163e-06, "epoch": 5.242049469964664, "percentage": 74.84, "elapsed_time": "15:11:46", "remaining_time": "5:06:35"} +{"current_steps": 5935, "total_steps": 7924, "loss": 0.2618, "lr": 7.206224566116247e-06, "epoch": 5.246466431095406, "percentage": 74.9, "elapsed_time": "15:12:32", "remaining_time": "5:05:49"} +{"current_steps": 5940, "total_steps": 7924, "loss": 0.2298, "lr": 7.172393130325208e-06, "epoch": 5.250883392226148, "percentage": 74.96, "elapsed_time": "15:13:18", "remaining_time": "5:05:03"} +{"current_steps": 5945, "total_steps": 7924, "loss": 0.2273, "lr": 7.138623936649951e-06, "epoch": 5.2553003533568905, "percentage": 75.03, "elapsed_time": "15:14:04", "remaining_time": "5:04:17"} +{"current_steps": 5950, "total_steps": 7924, "loss": 0.2597, "lr": 7.104917148945363e-06, "epoch": 5.259717314487633, "percentage": 75.09, "elapsed_time": "15:14:50", "remaining_time": "5:03:30"} +{"current_steps": 5955, "total_steps": 7924, "loss": 0.2464, "lr": 7.0712729307635284e-06, "epoch": 5.264134275618375, "percentage": 75.15, "elapsed_time": "15:15:36", "remaining_time": "5:02:44"} +{"current_steps": 5960, "total_steps": 7924, "loss": 0.2446, "lr": 7.037691445352917e-06, "epoch": 5.268551236749117, "percentage": 75.21, "elapsed_time": "15:16:23", "remaining_time": "5:01:58"} +{"current_steps": 5965, "total_steps": 7924, "loss": 0.2521, "lr": 7.00417285565762e-06, "epoch": 5.272968197879859, "percentage": 75.28, "elapsed_time": "15:17:08", "remaining_time": "5:01:12"} +{"current_steps": 5970, "total_steps": 7924, "loss": 0.2666, "lr": 6.970717324316545e-06, "epoch": 5.277385159010601, "percentage": 75.34, "elapsed_time": "15:17:54", "remaining_time": "5:00:26"} +{"current_steps": 5975, "total_steps": 7924, "loss": 0.2322, "lr": 6.937325013662623e-06, "epoch": 5.281802120141343, "percentage": 75.4, "elapsed_time": "15:18:40", "remaining_time": "4:59:39"} +{"current_steps": 5980, "total_steps": 7924, "loss": 0.233, "lr": 6.903996085722033e-06, "epoch": 5.286219081272085, "percentage": 75.47, "elapsed_time": "15:19:26", "remaining_time": "4:58:53"} +{"current_steps": 5985, "total_steps": 7924, "loss": 0.2401, "lr": 6.8707307022134e-06, "epoch": 5.290636042402827, "percentage": 75.53, "elapsed_time": "15:20:12", "remaining_time": "4:58:07"} +{"current_steps": 5990, "total_steps": 7924, "loss": 0.2582, "lr": 6.8375290245470296e-06, "epoch": 5.295053003533569, "percentage": 75.59, "elapsed_time": "15:20:58", "remaining_time": "4:57:21"} +{"current_steps": 5995, "total_steps": 7924, "loss": 0.2549, "lr": 6.804391213824087e-06, "epoch": 5.299469964664311, "percentage": 75.66, "elapsed_time": "15:21:44", "remaining_time": "4:56:35"} +{"current_steps": 6000, "total_steps": 7924, "loss": 0.2322, "lr": 6.771317430835888e-06, "epoch": 5.303886925795053, "percentage": 75.72, "elapsed_time": "15:22:30", "remaining_time": "4:55:49"} +{"current_steps": 6005, "total_steps": 7924, "loss": 0.2568, "lr": 6.73830783606303e-06, "epoch": 5.3083038869257955, "percentage": 75.78, "elapsed_time": "15:23:57", "remaining_time": "4:55:15"} +{"current_steps": 6010, "total_steps": 7924, "loss": 0.2372, "lr": 6.705362589674667e-06, "epoch": 5.3127208480565375, "percentage": 75.85, "elapsed_time": "15:24:43", "remaining_time": "4:54:29"} +{"current_steps": 6015, "total_steps": 7924, "loss": 0.2299, "lr": 6.6724818515277544e-06, "epoch": 5.317137809187279, "percentage": 75.91, "elapsed_time": "15:25:29", "remaining_time": "4:53:43"} +{"current_steps": 6020, "total_steps": 7924, "loss": 0.2595, "lr": 6.639665781166189e-06, "epoch": 5.321554770318021, "percentage": 75.97, "elapsed_time": "15:26:15", "remaining_time": "4:52:57"} +{"current_steps": 6025, "total_steps": 7924, "loss": 0.2572, "lr": 6.606914537820122e-06, "epoch": 5.325971731448763, "percentage": 76.03, "elapsed_time": "15:27:00", "remaining_time": "4:52:11"} +{"current_steps": 6030, "total_steps": 7924, "loss": 0.2525, "lr": 6.574228280405139e-06, "epoch": 5.330388692579505, "percentage": 76.1, "elapsed_time": "15:27:46", "remaining_time": "4:51:24"} +{"current_steps": 6035, "total_steps": 7924, "loss": 0.233, "lr": 6.5416071675215136e-06, "epoch": 5.334805653710247, "percentage": 76.16, "elapsed_time": "15:28:32", "remaining_time": "4:50:38"} +{"current_steps": 6040, "total_steps": 7924, "loss": 0.2251, "lr": 6.509051357453393e-06, "epoch": 5.339222614840989, "percentage": 76.22, "elapsed_time": "15:29:18", "remaining_time": "4:49:52"} +{"current_steps": 6045, "total_steps": 7924, "loss": 0.2538, "lr": 6.476561008168096e-06, "epoch": 5.343639575971731, "percentage": 76.29, "elapsed_time": "15:30:04", "remaining_time": "4:49:06"} +{"current_steps": 6050, "total_steps": 7924, "loss": 0.2336, "lr": 6.444136277315296e-06, "epoch": 5.348056537102473, "percentage": 76.35, "elapsed_time": "15:30:50", "remaining_time": "4:48:19"} +{"current_steps": 6055, "total_steps": 7924, "loss": 0.2358, "lr": 6.4117773222262805e-06, "epoch": 5.352473498233215, "percentage": 76.41, "elapsed_time": "15:31:36", "remaining_time": "4:47:33"} +{"current_steps": 6060, "total_steps": 7924, "loss": 0.2533, "lr": 6.379484299913172e-06, "epoch": 5.3568904593639575, "percentage": 76.48, "elapsed_time": "15:32:22", "remaining_time": "4:46:47"} +{"current_steps": 6065, "total_steps": 7924, "loss": 0.2593, "lr": 6.3472573670681805e-06, "epoch": 5.3613074204946995, "percentage": 76.54, "elapsed_time": "15:33:08", "remaining_time": "4:46:01"} +{"current_steps": 6070, "total_steps": 7924, "loss": 0.2525, "lr": 6.315096680062838e-06, "epoch": 5.365724381625442, "percentage": 76.6, "elapsed_time": "15:33:54", "remaining_time": "4:45:14"} +{"current_steps": 6075, "total_steps": 7924, "loss": 0.2355, "lr": 6.283002394947216e-06, "epoch": 5.370141342756184, "percentage": 76.67, "elapsed_time": "15:34:40", "remaining_time": "4:44:28"} +{"current_steps": 6080, "total_steps": 7924, "loss": 0.2552, "lr": 6.2509746674492346e-06, "epoch": 5.374558303886926, "percentage": 76.73, "elapsed_time": "15:35:26", "remaining_time": "4:43:42"} +{"current_steps": 6085, "total_steps": 7924, "loss": 0.2316, "lr": 6.21901365297382e-06, "epoch": 5.378975265017668, "percentage": 76.79, "elapsed_time": "15:36:12", "remaining_time": "4:42:56"} +{"current_steps": 6090, "total_steps": 7924, "loss": 0.2751, "lr": 6.187119506602215e-06, "epoch": 5.38339222614841, "percentage": 76.86, "elapsed_time": "15:36:58", "remaining_time": "4:42:10"} +{"current_steps": 6095, "total_steps": 7924, "loss": 0.2403, "lr": 6.1552923830912e-06, "epoch": 5.387809187279152, "percentage": 76.92, "elapsed_time": "15:37:44", "remaining_time": "4:41:23"} +{"current_steps": 6100, "total_steps": 7924, "loss": 0.2475, "lr": 6.123532436872353e-06, "epoch": 5.392226148409894, "percentage": 76.98, "elapsed_time": "15:38:30", "remaining_time": "4:40:37"} +{"current_steps": 6105, "total_steps": 7924, "loss": 0.2906, "lr": 6.091839822051284e-06, "epoch": 5.396643109540636, "percentage": 77.04, "elapsed_time": "15:39:16", "remaining_time": "4:39:51"} +{"current_steps": 6110, "total_steps": 7924, "loss": 0.2527, "lr": 6.060214692406905e-06, "epoch": 5.401060070671378, "percentage": 77.11, "elapsed_time": "15:40:02", "remaining_time": "4:39:05"} +{"current_steps": 6115, "total_steps": 7924, "loss": 0.224, "lr": 6.028657201390682e-06, "epoch": 5.40547703180212, "percentage": 77.17, "elapsed_time": "15:40:48", "remaining_time": "4:38:19"} +{"current_steps": 6120, "total_steps": 7924, "loss": 0.2456, "lr": 5.99716750212586e-06, "epoch": 5.409893992932862, "percentage": 77.23, "elapsed_time": "15:41:34", "remaining_time": "4:37:33"} +{"current_steps": 6125, "total_steps": 7924, "loss": 0.2818, "lr": 5.965745747406775e-06, "epoch": 5.4143109540636045, "percentage": 77.3, "elapsed_time": "15:42:20", "remaining_time": "4:36:46"} +{"current_steps": 6130, "total_steps": 7924, "loss": 0.2124, "lr": 5.934392089698064e-06, "epoch": 5.418727915194347, "percentage": 77.36, "elapsed_time": "15:43:06", "remaining_time": "4:36:00"} +{"current_steps": 6135, "total_steps": 7924, "loss": 0.2021, "lr": 5.903106681133952e-06, "epoch": 5.423144876325089, "percentage": 77.42, "elapsed_time": "15:43:52", "remaining_time": "4:35:14"} +{"current_steps": 6140, "total_steps": 7924, "loss": 0.2439, "lr": 5.871889673517501e-06, "epoch": 5.427561837455831, "percentage": 77.49, "elapsed_time": "15:44:38", "remaining_time": "4:34:28"} +{"current_steps": 6145, "total_steps": 7924, "loss": 0.2563, "lr": 5.840741218319881e-06, "epoch": 5.431978798586573, "percentage": 77.55, "elapsed_time": "15:45:24", "remaining_time": "4:33:41"} +{"current_steps": 6150, "total_steps": 7924, "loss": 0.246, "lr": 5.809661466679635e-06, "epoch": 5.436395759717314, "percentage": 77.61, "elapsed_time": "15:46:10", "remaining_time": "4:32:55"} +{"current_steps": 6155, "total_steps": 7924, "loss": 0.2374, "lr": 5.778650569401922e-06, "epoch": 5.440812720848056, "percentage": 77.68, "elapsed_time": "15:46:56", "remaining_time": "4:32:09"} +{"current_steps": 6160, "total_steps": 7924, "loss": 0.2198, "lr": 5.747708676957844e-06, "epoch": 5.445229681978798, "percentage": 77.74, "elapsed_time": "15:47:42", "remaining_time": "4:31:23"} +{"current_steps": 6165, "total_steps": 7924, "loss": 0.2423, "lr": 5.716835939483641e-06, "epoch": 5.44964664310954, "percentage": 77.8, "elapsed_time": "15:48:27", "remaining_time": "4:30:36"} +{"current_steps": 6170, "total_steps": 7924, "loss": 0.2228, "lr": 5.686032506780015e-06, "epoch": 5.454063604240282, "percentage": 77.86, "elapsed_time": "15:49:13", "remaining_time": "4:29:50"} +{"current_steps": 6175, "total_steps": 7924, "loss": 0.243, "lr": 5.655298528311388e-06, "epoch": 5.458480565371024, "percentage": 77.93, "elapsed_time": "15:49:59", "remaining_time": "4:29:04"} +{"current_steps": 6180, "total_steps": 7924, "loss": 0.2485, "lr": 5.624634153205178e-06, "epoch": 5.4628975265017665, "percentage": 77.99, "elapsed_time": "15:50:45", "remaining_time": "4:28:18"} +{"current_steps": 6185, "total_steps": 7924, "loss": 0.2386, "lr": 5.594039530251065e-06, "epoch": 5.467314487632509, "percentage": 78.05, "elapsed_time": "15:51:31", "remaining_time": "4:27:32"} +{"current_steps": 6190, "total_steps": 7924, "loss": 0.2508, "lr": 5.563514807900285e-06, "epoch": 5.471731448763251, "percentage": 78.12, "elapsed_time": "15:52:17", "remaining_time": "4:26:45"} +{"current_steps": 6195, "total_steps": 7924, "loss": 0.2262, "lr": 5.533060134264907e-06, "epoch": 5.476148409893993, "percentage": 78.18, "elapsed_time": "15:53:03", "remaining_time": "4:25:59"} +{"current_steps": 6200, "total_steps": 7924, "loss": 0.2373, "lr": 5.5026756571170896e-06, "epoch": 5.480565371024735, "percentage": 78.24, "elapsed_time": "15:53:48", "remaining_time": "4:25:13"} +{"current_steps": 6205, "total_steps": 7924, "loss": 0.2203, "lr": 5.472361523888401e-06, "epoch": 5.484982332155477, "percentage": 78.31, "elapsed_time": "15:54:34", "remaining_time": "4:24:27"} +{"current_steps": 6210, "total_steps": 7924, "loss": 0.2265, "lr": 5.442117881669085e-06, "epoch": 5.489399293286219, "percentage": 78.37, "elapsed_time": "15:55:20", "remaining_time": "4:23:40"} +{"current_steps": 6215, "total_steps": 7924, "loss": 0.2506, "lr": 5.411944877207347e-06, "epoch": 5.493816254416961, "percentage": 78.43, "elapsed_time": "15:56:06", "remaining_time": "4:22:54"} +{"current_steps": 6220, "total_steps": 7924, "loss": 0.2008, "lr": 5.38184265690864e-06, "epoch": 5.498233215547703, "percentage": 78.5, "elapsed_time": "15:56:52", "remaining_time": "4:22:08"} +{"current_steps": 6225, "total_steps": 7924, "loss": 0.2397, "lr": 5.3518113668349645e-06, "epoch": 5.502650176678445, "percentage": 78.56, "elapsed_time": "15:57:37", "remaining_time": "4:21:22"} +{"current_steps": 6230, "total_steps": 7924, "loss": 0.2621, "lr": 5.321851152704154e-06, "epoch": 5.507067137809187, "percentage": 78.62, "elapsed_time": "15:58:23", "remaining_time": "4:20:35"} +{"current_steps": 6235, "total_steps": 7924, "loss": 0.2454, "lr": 5.291962159889148e-06, "epoch": 5.511484098939929, "percentage": 78.69, "elapsed_time": "15:59:09", "remaining_time": "4:19:49"} +{"current_steps": 6240, "total_steps": 7924, "loss": 0.2354, "lr": 5.262144533417344e-06, "epoch": 5.5159010600706715, "percentage": 78.75, "elapsed_time": "15:59:55", "remaining_time": "4:19:03"} +{"current_steps": 6245, "total_steps": 7924, "loss": 0.2233, "lr": 5.232398417969815e-06, "epoch": 5.520318021201414, "percentage": 78.81, "elapsed_time": "16:00:41", "remaining_time": "4:18:17"} +{"current_steps": 6250, "total_steps": 7924, "loss": 0.2322, "lr": 5.2027239578806734e-06, "epoch": 5.524734982332156, "percentage": 78.87, "elapsed_time": "16:01:27", "remaining_time": "4:17:30"} +{"current_steps": 6255, "total_steps": 7924, "loss": 0.201, "lr": 5.173121297136337e-06, "epoch": 5.529151943462898, "percentage": 78.94, "elapsed_time": "16:02:13", "remaining_time": "4:16:44"} +{"current_steps": 6260, "total_steps": 7924, "loss": 0.2965, "lr": 5.14359057937484e-06, "epoch": 5.53356890459364, "percentage": 79.0, "elapsed_time": "16:02:59", "remaining_time": "4:15:58"} +{"current_steps": 6265, "total_steps": 7924, "loss": 0.2873, "lr": 5.114131947885137e-06, "epoch": 5.537985865724382, "percentage": 79.06, "elapsed_time": "16:03:45", "remaining_time": "4:15:12"} +{"current_steps": 6270, "total_steps": 7924, "loss": 0.2543, "lr": 5.084745545606402e-06, "epoch": 5.542402826855124, "percentage": 79.13, "elapsed_time": "16:04:30", "remaining_time": "4:14:26"} +{"current_steps": 6275, "total_steps": 7924, "loss": 0.2463, "lr": 5.055431515127349e-06, "epoch": 5.546819787985866, "percentage": 79.19, "elapsed_time": "16:05:16", "remaining_time": "4:13:39"} +{"current_steps": 6280, "total_steps": 7924, "loss": 0.2449, "lr": 5.026189998685504e-06, "epoch": 5.551236749116608, "percentage": 79.25, "elapsed_time": "16:06:02", "remaining_time": "4:12:53"} +{"current_steps": 6285, "total_steps": 7924, "loss": 0.275, "lr": 4.9970211381665665e-06, "epoch": 5.55565371024735, "percentage": 79.32, "elapsed_time": "16:06:48", "remaining_time": "4:12:07"} +{"current_steps": 6290, "total_steps": 7924, "loss": 0.2554, "lr": 4.967925075103685e-06, "epoch": 5.560070671378092, "percentage": 79.38, "elapsed_time": "16:07:34", "remaining_time": "4:11:21"} +{"current_steps": 6295, "total_steps": 7924, "loss": 0.2563, "lr": 4.93890195067678e-06, "epoch": 5.564487632508834, "percentage": 79.44, "elapsed_time": "16:08:20", "remaining_time": "4:10:34"} +{"current_steps": 6300, "total_steps": 7924, "loss": 0.2297, "lr": 4.909951905711858e-06, "epoch": 5.5689045936395765, "percentage": 79.51, "elapsed_time": "16:09:06", "remaining_time": "4:09:48"} +{"current_steps": 6305, "total_steps": 7924, "loss": 0.2317, "lr": 4.881075080680335e-06, "epoch": 5.573321554770318, "percentage": 79.57, "elapsed_time": "16:09:52", "remaining_time": "4:09:02"} +{"current_steps": 6310, "total_steps": 7924, "loss": 0.2183, "lr": 4.852271615698349e-06, "epoch": 5.57773851590106, "percentage": 79.63, "elapsed_time": "16:10:38", "remaining_time": "4:08:16"} +{"current_steps": 6315, "total_steps": 7924, "loss": 0.2652, "lr": 4.823541650526058e-06, "epoch": 5.582155477031802, "percentage": 79.69, "elapsed_time": "16:11:24", "remaining_time": "4:07:30"} +{"current_steps": 6320, "total_steps": 7924, "loss": 0.2128, "lr": 4.7948853245670294e-06, "epoch": 5.586572438162544, "percentage": 79.76, "elapsed_time": "16:12:10", "remaining_time": "4:06:44"} +{"current_steps": 6325, "total_steps": 7924, "loss": 0.2051, "lr": 4.7663027768674705e-06, "epoch": 5.590989399293286, "percentage": 79.82, "elapsed_time": "16:12:56", "remaining_time": "4:05:57"} +{"current_steps": 6330, "total_steps": 7924, "loss": 0.2225, "lr": 4.737794146115633e-06, "epoch": 5.595406360424028, "percentage": 79.88, "elapsed_time": "16:13:41", "remaining_time": "4:05:11"} +{"current_steps": 6335, "total_steps": 7924, "loss": 0.2627, "lr": 4.7093595706410945e-06, "epoch": 5.59982332155477, "percentage": 79.95, "elapsed_time": "16:14:28", "remaining_time": "4:04:25"} +{"current_steps": 6340, "total_steps": 7924, "loss": 0.24, "lr": 4.680999188414108e-06, "epoch": 5.604240282685512, "percentage": 80.01, "elapsed_time": "16:15:14", "remaining_time": "4:03:39"} +{"current_steps": 6345, "total_steps": 7924, "loss": 0.2189, "lr": 4.652713137044927e-06, "epoch": 5.608657243816254, "percentage": 80.07, "elapsed_time": "16:15:59", "remaining_time": "4:02:53"} +{"current_steps": 6350, "total_steps": 7924, "loss": 0.2295, "lr": 4.624501553783127e-06, "epoch": 5.613074204946996, "percentage": 80.14, "elapsed_time": "16:16:46", "remaining_time": "4:02:06"} +{"current_steps": 6355, "total_steps": 7924, "loss": 0.2256, "lr": 4.596364575516969e-06, "epoch": 5.6174911660777385, "percentage": 80.2, "elapsed_time": "16:17:32", "remaining_time": "4:01:20"} +{"current_steps": 6360, "total_steps": 7924, "loss": 0.2364, "lr": 4.568302338772688e-06, "epoch": 5.6219081272084805, "percentage": 80.26, "elapsed_time": "16:18:18", "remaining_time": "4:00:34"} +{"current_steps": 6365, "total_steps": 7924, "loss": 0.2299, "lr": 4.540314979713876e-06, "epoch": 5.626325088339223, "percentage": 80.33, "elapsed_time": "16:19:04", "remaining_time": "3:59:48"} +{"current_steps": 6370, "total_steps": 7924, "loss": 0.2584, "lr": 4.512402634140804e-06, "epoch": 5.630742049469965, "percentage": 80.39, "elapsed_time": "16:19:50", "remaining_time": "3:59:02"} +{"current_steps": 6375, "total_steps": 7924, "loss": 0.2553, "lr": 4.484565437489759e-06, "epoch": 5.635159010600707, "percentage": 80.45, "elapsed_time": "16:20:36", "remaining_time": "3:58:16"} +{"current_steps": 6380, "total_steps": 7924, "loss": 0.2364, "lr": 4.456803524832389e-06, "epoch": 5.639575971731449, "percentage": 80.51, "elapsed_time": "16:21:21", "remaining_time": "3:57:29"} +{"current_steps": 6385, "total_steps": 7924, "loss": 0.2052, "lr": 4.429117030875052e-06, "epoch": 5.643992932862191, "percentage": 80.58, "elapsed_time": "16:22:07", "remaining_time": "3:56:43"} +{"current_steps": 6390, "total_steps": 7924, "loss": 0.2322, "lr": 4.401506089958161e-06, "epoch": 5.648409893992933, "percentage": 80.64, "elapsed_time": "16:22:53", "remaining_time": "3:55:57"} +{"current_steps": 6395, "total_steps": 7924, "loss": 0.2162, "lr": 4.37397083605551e-06, "epoch": 5.652826855123675, "percentage": 80.7, "elapsed_time": "16:23:39", "remaining_time": "3:55:11"} +{"current_steps": 6400, "total_steps": 7924, "loss": 0.2306, "lr": 4.346511402773688e-06, "epoch": 5.657243816254417, "percentage": 80.77, "elapsed_time": "16:24:25", "remaining_time": "3:54:24"} +{"current_steps": 6405, "total_steps": 7924, "loss": 0.2713, "lr": 4.319127923351339e-06, "epoch": 5.661660777385159, "percentage": 80.83, "elapsed_time": "16:25:10", "remaining_time": "3:53:38"} +{"current_steps": 6410, "total_steps": 7924, "loss": 0.2549, "lr": 4.291820530658595e-06, "epoch": 5.666077738515901, "percentage": 80.89, "elapsed_time": "16:25:57", "remaining_time": "3:52:52"} +{"current_steps": 6415, "total_steps": 7924, "loss": 0.2342, "lr": 4.264589357196389e-06, "epoch": 5.670494699646643, "percentage": 80.96, "elapsed_time": "16:26:43", "remaining_time": "3:52:06"} +{"current_steps": 6420, "total_steps": 7924, "loss": 0.2329, "lr": 4.2374345350958256e-06, "epoch": 5.6749116607773855, "percentage": 81.02, "elapsed_time": "16:27:29", "remaining_time": "3:51:20"} +{"current_steps": 6425, "total_steps": 7924, "loss": 0.2168, "lr": 4.2103561961175354e-06, "epoch": 5.679328621908128, "percentage": 81.08, "elapsed_time": "16:28:15", "remaining_time": "3:50:34"} +{"current_steps": 6430, "total_steps": 7924, "loss": 0.2357, "lr": 4.183354471651037e-06, "epoch": 5.683745583038869, "percentage": 81.15, "elapsed_time": "16:29:01", "remaining_time": "3:49:47"} +{"current_steps": 6435, "total_steps": 7924, "loss": 0.2213, "lr": 4.156429492714109e-06, "epoch": 5.688162544169611, "percentage": 81.21, "elapsed_time": "16:29:47", "remaining_time": "3:49:01"} +{"current_steps": 6440, "total_steps": 7924, "loss": 0.2259, "lr": 4.129581389952129e-06, "epoch": 5.692579505300353, "percentage": 81.27, "elapsed_time": "16:30:33", "remaining_time": "3:48:15"} +{"current_steps": 6445, "total_steps": 7924, "loss": 0.2262, "lr": 4.102810293637465e-06, "epoch": 5.696996466431095, "percentage": 81.34, "elapsed_time": "16:31:19", "remaining_time": "3:47:29"} +{"current_steps": 6450, "total_steps": 7924, "loss": 0.2337, "lr": 4.076116333668838e-06, "epoch": 5.701413427561837, "percentage": 81.4, "elapsed_time": "16:32:05", "remaining_time": "3:46:43"} +{"current_steps": 6455, "total_steps": 7924, "loss": 0.2503, "lr": 4.049499639570682e-06, "epoch": 5.705830388692579, "percentage": 81.46, "elapsed_time": "16:32:51", "remaining_time": "3:45:57"} +{"current_steps": 6460, "total_steps": 7924, "loss": 0.2277, "lr": 4.022960340492525e-06, "epoch": 5.710247349823321, "percentage": 81.52, "elapsed_time": "16:33:38", "remaining_time": "3:45:10"} +{"current_steps": 6465, "total_steps": 7924, "loss": 0.2277, "lr": 3.996498565208358e-06, "epoch": 5.714664310954063, "percentage": 81.59, "elapsed_time": "16:34:24", "remaining_time": "3:44:24"} +{"current_steps": 6470, "total_steps": 7924, "loss": 0.1905, "lr": 3.970114442116013e-06, "epoch": 5.719081272084805, "percentage": 81.65, "elapsed_time": "16:35:10", "remaining_time": "3:43:38"} +{"current_steps": 6475, "total_steps": 7924, "loss": 0.2257, "lr": 3.943808099236524e-06, "epoch": 5.7234982332155475, "percentage": 81.71, "elapsed_time": "16:35:56", "remaining_time": "3:42:52"} +{"current_steps": 6480, "total_steps": 7924, "loss": 0.2471, "lr": 3.917579664213549e-06, "epoch": 5.72791519434629, "percentage": 81.78, "elapsed_time": "16:36:41", "remaining_time": "3:42:06"} +{"current_steps": 6485, "total_steps": 7924, "loss": 0.225, "lr": 3.8914292643126915e-06, "epoch": 5.732332155477032, "percentage": 81.84, "elapsed_time": "16:37:27", "remaining_time": "3:41:20"} +{"current_steps": 6490, "total_steps": 7924, "loss": 0.2457, "lr": 3.865357026420926e-06, "epoch": 5.736749116607774, "percentage": 81.9, "elapsed_time": "16:38:13", "remaining_time": "3:40:33"} +{"current_steps": 6495, "total_steps": 7924, "loss": 0.229, "lr": 3.839363077045974e-06, "epoch": 5.741166077738516, "percentage": 81.97, "elapsed_time": "16:38:59", "remaining_time": "3:39:47"} +{"current_steps": 6500, "total_steps": 7924, "loss": 0.2428, "lr": 3.8134475423156757e-06, "epoch": 5.745583038869258, "percentage": 82.03, "elapsed_time": "16:39:45", "remaining_time": "3:39:01"} +{"current_steps": 6505, "total_steps": 7924, "loss": 0.2413, "lr": 3.787610547977396e-06, "epoch": 5.75, "percentage": 82.09, "elapsed_time": "16:40:31", "remaining_time": "3:38:15"} +{"current_steps": 6510, "total_steps": 7924, "loss": 0.2527, "lr": 3.7618522193973994e-06, "epoch": 5.754416961130742, "percentage": 82.16, "elapsed_time": "16:41:17", "remaining_time": "3:37:29"} +{"current_steps": 6515, "total_steps": 7924, "loss": 0.2515, "lr": 3.7361726815602596e-06, "epoch": 5.758833922261484, "percentage": 82.22, "elapsed_time": "16:42:03", "remaining_time": "3:36:42"} +{"current_steps": 6520, "total_steps": 7924, "loss": 0.2364, "lr": 3.710572059068218e-06, "epoch": 5.763250883392226, "percentage": 82.28, "elapsed_time": "16:42:49", "remaining_time": "3:35:56"} +{"current_steps": 6525, "total_steps": 7924, "loss": 0.291, "lr": 3.6850504761406282e-06, "epoch": 5.767667844522968, "percentage": 82.34, "elapsed_time": "16:43:35", "remaining_time": "3:35:10"} +{"current_steps": 6530, "total_steps": 7924, "loss": 0.2566, "lr": 3.6596080566133176e-06, "epoch": 5.77208480565371, "percentage": 82.41, "elapsed_time": "16:44:21", "remaining_time": "3:34:24"} +{"current_steps": 6535, "total_steps": 7924, "loss": 0.2514, "lr": 3.6342449239379974e-06, "epoch": 5.7765017667844525, "percentage": 82.47, "elapsed_time": "16:45:07", "remaining_time": "3:33:38"} +{"current_steps": 6540, "total_steps": 7924, "loss": 0.2722, "lr": 3.608961201181662e-06, "epoch": 5.780918727915195, "percentage": 82.53, "elapsed_time": "16:45:53", "remaining_time": "3:32:51"} +{"current_steps": 6545, "total_steps": 7924, "loss": 0.2274, "lr": 3.5837570110259945e-06, "epoch": 5.785335689045937, "percentage": 82.6, "elapsed_time": "16:46:39", "remaining_time": "3:32:05"} +{"current_steps": 6550, "total_steps": 7924, "loss": 0.237, "lr": 3.558632475766777e-06, "epoch": 5.789752650176679, "percentage": 82.66, "elapsed_time": "16:47:24", "remaining_time": "3:31:19"} +{"current_steps": 6555, "total_steps": 7924, "loss": 0.2623, "lr": 3.5335877173132672e-06, "epoch": 5.794169611307421, "percentage": 82.72, "elapsed_time": "16:48:10", "remaining_time": "3:30:33"} +{"current_steps": 6560, "total_steps": 7924, "loss": 0.2782, "lr": 3.5086228571876622e-06, "epoch": 5.798586572438163, "percentage": 82.79, "elapsed_time": "16:48:56", "remaining_time": "3:29:47"} +{"current_steps": 6565, "total_steps": 7924, "loss": 0.2596, "lr": 3.4837380165244494e-06, "epoch": 5.803003533568905, "percentage": 82.85, "elapsed_time": "16:49:42", "remaining_time": "3:29:01"} +{"current_steps": 6570, "total_steps": 7924, "loss": 0.2359, "lr": 3.4589333160698592e-06, "epoch": 5.807420494699647, "percentage": 82.91, "elapsed_time": "16:50:28", "remaining_time": "3:28:14"} +{"current_steps": 6575, "total_steps": 7924, "loss": 0.244, "lr": 3.434208876181262e-06, "epoch": 5.811837455830389, "percentage": 82.98, "elapsed_time": "16:51:14", "remaining_time": "3:27:28"} +{"current_steps": 6580, "total_steps": 7924, "loss": 0.2667, "lr": 3.409564816826587e-06, "epoch": 5.816254416961131, "percentage": 83.04, "elapsed_time": "16:52:00", "remaining_time": "3:26:42"} +{"current_steps": 6585, "total_steps": 7924, "loss": 0.2082, "lr": 3.385001257583744e-06, "epoch": 5.820671378091872, "percentage": 83.1, "elapsed_time": "16:52:46", "remaining_time": "3:25:56"} +{"current_steps": 6590, "total_steps": 7924, "loss": 0.2312, "lr": 3.3605183176400402e-06, "epoch": 5.8250883392226145, "percentage": 83.17, "elapsed_time": "16:53:32", "remaining_time": "3:25:10"} +{"current_steps": 6595, "total_steps": 7924, "loss": 0.2148, "lr": 3.3361161157916012e-06, "epoch": 5.829505300353357, "percentage": 83.23, "elapsed_time": "16:54:18", "remaining_time": "3:24:23"} +{"current_steps": 6600, "total_steps": 7924, "loss": 0.2404, "lr": 3.3117947704427866e-06, "epoch": 5.833922261484099, "percentage": 83.29, "elapsed_time": "16:55:04", "remaining_time": "3:23:37"} +{"current_steps": 6605, "total_steps": 7924, "loss": 0.2114, "lr": 3.287554399605637e-06, "epoch": 5.838339222614841, "percentage": 83.35, "elapsed_time": "16:55:51", "remaining_time": "3:22:51"} +{"current_steps": 6610, "total_steps": 7924, "loss": 0.2358, "lr": 3.2633951208992797e-06, "epoch": 5.842756183745583, "percentage": 83.42, "elapsed_time": "16:56:37", "remaining_time": "3:22:05"} +{"current_steps": 6615, "total_steps": 7924, "loss": 0.2204, "lr": 3.2393170515493756e-06, "epoch": 5.847173144876325, "percentage": 83.48, "elapsed_time": "16:57:23", "remaining_time": "3:21:19"} +{"current_steps": 6620, "total_steps": 7924, "loss": 0.2544, "lr": 3.2153203083875306e-06, "epoch": 5.851590106007067, "percentage": 83.54, "elapsed_time": "16:58:09", "remaining_time": "3:20:33"} +{"current_steps": 6625, "total_steps": 7924, "loss": 0.2382, "lr": 3.19140500785075e-06, "epoch": 5.856007067137809, "percentage": 83.61, "elapsed_time": "16:58:55", "remaining_time": "3:19:47"} +{"current_steps": 6630, "total_steps": 7924, "loss": 0.2356, "lr": 3.1675712659808576e-06, "epoch": 5.860424028268551, "percentage": 83.67, "elapsed_time": "16:59:41", "remaining_time": "3:19:00"} +{"current_steps": 6635, "total_steps": 7924, "loss": 0.226, "lr": 3.1438191984239297e-06, "epoch": 5.864840989399293, "percentage": 83.73, "elapsed_time": "17:00:27", "remaining_time": "3:18:14"} +{"current_steps": 6640, "total_steps": 7924, "loss": 0.2465, "lr": 3.1201489204297663e-06, "epoch": 5.869257950530035, "percentage": 83.8, "elapsed_time": "17:01:13", "remaining_time": "3:17:28"} +{"current_steps": 6645, "total_steps": 7924, "loss": 0.2645, "lr": 3.0965605468512837e-06, "epoch": 5.873674911660777, "percentage": 83.86, "elapsed_time": "17:01:59", "remaining_time": "3:16:42"} +{"current_steps": 6650, "total_steps": 7924, "loss": 0.2243, "lr": 3.0730541921439936e-06, "epoch": 5.8780918727915195, "percentage": 83.92, "elapsed_time": "17:02:45", "remaining_time": "3:15:56"} +{"current_steps": 6655, "total_steps": 7924, "loss": 0.2309, "lr": 3.049629970365433e-06, "epoch": 5.8825088339222615, "percentage": 83.99, "elapsed_time": "17:03:31", "remaining_time": "3:15:10"} +{"current_steps": 6660, "total_steps": 7924, "loss": 0.2356, "lr": 3.026287995174615e-06, "epoch": 5.886925795053004, "percentage": 84.05, "elapsed_time": "17:04:17", "remaining_time": "3:14:24"} +{"current_steps": 6665, "total_steps": 7924, "loss": 0.2445, "lr": 3.0030283798314785e-06, "epoch": 5.891342756183746, "percentage": 84.11, "elapsed_time": "17:05:03", "remaining_time": "3:13:37"} +{"current_steps": 6670, "total_steps": 7924, "loss": 0.2535, "lr": 2.9798512371963207e-06, "epoch": 5.895759717314488, "percentage": 84.17, "elapsed_time": "17:05:49", "remaining_time": "3:12:51"} +{"current_steps": 6675, "total_steps": 7924, "loss": 0.2605, "lr": 2.9567566797292914e-06, "epoch": 5.90017667844523, "percentage": 84.24, "elapsed_time": "17:06:35", "remaining_time": "3:12:05"} +{"current_steps": 6680, "total_steps": 7924, "loss": 0.2583, "lr": 2.9337448194897943e-06, "epoch": 5.904593639575972, "percentage": 84.3, "elapsed_time": "17:07:21", "remaining_time": "3:11:19"} +{"current_steps": 6685, "total_steps": 7924, "loss": 0.2428, "lr": 2.9108157681359837e-06, "epoch": 5.909010600706714, "percentage": 84.36, "elapsed_time": "17:08:07", "remaining_time": "3:10:33"} +{"current_steps": 6690, "total_steps": 7924, "loss": 0.2704, "lr": 2.8879696369242062e-06, "epoch": 5.913427561837456, "percentage": 84.43, "elapsed_time": "17:08:53", "remaining_time": "3:09:47"} +{"current_steps": 6695, "total_steps": 7924, "loss": 0.2479, "lr": 2.8652065367084627e-06, "epoch": 5.917844522968198, "percentage": 84.49, "elapsed_time": "17:09:39", "remaining_time": "3:09:00"} +{"current_steps": 6700, "total_steps": 7924, "loss": 0.251, "lr": 2.8425265779398704e-06, "epoch": 5.92226148409894, "percentage": 84.55, "elapsed_time": "17:10:25", "remaining_time": "3:08:14"} +{"current_steps": 6705, "total_steps": 7924, "loss": 0.2146, "lr": 2.819929870666129e-06, "epoch": 5.926678445229682, "percentage": 84.62, "elapsed_time": "17:11:11", "remaining_time": "3:07:28"} +{"current_steps": 6710, "total_steps": 7924, "loss": 0.2275, "lr": 2.7974165245309913e-06, "epoch": 5.9310954063604235, "percentage": 84.68, "elapsed_time": "17:11:57", "remaining_time": "3:06:42"} +{"current_steps": 6715, "total_steps": 7924, "loss": 0.2487, "lr": 2.774986648773701e-06, "epoch": 5.935512367491166, "percentage": 84.74, "elapsed_time": "17:12:44", "remaining_time": "3:05:56"} +{"current_steps": 6720, "total_steps": 7924, "loss": 0.254, "lr": 2.752640352228524e-06, "epoch": 5.939929328621908, "percentage": 84.81, "elapsed_time": "17:13:30", "remaining_time": "3:05:10"} +{"current_steps": 6725, "total_steps": 7924, "loss": 0.2321, "lr": 2.7303777433241506e-06, "epoch": 5.94434628975265, "percentage": 84.87, "elapsed_time": "17:14:16", "remaining_time": "3:04:23"} +{"current_steps": 6730, "total_steps": 7924, "loss": 0.2504, "lr": 2.708198930083219e-06, "epoch": 5.948763250883392, "percentage": 84.93, "elapsed_time": "17:15:02", "remaining_time": "3:03:37"} +{"current_steps": 6735, "total_steps": 7924, "loss": 0.2246, "lr": 2.6861040201217692e-06, "epoch": 5.953180212014134, "percentage": 84.99, "elapsed_time": "17:15:48", "remaining_time": "3:02:51"} +{"current_steps": 6740, "total_steps": 7924, "loss": 0.2539, "lr": 2.6640931206487252e-06, "epoch": 5.957597173144876, "percentage": 85.06, "elapsed_time": "17:16:34", "remaining_time": "3:02:05"} +{"current_steps": 6745, "total_steps": 7924, "loss": 0.2416, "lr": 2.642166338465384e-06, "epoch": 5.962014134275618, "percentage": 85.12, "elapsed_time": "17:17:20", "remaining_time": "3:01:19"} +{"current_steps": 6750, "total_steps": 7924, "loss": 0.213, "lr": 2.6203237799648663e-06, "epoch": 5.96643109540636, "percentage": 85.18, "elapsed_time": "17:18:06", "remaining_time": "3:00:33"} +{"current_steps": 6755, "total_steps": 7924, "loss": 0.2295, "lr": 2.598565551131653e-06, "epoch": 5.970848056537102, "percentage": 85.25, "elapsed_time": "17:18:52", "remaining_time": "2:59:47"} +{"current_steps": 6760, "total_steps": 7924, "loss": 0.2332, "lr": 2.5768917575410134e-06, "epoch": 5.975265017667844, "percentage": 85.31, "elapsed_time": "17:19:38", "remaining_time": "2:59:00"} +{"current_steps": 6765, "total_steps": 7924, "loss": 0.2225, "lr": 2.555302504358537e-06, "epoch": 5.979681978798586, "percentage": 85.37, "elapsed_time": "17:20:24", "remaining_time": "2:58:14"} +{"current_steps": 6770, "total_steps": 7924, "loss": 0.227, "lr": 2.5337978963396003e-06, "epoch": 5.9840989399293285, "percentage": 85.44, "elapsed_time": "17:21:09", "remaining_time": "2:57:28"} +{"current_steps": 6775, "total_steps": 7924, "loss": 0.2404, "lr": 2.5123780378288642e-06, "epoch": 5.988515901060071, "percentage": 85.5, "elapsed_time": "17:21:55", "remaining_time": "2:56:42"} +{"current_steps": 6780, "total_steps": 7924, "loss": 0.2628, "lr": 2.49104303275977e-06, "epoch": 5.992932862190813, "percentage": 85.56, "elapsed_time": "17:22:41", "remaining_time": "2:55:56"} +{"current_steps": 6785, "total_steps": 7924, "loss": 0.2371, "lr": 2.4697929846540335e-06, "epoch": 5.997349823321555, "percentage": 85.63, "elapsed_time": "17:23:27", "remaining_time": "2:55:09"} +{"current_steps": 6790, "total_steps": 7924, "loss": 0.2211, "lr": 2.4486279966211425e-06, "epoch": 6.002650176678445, "percentage": 85.69, "elapsed_time": "17:24:13", "remaining_time": "2:54:23"} +{"current_steps": 6795, "total_steps": 7924, "loss": 0.2488, "lr": 2.427548171357843e-06, "epoch": 6.007067137809187, "percentage": 85.75, "elapsed_time": "17:24:59", "remaining_time": "2:53:37"} +{"current_steps": 6800, "total_steps": 7924, "loss": 0.2631, "lr": 2.406553611147684e-06, "epoch": 6.011484098939929, "percentage": 85.82, "elapsed_time": "17:25:45", "remaining_time": "2:52:51"} +{"current_steps": 6805, "total_steps": 7924, "loss": 0.2463, "lr": 2.38564441786046e-06, "epoch": 6.0159010600706715, "percentage": 85.88, "elapsed_time": "17:26:31", "remaining_time": "2:52:05"} +{"current_steps": 6810, "total_steps": 7924, "loss": 0.2418, "lr": 2.364820692951766e-06, "epoch": 6.020318021201414, "percentage": 85.94, "elapsed_time": "17:27:17", "remaining_time": "2:51:19"} +{"current_steps": 6815, "total_steps": 7924, "loss": 0.2504, "lr": 2.3440825374624798e-06, "epoch": 6.024734982332156, "percentage": 86.0, "elapsed_time": "17:28:03", "remaining_time": "2:50:32"} +{"current_steps": 6820, "total_steps": 7924, "loss": 0.2535, "lr": 2.3234300520182873e-06, "epoch": 6.029151943462898, "percentage": 86.07, "elapsed_time": "17:28:49", "remaining_time": "2:49:46"} +{"current_steps": 6825, "total_steps": 7924, "loss": 0.2408, "lr": 2.3028633368291843e-06, "epoch": 6.03356890459364, "percentage": 86.13, "elapsed_time": "17:29:35", "remaining_time": "2:49:00"} +{"current_steps": 6830, "total_steps": 7924, "loss": 0.2133, "lr": 2.2823824916889724e-06, "epoch": 6.037985865724382, "percentage": 86.19, "elapsed_time": "17:30:21", "remaining_time": "2:48:14"} +{"current_steps": 6835, "total_steps": 7924, "loss": 0.2345, "lr": 2.261987615974832e-06, "epoch": 6.042402826855124, "percentage": 86.26, "elapsed_time": "17:31:07", "remaining_time": "2:47:28"} +{"current_steps": 6840, "total_steps": 7924, "loss": 0.227, "lr": 2.241678808646768e-06, "epoch": 6.046819787985866, "percentage": 86.32, "elapsed_time": "17:31:53", "remaining_time": "2:46:42"} +{"current_steps": 6845, "total_steps": 7924, "loss": 0.2263, "lr": 2.2214561682471825e-06, "epoch": 6.051236749116608, "percentage": 86.38, "elapsed_time": "17:32:38", "remaining_time": "2:45:55"} +{"current_steps": 6850, "total_steps": 7924, "loss": 0.2305, "lr": 2.201319792900374e-06, "epoch": 6.05565371024735, "percentage": 86.45, "elapsed_time": "17:33:24", "remaining_time": "2:45:09"} +{"current_steps": 6855, "total_steps": 7924, "loss": 0.2177, "lr": 2.181269780312063e-06, "epoch": 6.060070671378092, "percentage": 86.51, "elapsed_time": "17:34:11", "remaining_time": "2:44:23"} +{"current_steps": 6860, "total_steps": 7924, "loss": 0.2205, "lr": 2.1613062277689266e-06, "epoch": 6.0644876325088335, "percentage": 86.57, "elapsed_time": "17:34:57", "remaining_time": "2:43:37"} +{"current_steps": 6865, "total_steps": 7924, "loss": 0.2207, "lr": 2.141429232138117e-06, "epoch": 6.068904593639576, "percentage": 86.64, "elapsed_time": "17:35:44", "remaining_time": "2:42:51"} +{"current_steps": 6870, "total_steps": 7924, "loss": 0.2352, "lr": 2.1216388898667973e-06, "epoch": 6.073321554770318, "percentage": 86.7, "elapsed_time": "17:36:30", "remaining_time": "2:42:05"} +{"current_steps": 6875, "total_steps": 7924, "loss": 0.265, "lr": 2.1019352969816585e-06, "epoch": 6.07773851590106, "percentage": 86.76, "elapsed_time": "17:37:16", "remaining_time": "2:41:19"} +{"current_steps": 6880, "total_steps": 7924, "loss": 0.2542, "lr": 2.082318549088491e-06, "epoch": 6.082155477031802, "percentage": 86.82, "elapsed_time": "17:38:02", "remaining_time": "2:40:33"} +{"current_steps": 6885, "total_steps": 7924, "loss": 0.2227, "lr": 2.062788741371673e-06, "epoch": 6.086572438162544, "percentage": 86.89, "elapsed_time": "17:38:49", "remaining_time": "2:39:47"} +{"current_steps": 6890, "total_steps": 7924, "loss": 0.2593, "lr": 2.0433459685937395e-06, "epoch": 6.090989399293286, "percentage": 86.95, "elapsed_time": "17:39:35", "remaining_time": "2:39:00"} +{"current_steps": 6895, "total_steps": 7924, "loss": 0.2221, "lr": 2.0239903250949176e-06, "epoch": 6.095406360424028, "percentage": 87.01, "elapsed_time": "17:40:23", "remaining_time": "2:38:15"} +{"current_steps": 6900, "total_steps": 7924, "loss": 0.2221, "lr": 2.0047219047926614e-06, "epoch": 6.09982332155477, "percentage": 87.08, "elapsed_time": "17:41:10", "remaining_time": "2:37:29"} +{"current_steps": 6905, "total_steps": 7924, "loss": 0.2456, "lr": 1.9855408011812117e-06, "epoch": 6.104240282685512, "percentage": 87.14, "elapsed_time": "17:41:56", "remaining_time": "2:36:42"} +{"current_steps": 6910, "total_steps": 7924, "loss": 0.2238, "lr": 1.966447107331104e-06, "epoch": 6.108657243816254, "percentage": 87.2, "elapsed_time": "17:42:42", "remaining_time": "2:35:56"} +{"current_steps": 6915, "total_steps": 7924, "loss": 0.233, "lr": 1.9474409158887807e-06, "epoch": 6.113074204946996, "percentage": 87.27, "elapsed_time": "17:43:29", "remaining_time": "2:35:10"} +{"current_steps": 6920, "total_steps": 7924, "loss": 0.2178, "lr": 1.9285223190760737e-06, "epoch": 6.1174911660777385, "percentage": 87.33, "elapsed_time": "17:44:15", "remaining_time": "2:34:24"} +{"current_steps": 6925, "total_steps": 7924, "loss": 0.2098, "lr": 1.9096914086898087e-06, "epoch": 6.1219081272084805, "percentage": 87.39, "elapsed_time": "17:45:01", "remaining_time": "2:33:38"} +{"current_steps": 6930, "total_steps": 7924, "loss": 0.2236, "lr": 1.8909482761013254e-06, "epoch": 6.126325088339223, "percentage": 87.46, "elapsed_time": "17:45:47", "remaining_time": "2:32:52"} +{"current_steps": 6935, "total_steps": 7924, "loss": 0.2034, "lr": 1.872293012256059e-06, "epoch": 6.130742049469965, "percentage": 87.52, "elapsed_time": "17:46:33", "remaining_time": "2:32:06"} +{"current_steps": 6940, "total_steps": 7924, "loss": 0.2399, "lr": 1.853725707673082e-06, "epoch": 6.135159010600707, "percentage": 87.58, "elapsed_time": "17:47:19", "remaining_time": "2:31:19"} +{"current_steps": 6945, "total_steps": 7924, "loss": 0.2054, "lr": 1.8352464524446724e-06, "epoch": 6.139575971731449, "percentage": 87.65, "elapsed_time": "17:48:06", "remaining_time": "2:30:33"} +{"current_steps": 6950, "total_steps": 7924, "loss": 0.2165, "lr": 1.8168553362358787e-06, "epoch": 6.143992932862191, "percentage": 87.71, "elapsed_time": "17:48:52", "remaining_time": "2:29:47"} +{"current_steps": 6955, "total_steps": 7924, "loss": 0.2483, "lr": 1.7985524482840676e-06, "epoch": 6.148409893992933, "percentage": 87.77, "elapsed_time": "17:49:39", "remaining_time": "2:29:01"} +{"current_steps": 6960, "total_steps": 7924, "loss": 0.2445, "lr": 1.7803378773985214e-06, "epoch": 6.152826855123675, "percentage": 87.83, "elapsed_time": "17:50:25", "remaining_time": "2:28:15"} +{"current_steps": 6965, "total_steps": 7924, "loss": 0.2313, "lr": 1.7622117119599802e-06, "epoch": 6.157243816254417, "percentage": 87.9, "elapsed_time": "17:51:11", "remaining_time": "2:27:29"} +{"current_steps": 6970, "total_steps": 7924, "loss": 0.2473, "lr": 1.74417403992023e-06, "epoch": 6.161660777385159, "percentage": 87.96, "elapsed_time": "17:51:57", "remaining_time": "2:26:43"} +{"current_steps": 6975, "total_steps": 7924, "loss": 0.276, "lr": 1.7262249488016648e-06, "epoch": 6.166077738515901, "percentage": 88.02, "elapsed_time": "17:52:44", "remaining_time": "2:25:57"} +{"current_steps": 6980, "total_steps": 7924, "loss": 0.2731, "lr": 1.708364525696864e-06, "epoch": 6.170494699646643, "percentage": 88.09, "elapsed_time": "17:53:31", "remaining_time": "2:25:11"} +{"current_steps": 6985, "total_steps": 7924, "loss": 0.2509, "lr": 1.6905928572681806e-06, "epoch": 6.1749116607773855, "percentage": 88.15, "elapsed_time": "17:54:17", "remaining_time": "2:24:25"} +{"current_steps": 6990, "total_steps": 7924, "loss": 0.2125, "lr": 1.6729100297472967e-06, "epoch": 6.179328621908128, "percentage": 88.21, "elapsed_time": "17:55:04", "remaining_time": "2:23:39"} +{"current_steps": 6995, "total_steps": 7924, "loss": 0.2182, "lr": 1.6553161289348429e-06, "epoch": 6.18374558303887, "percentage": 88.28, "elapsed_time": "17:55:50", "remaining_time": "2:22:52"} +{"current_steps": 7000, "total_steps": 7924, "loss": 0.2144, "lr": 1.637811240199938e-06, "epoch": 6.188162544169611, "percentage": 88.34, "elapsed_time": "17:56:36", "remaining_time": "2:22:06"} +{"current_steps": 7005, "total_steps": 7924, "loss": 0.2198, "lr": 1.620395448479808e-06, "epoch": 6.192579505300353, "percentage": 88.4, "elapsed_time": "17:57:22", "remaining_time": "2:21:20"} +{"current_steps": 7010, "total_steps": 7924, "loss": 0.2222, "lr": 1.603068838279358e-06, "epoch": 6.196996466431095, "percentage": 88.47, "elapsed_time": "17:58:08", "remaining_time": "2:20:34"} +{"current_steps": 7015, "total_steps": 7924, "loss": 0.2144, "lr": 1.5858314936707731e-06, "epoch": 6.201413427561837, "percentage": 88.53, "elapsed_time": "17:58:54", "remaining_time": "2:19:48"} +{"current_steps": 7020, "total_steps": 7924, "loss": 0.2489, "lr": 1.5686834982930954e-06, "epoch": 6.205830388692579, "percentage": 88.59, "elapsed_time": "17:59:40", "remaining_time": "2:19:02"} +{"current_steps": 7025, "total_steps": 7924, "loss": 0.2235, "lr": 1.551624935351832e-06, "epoch": 6.210247349823321, "percentage": 88.65, "elapsed_time": "18:00:26", "remaining_time": "2:18:15"} +{"current_steps": 7030, "total_steps": 7924, "loss": 0.2176, "lr": 1.5346558876185459e-06, "epoch": 6.214664310954063, "percentage": 88.72, "elapsed_time": "18:01:13", "remaining_time": "2:17:29"} +{"current_steps": 7035, "total_steps": 7924, "loss": 0.2437, "lr": 1.5177764374304493e-06, "epoch": 6.219081272084805, "percentage": 88.78, "elapsed_time": "18:01:59", "remaining_time": "2:16:43"} +{"current_steps": 7040, "total_steps": 7924, "loss": 0.241, "lr": 1.500986666690012e-06, "epoch": 6.2234982332155475, "percentage": 88.84, "elapsed_time": "18:02:45", "remaining_time": "2:15:57"} +{"current_steps": 7045, "total_steps": 7924, "loss": 0.2073, "lr": 1.4842866568645642e-06, "epoch": 6.22791519434629, "percentage": 88.91, "elapsed_time": "18:03:31", "remaining_time": "2:15:11"} +{"current_steps": 7050, "total_steps": 7924, "loss": 0.2075, "lr": 1.4676764889858964e-06, "epoch": 6.232332155477032, "percentage": 88.97, "elapsed_time": "18:04:17", "remaining_time": "2:14:25"} +{"current_steps": 7055, "total_steps": 7924, "loss": 0.2369, "lr": 1.4511562436498671e-06, "epoch": 6.236749116607774, "percentage": 89.03, "elapsed_time": "18:05:03", "remaining_time": "2:13:39"} +{"current_steps": 7060, "total_steps": 7924, "loss": 0.2507, "lr": 1.4347260010160112e-06, "epoch": 6.241166077738516, "percentage": 89.1, "elapsed_time": "18:05:49", "remaining_time": "2:12:53"} +{"current_steps": 7065, "total_steps": 7924, "loss": 0.2332, "lr": 1.418385840807157e-06, "epoch": 6.245583038869258, "percentage": 89.16, "elapsed_time": "18:06:36", "remaining_time": "2:12:06"} +{"current_steps": 7070, "total_steps": 7924, "loss": 0.2203, "lr": 1.402135842309027e-06, "epoch": 6.25, "percentage": 89.22, "elapsed_time": "18:07:22", "remaining_time": "2:11:20"} +{"current_steps": 7075, "total_steps": 7924, "loss": 0.2481, "lr": 1.3859760843698733e-06, "epoch": 6.254416961130742, "percentage": 89.29, "elapsed_time": "18:08:08", "remaining_time": "2:10:34"} +{"current_steps": 7080, "total_steps": 7924, "loss": 0.2303, "lr": 1.3699066454000698e-06, "epoch": 6.258833922261484, "percentage": 89.35, "elapsed_time": "18:08:54", "remaining_time": "2:09:48"} +{"current_steps": 7085, "total_steps": 7924, "loss": 0.2606, "lr": 1.353927603371754e-06, "epoch": 6.263250883392226, "percentage": 89.41, "elapsed_time": "18:09:40", "remaining_time": "2:09:02"} +{"current_steps": 7090, "total_steps": 7924, "loss": 0.2137, "lr": 1.3380390358184324e-06, "epoch": 6.267667844522968, "percentage": 89.48, "elapsed_time": "18:10:26", "remaining_time": "2:08:16"} +{"current_steps": 7095, "total_steps": 7924, "loss": 0.2577, "lr": 1.322241019834616e-06, "epoch": 6.27208480565371, "percentage": 89.54, "elapsed_time": "18:11:12", "remaining_time": "2:07:29"} +{"current_steps": 7100, "total_steps": 7924, "loss": 0.232, "lr": 1.3065336320754418e-06, "epoch": 6.2765017667844525, "percentage": 89.6, "elapsed_time": "18:11:58", "remaining_time": "2:06:43"} +{"current_steps": 7105, "total_steps": 7924, "loss": 0.2367, "lr": 1.2909169487562978e-06, "epoch": 6.280918727915195, "percentage": 89.66, "elapsed_time": "18:12:44", "remaining_time": "2:05:57"} +{"current_steps": 7110, "total_steps": 7924, "loss": 0.2241, "lr": 1.2753910456524588e-06, "epoch": 6.285335689045937, "percentage": 89.73, "elapsed_time": "18:13:30", "remaining_time": "2:05:11"} +{"current_steps": 7115, "total_steps": 7924, "loss": 0.2708, "lr": 1.2599559980987076e-06, "epoch": 6.289752650176679, "percentage": 89.79, "elapsed_time": "18:14:16", "remaining_time": "2:04:25"} +{"current_steps": 7120, "total_steps": 7924, "loss": 0.2394, "lr": 1.2446118809889906e-06, "epoch": 6.294169611307421, "percentage": 89.85, "elapsed_time": "18:15:02", "remaining_time": "2:03:39"} +{"current_steps": 7125, "total_steps": 7924, "loss": 0.2177, "lr": 1.22935876877603e-06, "epoch": 6.298586572438163, "percentage": 89.92, "elapsed_time": "18:15:48", "remaining_time": "2:02:53"} +{"current_steps": 7130, "total_steps": 7924, "loss": 0.2486, "lr": 1.214196735470985e-06, "epoch": 6.303003533568905, "percentage": 89.98, "elapsed_time": "18:16:34", "remaining_time": "2:02:06"} +{"current_steps": 7135, "total_steps": 7924, "loss": 0.2368, "lr": 1.1991258546430683e-06, "epoch": 6.307420494699647, "percentage": 90.04, "elapsed_time": "18:17:20", "remaining_time": "2:01:20"} +{"current_steps": 7140, "total_steps": 7924, "loss": 0.2728, "lr": 1.184146199419216e-06, "epoch": 6.311837455830389, "percentage": 90.11, "elapsed_time": "18:18:06", "remaining_time": "2:00:34"} +{"current_steps": 7145, "total_steps": 7924, "loss": 0.232, "lr": 1.1692578424837131e-06, "epoch": 6.316254416961131, "percentage": 90.17, "elapsed_time": "18:18:52", "remaining_time": "1:59:48"} +{"current_steps": 7150, "total_steps": 7924, "loss": 0.198, "lr": 1.1544608560778392e-06, "epoch": 6.320671378091872, "percentage": 90.23, "elapsed_time": "18:19:38", "remaining_time": "1:59:02"} +{"current_steps": 7155, "total_steps": 7924, "loss": 0.2155, "lr": 1.139755311999544e-06, "epoch": 6.3250883392226145, "percentage": 90.3, "elapsed_time": "18:20:24", "remaining_time": "1:58:16"} +{"current_steps": 7160, "total_steps": 7924, "loss": 0.2489, "lr": 1.1251412816030637e-06, "epoch": 6.329505300353357, "percentage": 90.36, "elapsed_time": "18:21:09", "remaining_time": "1:57:29"} +{"current_steps": 7165, "total_steps": 7924, "loss": 0.2302, "lr": 1.1106188357986003e-06, "epoch": 6.333922261484099, "percentage": 90.42, "elapsed_time": "18:21:55", "remaining_time": "1:56:43"} +{"current_steps": 7170, "total_steps": 7924, "loss": 0.2349, "lr": 1.096188045051969e-06, "epoch": 6.338339222614841, "percentage": 90.48, "elapsed_time": "18:22:41", "remaining_time": "1:55:57"} +{"current_steps": 7175, "total_steps": 7924, "loss": 0.2205, "lr": 1.0818489793842523e-06, "epoch": 6.342756183745583, "percentage": 90.55, "elapsed_time": "18:23:27", "remaining_time": "1:55:11"} +{"current_steps": 7180, "total_steps": 7924, "loss": 0.2523, "lr": 1.0676017083714684e-06, "epoch": 6.347173144876325, "percentage": 90.61, "elapsed_time": "18:24:13", "remaining_time": "1:54:25"} +{"current_steps": 7185, "total_steps": 7924, "loss": 0.2251, "lr": 1.0534463011442276e-06, "epoch": 6.351590106007067, "percentage": 90.67, "elapsed_time": "18:25:00", "remaining_time": "1:53:39"} +{"current_steps": 7190, "total_steps": 7924, "loss": 0.2692, "lr": 1.0393828263873985e-06, "epoch": 6.356007067137809, "percentage": 90.74, "elapsed_time": "18:25:46", "remaining_time": "1:52:53"} +{"current_steps": 7195, "total_steps": 7924, "loss": 0.2218, "lr": 1.0254113523397736e-06, "epoch": 6.360424028268551, "percentage": 90.8, "elapsed_time": "18:26:32", "remaining_time": "1:52:06"} +{"current_steps": 7200, "total_steps": 7924, "loss": 0.242, "lr": 1.0115319467937402e-06, "epoch": 6.364840989399293, "percentage": 90.86, "elapsed_time": "18:27:18", "remaining_time": "1:51:20"} +{"current_steps": 7205, "total_steps": 7924, "loss": 0.2294, "lr": 9.977446770949562e-07, "epoch": 6.369257950530035, "percentage": 90.93, "elapsed_time": "18:28:04", "remaining_time": "1:50:34"} +{"current_steps": 7210, "total_steps": 7924, "loss": 0.2407, "lr": 9.840496101420106e-07, "epoch": 6.373674911660777, "percentage": 90.99, "elapsed_time": "18:28:50", "remaining_time": "1:49:48"} +{"current_steps": 7215, "total_steps": 7924, "loss": 0.2477, "lr": 9.704468123861077e-07, "epoch": 6.3780918727915195, "percentage": 91.05, "elapsed_time": "18:29:36", "remaining_time": "1:49:02"} +{"current_steps": 7220, "total_steps": 7924, "loss": 0.239, "lr": 9.569363498307482e-07, "epoch": 6.3825088339222615, "percentage": 91.12, "elapsed_time": "18:30:22", "remaining_time": "1:48:16"} +{"current_steps": 7225, "total_steps": 7924, "loss": 0.2365, "lr": 9.43518288031402e-07, "epoch": 6.386925795053004, "percentage": 91.18, "elapsed_time": "18:31:08", "remaining_time": "1:47:29"} +{"current_steps": 7230, "total_steps": 7924, "loss": 0.192, "lr": 9.301926920951798e-07, "epoch": 6.391342756183746, "percentage": 91.24, "elapsed_time": "18:31:54", "remaining_time": "1:46:43"} +{"current_steps": 7235, "total_steps": 7924, "loss": 0.2508, "lr": 9.169596266805536e-07, "epoch": 6.395759717314488, "percentage": 91.3, "elapsed_time": "18:32:40", "remaining_time": "1:45:57"} +{"current_steps": 7240, "total_steps": 7924, "loss": 0.2597, "lr": 9.038191559969967e-07, "epoch": 6.40017667844523, "percentage": 91.37, "elapsed_time": "18:33:26", "remaining_time": "1:45:11"} +{"current_steps": 7245, "total_steps": 7924, "loss": 0.2343, "lr": 8.907713438047039e-07, "epoch": 6.404593639575972, "percentage": 91.43, "elapsed_time": "18:34:12", "remaining_time": "1:44:25"} +{"current_steps": 7250, "total_steps": 7924, "loss": 0.2845, "lr": 8.77816253414272e-07, "epoch": 6.409010600706714, "percentage": 91.49, "elapsed_time": "18:34:58", "remaining_time": "1:43:39"} +{"current_steps": 7255, "total_steps": 7924, "loss": 0.1907, "lr": 8.649539476863933e-07, "epoch": 6.413427561837456, "percentage": 91.56, "elapsed_time": "18:35:44", "remaining_time": "1:42:53"} +{"current_steps": 7260, "total_steps": 7924, "loss": 0.2369, "lr": 8.521844890315489e-07, "epoch": 6.417844522968198, "percentage": 91.62, "elapsed_time": "18:36:30", "remaining_time": "1:42:06"} +{"current_steps": 7265, "total_steps": 7924, "loss": 0.2287, "lr": 8.395079394097072e-07, "epoch": 6.42226148409894, "percentage": 91.68, "elapsed_time": "18:37:16", "remaining_time": "1:41:20"} +{"current_steps": 7270, "total_steps": 7924, "loss": 0.2387, "lr": 8.269243603300259e-07, "epoch": 6.426678445229682, "percentage": 91.75, "elapsed_time": "18:38:02", "remaining_time": "1:40:34"} +{"current_steps": 7275, "total_steps": 7924, "loss": 0.2216, "lr": 8.144338128505458e-07, "epoch": 6.431095406360424, "percentage": 91.81, "elapsed_time": "18:38:47", "remaining_time": "1:39:48"} +{"current_steps": 7280, "total_steps": 7924, "loss": 0.2516, "lr": 8.020363575779044e-07, "epoch": 6.435512367491166, "percentage": 91.87, "elapsed_time": "18:39:33", "remaining_time": "1:39:02"} +{"current_steps": 7285, "total_steps": 7924, "loss": 0.2133, "lr": 7.897320546670362e-07, "epoch": 6.439929328621908, "percentage": 91.94, "elapsed_time": "18:40:19", "remaining_time": "1:38:16"} +{"current_steps": 7290, "total_steps": 7924, "loss": 0.2498, "lr": 7.775209638208814e-07, "epoch": 6.44434628975265, "percentage": 92.0, "elapsed_time": "18:41:04", "remaining_time": "1:37:29"} +{"current_steps": 7295, "total_steps": 7924, "loss": 0.1744, "lr": 7.654031442900978e-07, "epoch": 6.448763250883392, "percentage": 92.06, "elapsed_time": "18:41:50", "remaining_time": "1:36:43"} +{"current_steps": 7300, "total_steps": 7924, "loss": 0.2215, "lr": 7.533786548727695e-07, "epoch": 6.453180212014134, "percentage": 92.13, "elapsed_time": "18:42:36", "remaining_time": "1:35:57"} +{"current_steps": 7305, "total_steps": 7924, "loss": 0.2707, "lr": 7.414475539141275e-07, "epoch": 6.457597173144876, "percentage": 92.19, "elapsed_time": "18:43:22", "remaining_time": "1:35:11"} +{"current_steps": 7310, "total_steps": 7924, "loss": 0.1844, "lr": 7.296098993062562e-07, "epoch": 6.462014134275618, "percentage": 92.25, "elapsed_time": "18:44:08", "remaining_time": "1:34:25"} +{"current_steps": 7315, "total_steps": 7924, "loss": 0.257, "lr": 7.178657484878338e-07, "epoch": 6.46643109540636, "percentage": 92.31, "elapsed_time": "18:44:54", "remaining_time": "1:33:39"} +{"current_steps": 7320, "total_steps": 7924, "loss": 0.2236, "lr": 7.062151584438215e-07, "epoch": 6.470848056537102, "percentage": 92.38, "elapsed_time": "18:45:40", "remaining_time": "1:32:52"} +{"current_steps": 7325, "total_steps": 7924, "loss": 0.2281, "lr": 6.946581857052192e-07, "epoch": 6.475265017667844, "percentage": 92.44, "elapsed_time": "18:46:26", "remaining_time": "1:32:06"} +{"current_steps": 7330, "total_steps": 7924, "loss": 0.2464, "lr": 6.831948863487703e-07, "epoch": 6.479681978798586, "percentage": 92.5, "elapsed_time": "18:47:12", "remaining_time": "1:31:20"} +{"current_steps": 7335, "total_steps": 7924, "loss": 0.225, "lr": 6.71825315996697e-07, "epoch": 6.4840989399293285, "percentage": 92.57, "elapsed_time": "18:47:58", "remaining_time": "1:30:34"} +{"current_steps": 7340, "total_steps": 7924, "loss": 0.2491, "lr": 6.605495298164299e-07, "epoch": 6.488515901060071, "percentage": 92.63, "elapsed_time": "18:48:44", "remaining_time": "1:29:48"} +{"current_steps": 7345, "total_steps": 7924, "loss": 0.1915, "lr": 6.493675825203416e-07, "epoch": 6.492932862190813, "percentage": 92.69, "elapsed_time": "18:49:30", "remaining_time": "1:29:02"} +{"current_steps": 7350, "total_steps": 7924, "loss": 0.2077, "lr": 6.382795283654796e-07, "epoch": 6.497349823321555, "percentage": 92.76, "elapsed_time": "18:50:16", "remaining_time": "1:28:16"} +{"current_steps": 7355, "total_steps": 7924, "loss": 0.2322, "lr": 6.272854211532964e-07, "epoch": 6.501766784452297, "percentage": 92.82, "elapsed_time": "18:51:02", "remaining_time": "1:27:30"} +{"current_steps": 7360, "total_steps": 7924, "loss": 0.2214, "lr": 6.163853142294041e-07, "epoch": 6.506183745583039, "percentage": 92.88, "elapsed_time": "18:51:48", "remaining_time": "1:26:43"} +{"current_steps": 7365, "total_steps": 7924, "loss": 0.2615, "lr": 6.055792604833022e-07, "epoch": 6.510600706713781, "percentage": 92.95, "elapsed_time": "18:52:34", "remaining_time": "1:25:57"} +{"current_steps": 7370, "total_steps": 7924, "loss": 0.249, "lr": 5.948673123481286e-07, "epoch": 6.515017667844523, "percentage": 93.01, "elapsed_time": "18:53:20", "remaining_time": "1:25:11"} +{"current_steps": 7375, "total_steps": 7924, "loss": 0.2199, "lr": 5.842495218003952e-07, "epoch": 6.519434628975265, "percentage": 93.07, "elapsed_time": "18:54:06", "remaining_time": "1:24:25"} +{"current_steps": 7380, "total_steps": 7924, "loss": 0.2334, "lr": 5.737259403597484e-07, "epoch": 6.523851590106007, "percentage": 93.13, "elapsed_time": "18:54:51", "remaining_time": "1:23:39"} +{"current_steps": 7385, "total_steps": 7924, "loss": 0.2038, "lr": 5.632966190887157e-07, "epoch": 6.528268551236749, "percentage": 93.2, "elapsed_time": "18:55:37", "remaining_time": "1:22:53"} +{"current_steps": 7390, "total_steps": 7924, "loss": 0.233, "lr": 5.529616085924439e-07, "epoch": 6.532685512367491, "percentage": 93.26, "elapsed_time": "18:56:23", "remaining_time": "1:22:06"} +{"current_steps": 7395, "total_steps": 7924, "loss": 0.268, "lr": 5.42720959018479e-07, "epoch": 6.5371024734982335, "percentage": 93.32, "elapsed_time": "18:57:09", "remaining_time": "1:21:20"} +{"current_steps": 7400, "total_steps": 7924, "loss": 0.2227, "lr": 5.325747200564979e-07, "epoch": 6.541519434628976, "percentage": 93.39, "elapsed_time": "18:57:55", "remaining_time": "1:20:34"} +{"current_steps": 7405, "total_steps": 7924, "loss": 0.2127, "lr": 5.225229409380839e-07, "epoch": 6.545936395759718, "percentage": 93.45, "elapsed_time": "18:58:41", "remaining_time": "1:19:48"} +{"current_steps": 7410, "total_steps": 7924, "loss": 0.2077, "lr": 5.125656704364801e-07, "epoch": 6.55035335689046, "percentage": 93.51, "elapsed_time": "18:59:27", "remaining_time": "1:19:02"} +{"current_steps": 7415, "total_steps": 7924, "loss": 0.2762, "lr": 5.027029568663566e-07, "epoch": 6.554770318021202, "percentage": 93.58, "elapsed_time": "19:00:13", "remaining_time": "1:18:16"} +{"current_steps": 7420, "total_steps": 7924, "loss": 0.212, "lr": 4.929348480835749e-07, "epoch": 6.559187279151944, "percentage": 93.64, "elapsed_time": "19:00:59", "remaining_time": "1:17:30"} +{"current_steps": 7425, "total_steps": 7924, "loss": 0.2257, "lr": 4.832613914849504e-07, "epoch": 6.563604240282686, "percentage": 93.7, "elapsed_time": "19:01:45", "remaining_time": "1:16:43"} +{"current_steps": 7430, "total_steps": 7924, "loss": 0.2179, "lr": 4.7368263400803693e-07, "epoch": 6.568021201413428, "percentage": 93.77, "elapsed_time": "19:02:31", "remaining_time": "1:15:57"} +{"current_steps": 7435, "total_steps": 7924, "loss": 0.221, "lr": 4.6419862213087365e-07, "epoch": 6.572438162544169, "percentage": 93.83, "elapsed_time": "19:03:17", "remaining_time": "1:15:11"} +{"current_steps": 7440, "total_steps": 7924, "loss": 0.2338, "lr": 4.548094018717919e-07, "epoch": 6.576855123674911, "percentage": 93.89, "elapsed_time": "19:04:03", "remaining_time": "1:14:25"} +{"current_steps": 7445, "total_steps": 7924, "loss": 0.2489, "lr": 4.4551501878916214e-07, "epoch": 6.581272084805653, "percentage": 93.96, "elapsed_time": "19:04:49", "remaining_time": "1:13:39"} +{"current_steps": 7450, "total_steps": 7924, "loss": 0.1945, "lr": 4.363155179811962e-07, "epoch": 6.5856890459363955, "percentage": 94.02, "elapsed_time": "19:05:35", "remaining_time": "1:12:53"} +{"current_steps": 7455, "total_steps": 7924, "loss": 0.2113, "lr": 4.2721094408570974e-07, "epoch": 6.590106007067138, "percentage": 94.08, "elapsed_time": "19:06:20", "remaining_time": "1:12:07"} +{"current_steps": 7460, "total_steps": 7924, "loss": 0.2024, "lr": 4.1820134127991794e-07, "epoch": 6.59452296819788, "percentage": 94.14, "elapsed_time": "19:07:06", "remaining_time": "1:11:20"} +{"current_steps": 7465, "total_steps": 7924, "loss": 0.2569, "lr": 4.0928675328022027e-07, "epoch": 6.598939929328622, "percentage": 94.21, "elapsed_time": "19:07:52", "remaining_time": "1:10:34"} +{"current_steps": 7470, "total_steps": 7924, "loss": 0.2523, "lr": 4.0046722334197375e-07, "epoch": 6.603356890459364, "percentage": 94.27, "elapsed_time": "19:08:38", "remaining_time": "1:09:48"} +{"current_steps": 7475, "total_steps": 7924, "loss": 0.2493, "lr": 3.9174279425931105e-07, "epoch": 6.607773851590106, "percentage": 94.33, "elapsed_time": "19:09:23", "remaining_time": "1:09:02"} +{"current_steps": 7480, "total_steps": 7924, "loss": 0.2072, "lr": 3.8311350836490514e-07, "epoch": 6.612190812720848, "percentage": 94.4, "elapsed_time": "19:10:09", "remaining_time": "1:08:16"} +{"current_steps": 7485, "total_steps": 7924, "loss": 0.2245, "lr": 3.7457940752977594e-07, "epoch": 6.61660777385159, "percentage": 94.46, "elapsed_time": "19:10:55", "remaining_time": "1:07:30"} +{"current_steps": 7490, "total_steps": 7924, "loss": 0.2484, "lr": 3.6614053316309074e-07, "epoch": 6.621024734982332, "percentage": 94.52, "elapsed_time": "19:11:41", "remaining_time": "1:06:44"} +{"current_steps": 7495, "total_steps": 7924, "loss": 0.2471, "lr": 3.577969262119574e-07, "epoch": 6.625441696113074, "percentage": 94.59, "elapsed_time": "19:12:27", "remaining_time": "1:05:57"} +{"current_steps": 7500, "total_steps": 7924, "loss": 0.245, "lr": 3.4954862716122473e-07, "epoch": 6.629858657243816, "percentage": 94.65, "elapsed_time": "19:13:13", "remaining_time": "1:05:11"} +{"current_steps": 7505, "total_steps": 7924, "loss": 0.2486, "lr": 3.413956760332937e-07, "epoch": 6.634275618374558, "percentage": 94.71, "elapsed_time": "19:14:31", "remaining_time": "1:04:27"} +{"current_steps": 7510, "total_steps": 7924, "loss": 0.2397, "lr": 3.3333811238791316e-07, "epoch": 6.6386925795053005, "percentage": 94.78, "elapsed_time": "19:15:17", "remaining_time": "1:03:41"} +{"current_steps": 7515, "total_steps": 7924, "loss": 0.2319, "lr": 3.2537597532199315e-07, "epoch": 6.6431095406360425, "percentage": 94.84, "elapsed_time": "19:16:03", "remaining_time": "1:02:55"} +{"current_steps": 7520, "total_steps": 7924, "loss": 0.2462, "lr": 3.175093034694188e-07, "epoch": 6.647526501766785, "percentage": 94.9, "elapsed_time": "19:16:49", "remaining_time": "1:02:08"} +{"current_steps": 7525, "total_steps": 7924, "loss": 0.2524, "lr": 3.0973813500085215e-07, "epoch": 6.651943462897527, "percentage": 94.96, "elapsed_time": "19:17:35", "remaining_time": "1:01:22"} +{"current_steps": 7530, "total_steps": 7924, "loss": 0.2229, "lr": 3.0206250762356393e-07, "epoch": 6.656360424028269, "percentage": 95.03, "elapsed_time": "19:18:21", "remaining_time": "1:00:36"} +{"current_steps": 7535, "total_steps": 7924, "loss": 0.253, "lr": 2.944824585812289e-07, "epoch": 6.660777385159011, "percentage": 95.09, "elapsed_time": "19:19:06", "remaining_time": "0:59:50"} +{"current_steps": 7540, "total_steps": 7924, "loss": 0.2605, "lr": 2.86998024653764e-07, "epoch": 6.665194346289753, "percentage": 95.15, "elapsed_time": "19:19:53", "remaining_time": "0:59:04"} +{"current_steps": 7545, "total_steps": 7924, "loss": 0.2675, "lr": 2.7960924215714394e-07, "epoch": 6.669611307420495, "percentage": 95.22, "elapsed_time": "19:20:39", "remaining_time": "0:58:18"} +{"current_steps": 7550, "total_steps": 7924, "loss": 0.1965, "lr": 2.723161469432123e-07, "epoch": 6.674028268551237, "percentage": 95.28, "elapsed_time": "19:21:25", "remaining_time": "0:57:31"} +{"current_steps": 7555, "total_steps": 7924, "loss": 0.2459, "lr": 2.6511877439953536e-07, "epoch": 6.678445229681979, "percentage": 95.34, "elapsed_time": "19:22:11", "remaining_time": "0:56:45"} +{"current_steps": 7560, "total_steps": 7924, "loss": 0.2413, "lr": 2.5801715944919983e-07, "epoch": 6.68286219081272, "percentage": 95.41, "elapsed_time": "19:22:57", "remaining_time": "0:55:59"} +{"current_steps": 7565, "total_steps": 7924, "loss": 0.2139, "lr": 2.510113365506639e-07, "epoch": 6.6872791519434625, "percentage": 95.47, "elapsed_time": "19:23:43", "remaining_time": "0:55:13"} +{"current_steps": 7570, "total_steps": 7924, "loss": 0.2604, "lr": 2.441013396975822e-07, "epoch": 6.6916961130742045, "percentage": 95.53, "elapsed_time": "19:24:29", "remaining_time": "0:54:27"} +{"current_steps": 7575, "total_steps": 7924, "loss": 0.2582, "lr": 2.3728720241864123e-07, "epoch": 6.696113074204947, "percentage": 95.6, "elapsed_time": "19:25:15", "remaining_time": "0:53:41"} +{"current_steps": 7580, "total_steps": 7924, "loss": 0.2462, "lr": 2.3056895777740174e-07, "epoch": 6.700530035335689, "percentage": 95.66, "elapsed_time": "19:26:00", "remaining_time": "0:52:55"} +{"current_steps": 7585, "total_steps": 7924, "loss": 0.226, "lr": 2.2394663837213005e-07, "epoch": 6.704946996466431, "percentage": 95.72, "elapsed_time": "19:26:47", "remaining_time": "0:52:08"} +{"current_steps": 7590, "total_steps": 7924, "loss": 0.2058, "lr": 2.1742027633564477e-07, "epoch": 6.709363957597173, "percentage": 95.78, "elapsed_time": "19:27:33", "remaining_time": "0:51:22"} +{"current_steps": 7595, "total_steps": 7924, "loss": 0.2245, "lr": 2.1098990333516144e-07, "epoch": 6.713780918727915, "percentage": 95.85, "elapsed_time": "19:28:18", "remaining_time": "0:50:36"} +{"current_steps": 7600, "total_steps": 7924, "loss": 0.2762, "lr": 2.0465555057213705e-07, "epoch": 6.718197879858657, "percentage": 95.91, "elapsed_time": "19:29:04", "remaining_time": "0:49:50"} +{"current_steps": 7605, "total_steps": 7924, "loss": 0.2409, "lr": 1.9841724878211676e-07, "epoch": 6.722614840989399, "percentage": 95.97, "elapsed_time": "19:29:50", "remaining_time": "0:49:04"} +{"current_steps": 7610, "total_steps": 7924, "loss": 0.2223, "lr": 1.9227502823458976e-07, "epoch": 6.727031802120141, "percentage": 96.04, "elapsed_time": "19:30:36", "remaining_time": "0:48:18"} +{"current_steps": 7615, "total_steps": 7924, "loss": 0.2322, "lr": 1.8622891873284254e-07, "epoch": 6.731448763250883, "percentage": 96.1, "elapsed_time": "19:31:21", "remaining_time": "0:47:31"} +{"current_steps": 7620, "total_steps": 7924, "loss": 0.237, "lr": 1.8027894961380353e-07, "epoch": 6.735865724381625, "percentage": 96.16, "elapsed_time": "19:32:07", "remaining_time": "0:46:45"} +{"current_steps": 7625, "total_steps": 7924, "loss": 0.2704, "lr": 1.7442514974792103e-07, "epoch": 6.740282685512367, "percentage": 96.23, "elapsed_time": "19:32:53", "remaining_time": "0:45:59"} +{"current_steps": 7630, "total_steps": 7924, "loss": 0.2293, "lr": 1.6866754753899429e-07, "epoch": 6.7446996466431095, "percentage": 96.29, "elapsed_time": "19:33:39", "remaining_time": "0:45:13"} +{"current_steps": 7635, "total_steps": 7924, "loss": 0.2852, "lr": 1.6300617092406933e-07, "epoch": 6.749116607773852, "percentage": 96.35, "elapsed_time": "19:34:25", "remaining_time": "0:44:27"} +{"current_steps": 7640, "total_steps": 7924, "loss": 0.2428, "lr": 1.5744104737327458e-07, "epoch": 6.753533568904594, "percentage": 96.42, "elapsed_time": "19:35:11", "remaining_time": "0:43:41"} +{"current_steps": 7645, "total_steps": 7924, "loss": 0.2053, "lr": 1.5197220388970313e-07, "epoch": 6.757950530035336, "percentage": 96.48, "elapsed_time": "19:35:57", "remaining_time": "0:42:54"} +{"current_steps": 7650, "total_steps": 7924, "loss": 0.2316, "lr": 1.4659966700927952e-07, "epoch": 6.762367491166078, "percentage": 96.54, "elapsed_time": "19:36:43", "remaining_time": "0:42:08"} +{"current_steps": 7655, "total_steps": 7924, "loss": 0.2372, "lr": 1.413234628006288e-07, "epoch": 6.76678445229682, "percentage": 96.61, "elapsed_time": "19:37:29", "remaining_time": "0:41:22"} +{"current_steps": 7660, "total_steps": 7924, "loss": 0.2375, "lr": 1.3614361686494549e-07, "epoch": 6.771201413427562, "percentage": 96.67, "elapsed_time": "19:38:15", "remaining_time": "0:40:36"} +{"current_steps": 7665, "total_steps": 7924, "loss": 0.2402, "lr": 1.310601543358847e-07, "epoch": 6.775618374558304, "percentage": 96.73, "elapsed_time": "19:39:01", "remaining_time": "0:39:50"} +{"current_steps": 7670, "total_steps": 7924, "loss": 0.2261, "lr": 1.260730998794202e-07, "epoch": 6.780035335689046, "percentage": 96.79, "elapsed_time": "19:39:47", "remaining_time": "0:39:04"} +{"current_steps": 7675, "total_steps": 7924, "loss": 0.2558, "lr": 1.2118247769373758e-07, "epoch": 6.784452296819788, "percentage": 96.86, "elapsed_time": "19:40:33", "remaining_time": "0:38:18"} +{"current_steps": 7680, "total_steps": 7924, "loss": 0.2173, "lr": 1.163883115091169e-07, "epoch": 6.78886925795053, "percentage": 96.92, "elapsed_time": "19:41:19", "remaining_time": "0:37:31"} +{"current_steps": 7685, "total_steps": 7924, "loss": 0.2131, "lr": 1.1169062458781022e-07, "epoch": 6.793286219081272, "percentage": 96.98, "elapsed_time": "19:42:05", "remaining_time": "0:36:45"} +{"current_steps": 7690, "total_steps": 7924, "loss": 0.2261, "lr": 1.0708943972393748e-07, "epoch": 6.7977031802120145, "percentage": 97.05, "elapsed_time": "19:42:50", "remaining_time": "0:35:59"} +{"current_steps": 7695, "total_steps": 7924, "loss": 0.2611, "lr": 1.025847792433643e-07, "epoch": 6.8021201413427566, "percentage": 97.11, "elapsed_time": "19:43:36", "remaining_time": "0:35:13"} +{"current_steps": 7700, "total_steps": 7924, "loss": 0.2362, "lr": 9.817666500360867e-08, "epoch": 6.806537102473499, "percentage": 97.17, "elapsed_time": "19:44:22", "remaining_time": "0:34:27"} +{"current_steps": 7705, "total_steps": 7924, "loss": 0.2119, "lr": 9.386511839372114e-08, "epoch": 6.810954063604241, "percentage": 97.24, "elapsed_time": "19:45:08", "remaining_time": "0:33:41"} +{"current_steps": 7710, "total_steps": 7924, "loss": 0.2257, "lr": 8.965016033418705e-08, "epoch": 6.815371024734983, "percentage": 97.3, "elapsed_time": "19:45:54", "remaining_time": "0:32:54"} +{"current_steps": 7715, "total_steps": 7924, "loss": 0.2653, "lr": 8.553181127683108e-08, "epoch": 6.819787985865725, "percentage": 97.36, "elapsed_time": "19:46:40", "remaining_time": "0:32:08"} +{"current_steps": 7720, "total_steps": 7924, "loss": 0.2372, "lr": 8.1510091204704e-08, "epoch": 6.824204946996466, "percentage": 97.43, "elapsed_time": "19:47:26", "remaining_time": "0:31:22"} +{"current_steps": 7725, "total_steps": 7924, "loss": 0.2673, "lr": 7.758501963199605e-08, "epoch": 6.828621908127208, "percentage": 97.49, "elapsed_time": "19:48:11", "remaining_time": "0:30:36"} +{"current_steps": 7730, "total_steps": 7924, "loss": 0.2844, "lr": 7.375661560394154e-08, "epoch": 6.83303886925795, "percentage": 97.55, "elapsed_time": "19:48:57", "remaining_time": "0:29:50"} +{"current_steps": 7735, "total_steps": 7924, "loss": 0.2247, "lr": 7.002489769672105e-08, "epoch": 6.837455830388692, "percentage": 97.61, "elapsed_time": "19:49:43", "remaining_time": "0:29:04"} +{"current_steps": 7740, "total_steps": 7924, "loss": 0.2383, "lr": 6.638988401737933e-08, "epoch": 6.841872791519434, "percentage": 97.68, "elapsed_time": "19:50:30", "remaining_time": "0:28:18"} +{"current_steps": 7745, "total_steps": 7924, "loss": 0.2929, "lr": 6.285159220372982e-08, "epoch": 6.8462897526501765, "percentage": 97.74, "elapsed_time": "19:51:16", "remaining_time": "0:27:31"} +{"current_steps": 7750, "total_steps": 7924, "loss": 0.2092, "lr": 5.941003942427026e-08, "epoch": 6.8507067137809186, "percentage": 97.8, "elapsed_time": "19:52:01", "remaining_time": "0:26:45"} +{"current_steps": 7755, "total_steps": 7924, "loss": 0.2357, "lr": 5.6065242378104957e-08, "epoch": 6.855123674911661, "percentage": 97.87, "elapsed_time": "19:52:47", "remaining_time": "0:25:59"} +{"current_steps": 7760, "total_steps": 7924, "loss": 0.2196, "lr": 5.281721729486044e-08, "epoch": 6.859540636042403, "percentage": 97.93, "elapsed_time": "19:53:33", "remaining_time": "0:25:13"} +{"current_steps": 7765, "total_steps": 7924, "loss": 0.2458, "lr": 4.966597993460109e-08, "epoch": 6.863957597173145, "percentage": 97.99, "elapsed_time": "19:54:19", "remaining_time": "0:24:27"} +{"current_steps": 7770, "total_steps": 7924, "loss": 0.1947, "lr": 4.6611545587762486e-08, "epoch": 6.868374558303887, "percentage": 98.06, "elapsed_time": "19:55:05", "remaining_time": "0:23:41"} +{"current_steps": 7775, "total_steps": 7924, "loss": 0.1886, "lr": 4.365392907507149e-08, "epoch": 6.872791519434629, "percentage": 98.12, "elapsed_time": "19:55:52", "remaining_time": "0:22:55"} +{"current_steps": 7780, "total_steps": 7924, "loss": 0.2243, "lr": 4.079314474747742e-08, "epoch": 6.877208480565371, "percentage": 98.18, "elapsed_time": "19:56:38", "remaining_time": "0:22:08"} +{"current_steps": 7785, "total_steps": 7924, "loss": 0.2157, "lr": 3.802920648607433e-08, "epoch": 6.881625441696113, "percentage": 98.25, "elapsed_time": "19:57:23", "remaining_time": "0:21:22"} +{"current_steps": 7790, "total_steps": 7924, "loss": 0.2375, "lr": 3.536212770204772e-08, "epoch": 6.886042402826855, "percentage": 98.31, "elapsed_time": "19:58:09", "remaining_time": "0:20:36"} +{"current_steps": 7795, "total_steps": 7924, "loss": 0.2164, "lr": 3.279192133659459e-08, "epoch": 6.890459363957597, "percentage": 98.37, "elapsed_time": "19:58:55", "remaining_time": "0:19:50"} +{"current_steps": 7800, "total_steps": 7924, "loss": 0.2213, "lr": 3.0318599860872377e-08, "epoch": 6.894876325088339, "percentage": 98.44, "elapsed_time": "19:59:41", "remaining_time": "0:19:04"} +{"current_steps": 7805, "total_steps": 7924, "loss": 0.2358, "lr": 2.7942175275932347e-08, "epoch": 6.8992932862190814, "percentage": 98.5, "elapsed_time": "20:00:27", "remaining_time": "0:18:18"} +{"current_steps": 7810, "total_steps": 7924, "loss": 0.2383, "lr": 2.5662659112659637e-08, "epoch": 6.9037102473498235, "percentage": 98.56, "elapsed_time": "20:01:13", "remaining_time": "0:17:32"} +{"current_steps": 7815, "total_steps": 7924, "loss": 0.2359, "lr": 2.3480062431724404e-08, "epoch": 6.908127208480566, "percentage": 98.62, "elapsed_time": "20:01:59", "remaining_time": "0:16:45"} +{"current_steps": 7820, "total_steps": 7924, "loss": 0.198, "lr": 2.1394395823524093e-08, "epoch": 6.912544169611308, "percentage": 98.69, "elapsed_time": "20:02:46", "remaining_time": "0:15:59"} +{"current_steps": 7825, "total_steps": 7924, "loss": 0.2227, "lr": 1.9405669408127935e-08, "epoch": 6.91696113074205, "percentage": 98.75, "elapsed_time": "20:03:32", "remaining_time": "0:15:13"} +{"current_steps": 7830, "total_steps": 7924, "loss": 0.2251, "lr": 1.7513892835236967e-08, "epoch": 6.921378091872792, "percentage": 98.81, "elapsed_time": "20:04:18", "remaining_time": "0:14:27"} +{"current_steps": 7835, "total_steps": 7924, "loss": 0.2325, "lr": 1.5719075284126307e-08, "epoch": 6.925795053003534, "percentage": 98.88, "elapsed_time": "20:05:03", "remaining_time": "0:13:41"} +{"current_steps": 7840, "total_steps": 7924, "loss": 0.2759, "lr": 1.4021225463614063e-08, "epoch": 6.930212014134275, "percentage": 98.94, "elapsed_time": "20:05:49", "remaining_time": "0:12:55"} +{"current_steps": 7845, "total_steps": 7924, "loss": 0.2478, "lr": 1.2420351612003611e-08, "epoch": 6.934628975265017, "percentage": 99.0, "elapsed_time": "20:06:35", "remaining_time": "0:12:09"} +{"current_steps": 7850, "total_steps": 7924, "loss": 0.1851, "lr": 1.0916461497059161e-08, "epoch": 6.939045936395759, "percentage": 99.07, "elapsed_time": "20:07:21", "remaining_time": "0:11:22"} +{"current_steps": 7855, "total_steps": 7924, "loss": 0.2303, "lr": 9.509562415952468e-09, "epoch": 6.943462897526501, "percentage": 99.13, "elapsed_time": "20:08:07", "remaining_time": "0:10:36"} +{"current_steps": 7860, "total_steps": 7924, "loss": 0.2786, "lr": 8.199661195240626e-09, "epoch": 6.9478798586572434, "percentage": 99.19, "elapsed_time": "20:08:53", "remaining_time": "0:09:50"} +{"current_steps": 7865, "total_steps": 7924, "loss": 0.2273, "lr": 6.9867641908305524e-09, "epoch": 6.9522968197879855, "percentage": 99.26, "elapsed_time": "20:09:39", "remaining_time": "0:09:04"} +{"current_steps": 7870, "total_steps": 7924, "loss": 0.2254, "lr": 5.870877287934562e-09, "epoch": 6.956713780918728, "percentage": 99.32, "elapsed_time": "20:10:25", "remaining_time": "0:08:18"} +{"current_steps": 7875, "total_steps": 7924, "loss": 0.207, "lr": 4.852005901063717e-09, "epoch": 6.96113074204947, "percentage": 99.38, "elapsed_time": "20:11:11", "remaining_time": "0:07:32"} +{"current_steps": 7880, "total_steps": 7924, "loss": 0.2314, "lr": 3.930154973985634e-09, "epoch": 6.965547703180212, "percentage": 99.44, "elapsed_time": "20:11:57", "remaining_time": "0:06:46"} +{"current_steps": 7885, "total_steps": 7924, "loss": 0.2361, "lr": 3.1053289797022825e-09, "epoch": 6.969964664310954, "percentage": 99.51, "elapsed_time": "20:12:43", "remaining_time": "0:05:59"} +{"current_steps": 7890, "total_steps": 7924, "loss": 0.2094, "lr": 2.37753192043888e-09, "epoch": 6.974381625441696, "percentage": 99.57, "elapsed_time": "20:13:29", "remaining_time": "0:05:13"} +{"current_steps": 7895, "total_steps": 7924, "loss": 0.2498, "lr": 1.746767327610588e-09, "epoch": 6.978798586572438, "percentage": 99.63, "elapsed_time": "20:14:15", "remaining_time": "0:04:27"} +{"current_steps": 7900, "total_steps": 7924, "loss": 0.2267, "lr": 1.2130382618114057e-09, "epoch": 6.98321554770318, "percentage": 99.7, "elapsed_time": "20:15:01", "remaining_time": "0:03:41"} +{"current_steps": 7905, "total_steps": 7924, "loss": 0.2211, "lr": 7.763473128052923e-10, "epoch": 6.987632508833922, "percentage": 99.76, "elapsed_time": "20:15:47", "remaining_time": "0:02:55"} +{"current_steps": 7910, "total_steps": 7924, "loss": 0.2046, "lr": 4.366965994995198e-10, "epoch": 6.992049469964664, "percentage": 99.82, "elapsed_time": "20:16:34", "remaining_time": "0:02:09"} +{"current_steps": 7915, "total_steps": 7924, "loss": 0.1849, "lr": 1.9408776995355483e-10, "epoch": 6.996466431095406, "percentage": 99.89, "elapsed_time": "20:17:20", "remaining_time": "0:01:23"} +{"current_steps": 7918, "total_steps": 7924, "epoch": 6.999116607773852, "percentage": 99.92, "elapsed_time": "20:17:47", "remaining_time": "0:00:55"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..5c2800b --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,14292 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.999116607773852, + "eval_steps": 500, + "global_step": 7918, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00441696113074205, + "grad_norm": 24.109052658081055, + "learning_rate": 2.0176544766708703e-07, + "loss": 0.8392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7250818014144897, + "step": 5 + }, + { + "epoch": 0.0088339222614841, + "grad_norm": 23.32025718688965, + "learning_rate": 4.5397225725094586e-07, + "loss": 0.8118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8858845233917236, + "step": 10 + }, + { + "epoch": 0.013250883392226149, + "grad_norm": 19.11461639404297, + "learning_rate": 7.061790668348046e-07, + "loss": 0.8166, + "loss_nan_ranks": 0, + "loss_rank_avg": 1.0077567100524902, + "step": 15 + }, + { + "epoch": 0.0176678445229682, + "grad_norm": 18.66092300415039, + "learning_rate": 9.583858764186634e-07, + "loss": 0.7684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7495265007019043, + "step": 20 + }, + { + "epoch": 0.022084805653710248, + "grad_norm": 11.922789573669434, + "learning_rate": 1.210592686002522e-06, + "loss": 0.7393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6014531850814819, + "step": 25 + }, + { + "epoch": 0.026501766784452298, + "grad_norm": 9.920490264892578, + "learning_rate": 1.4627994955863808e-06, + "loss": 0.7164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6873708367347717, + "step": 30 + }, + { + "epoch": 0.030918727915194347, + "grad_norm": 6.4108171463012695, + "learning_rate": 1.7150063051702399e-06, + "loss": 0.6782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.628339946269989, + "step": 35 + }, + { + "epoch": 0.0353356890459364, + "grad_norm": 4.962175369262695, + "learning_rate": 1.9672131147540985e-06, + "loss": 0.6596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6136308908462524, + "step": 40 + }, + { + "epoch": 0.03975265017667844, + "grad_norm": 2.925511121749878, + "learning_rate": 2.2194199243379574e-06, + "loss": 0.6625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7212941646575928, + "step": 45 + }, + { + "epoch": 0.044169611307420496, + "grad_norm": 1.815821886062622, + "learning_rate": 2.4716267339218163e-06, + "loss": 0.6164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6185051202774048, + "step": 50 + }, + { + "epoch": 0.04858657243816254, + "grad_norm": 1.6444969177246094, + "learning_rate": 2.723833543505675e-06, + "loss": 0.5692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5030768513679504, + "step": 55 + }, + { + "epoch": 0.053003533568904596, + "grad_norm": 1.4677263498306274, + "learning_rate": 2.9760403530895336e-06, + "loss": 0.5737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6769396066665649, + "step": 60 + }, + { + "epoch": 0.05742049469964664, + "grad_norm": 1.4194655418395996, + "learning_rate": 3.2282471626733925e-06, + "loss": 0.5621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.48633405566215515, + "step": 65 + }, + { + "epoch": 0.061837455830388695, + "grad_norm": 1.1492063999176025, + "learning_rate": 3.480453972257251e-06, + "loss": 0.5322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5070392489433289, + "step": 70 + }, + { + "epoch": 0.06625441696113074, + "grad_norm": 1.0129790306091309, + "learning_rate": 3.73266078184111e-06, + "loss": 0.5497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4552308917045593, + "step": 75 + }, + { + "epoch": 0.0706713780918728, + "grad_norm": 0.9223603010177612, + "learning_rate": 3.984867591424969e-06, + "loss": 0.5376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4579840302467346, + "step": 80 + }, + { + "epoch": 0.07508833922261485, + "grad_norm": 1.050498366355896, + "learning_rate": 4.237074401008828e-06, + "loss": 0.5411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5621368885040283, + "step": 85 + }, + { + "epoch": 0.07950530035335689, + "grad_norm": 0.8152749538421631, + "learning_rate": 4.4892812105926865e-06, + "loss": 0.5246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.49742865562438965, + "step": 90 + }, + { + "epoch": 0.08392226148409894, + "grad_norm": 0.9410054087638855, + "learning_rate": 4.741488020176545e-06, + "loss": 0.5438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5632932186126709, + "step": 95 + }, + { + "epoch": 0.08833922261484099, + "grad_norm": 0.7807851433753967, + "learning_rate": 4.993694829760403e-06, + "loss": 0.5607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5178039073944092, + "step": 100 + }, + { + "epoch": 0.09275618374558305, + "grad_norm": 0.9543706178665161, + "learning_rate": 5.245901639344263e-06, + "loss": 0.5857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4983155429363251, + "step": 105 + }, + { + "epoch": 0.09717314487632508, + "grad_norm": 1.1576862335205078, + "learning_rate": 5.498108448928121e-06, + "loss": 0.5065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5359828472137451, + "step": 110 + }, + { + "epoch": 0.10159010600706714, + "grad_norm": 0.8757576942443848, + "learning_rate": 5.7503152585119805e-06, + "loss": 0.5241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4543844163417816, + "step": 115 + }, + { + "epoch": 0.10600706713780919, + "grad_norm": 0.8769079446792603, + "learning_rate": 6.00252206809584e-06, + "loss": 0.4923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4466102123260498, + "step": 120 + }, + { + "epoch": 0.11042402826855123, + "grad_norm": 0.7672253251075745, + "learning_rate": 6.254728877679697e-06, + "loss": 0.4789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5950910449028015, + "step": 125 + }, + { + "epoch": 0.11484098939929328, + "grad_norm": 0.7976667284965515, + "learning_rate": 6.506935687263557e-06, + "loss": 0.4813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46099185943603516, + "step": 130 + }, + { + "epoch": 0.11925795053003534, + "grad_norm": 0.8457517623901367, + "learning_rate": 6.759142496847415e-06, + "loss": 0.5338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4921356439590454, + "step": 135 + }, + { + "epoch": 0.12367491166077739, + "grad_norm": 0.8651091456413269, + "learning_rate": 7.0113493064312745e-06, + "loss": 0.4892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4262227714061737, + "step": 140 + }, + { + "epoch": 0.12809187279151943, + "grad_norm": 0.7235690355300903, + "learning_rate": 7.263556116015134e-06, + "loss": 0.459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46743807196617126, + "step": 145 + }, + { + "epoch": 0.13250883392226148, + "grad_norm": 0.7583150267601013, + "learning_rate": 7.515762925598991e-06, + "loss": 0.4925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4664328396320343, + "step": 150 + }, + { + "epoch": 0.13692579505300354, + "grad_norm": 0.9911392331123352, + "learning_rate": 7.76796973518285e-06, + "loss": 0.4904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5170290470123291, + "step": 155 + }, + { + "epoch": 0.1413427561837456, + "grad_norm": 0.7634608745574951, + "learning_rate": 8.020176544766708e-06, + "loss": 0.484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45101815462112427, + "step": 160 + }, + { + "epoch": 0.14575971731448764, + "grad_norm": 0.8441954255104065, + "learning_rate": 8.272383354350568e-06, + "loss": 0.4711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.527004599571228, + "step": 165 + }, + { + "epoch": 0.1501766784452297, + "grad_norm": 0.8853057026863098, + "learning_rate": 8.524590163934427e-06, + "loss": 0.4725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40662050247192383, + "step": 170 + }, + { + "epoch": 0.15459363957597172, + "grad_norm": 0.7503329515457153, + "learning_rate": 8.776796973518286e-06, + "loss": 0.4435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45254433155059814, + "step": 175 + }, + { + "epoch": 0.15901060070671377, + "grad_norm": 0.811824381351471, + "learning_rate": 9.029003783102146e-06, + "loss": 0.4582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44835376739501953, + "step": 180 + }, + { + "epoch": 0.16342756183745583, + "grad_norm": 0.8182066679000854, + "learning_rate": 9.281210592686003e-06, + "loss": 0.4924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5891055464744568, + "step": 185 + }, + { + "epoch": 0.16784452296819788, + "grad_norm": 0.7815266251564026, + "learning_rate": 9.533417402269862e-06, + "loss": 0.4778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44031378626823425, + "step": 190 + }, + { + "epoch": 0.17226148409893993, + "grad_norm": 0.8124738931655884, + "learning_rate": 9.78562421185372e-06, + "loss": 0.4345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45023810863494873, + "step": 195 + }, + { + "epoch": 0.17667844522968199, + "grad_norm": 0.8434866666793823, + "learning_rate": 1.0037831021437581e-05, + "loss": 0.4199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44062817096710205, + "step": 200 + }, + { + "epoch": 0.18109540636042404, + "grad_norm": 0.8283309936523438, + "learning_rate": 1.0290037831021437e-05, + "loss": 0.4342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4788593649864197, + "step": 205 + }, + { + "epoch": 0.1855123674911661, + "grad_norm": 0.6613907814025879, + "learning_rate": 1.0542244640605296e-05, + "loss": 0.4274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4137096405029297, + "step": 210 + }, + { + "epoch": 0.18992932862190812, + "grad_norm": 0.9123347401618958, + "learning_rate": 1.0794451450189156e-05, + "loss": 0.4314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4231804609298706, + "step": 215 + }, + { + "epoch": 0.19434628975265017, + "grad_norm": 0.8312901854515076, + "learning_rate": 1.1046658259773015e-05, + "loss": 0.4208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4278219938278198, + "step": 220 + }, + { + "epoch": 0.19876325088339222, + "grad_norm": 0.6793373823165894, + "learning_rate": 1.1298865069356874e-05, + "loss": 0.4253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39295411109924316, + "step": 225 + }, + { + "epoch": 0.20318021201413428, + "grad_norm": 0.8161110877990723, + "learning_rate": 1.1551071878940732e-05, + "loss": 0.4605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4723338186740875, + "step": 230 + }, + { + "epoch": 0.20759717314487633, + "grad_norm": 0.7220736145973206, + "learning_rate": 1.1803278688524591e-05, + "loss": 0.4244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44514936208724976, + "step": 235 + }, + { + "epoch": 0.21201413427561838, + "grad_norm": 0.780061662197113, + "learning_rate": 1.205548549810845e-05, + "loss": 0.472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4039708971977234, + "step": 240 + }, + { + "epoch": 0.21643109540636044, + "grad_norm": 0.6663079857826233, + "learning_rate": 1.230769230769231e-05, + "loss": 0.4287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35989290475845337, + "step": 245 + }, + { + "epoch": 0.22084805653710246, + "grad_norm": 0.8617585897445679, + "learning_rate": 1.2559899117276166e-05, + "loss": 0.4589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5611141324043274, + "step": 250 + }, + { + "epoch": 0.2252650176678445, + "grad_norm": 0.8636696934700012, + "learning_rate": 1.2812105926860025e-05, + "loss": 0.393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4264186918735504, + "step": 255 + }, + { + "epoch": 0.22968197879858657, + "grad_norm": 0.884749174118042, + "learning_rate": 1.3064312736443884e-05, + "loss": 0.441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5420808792114258, + "step": 260 + }, + { + "epoch": 0.23409893992932862, + "grad_norm": 0.7348568439483643, + "learning_rate": 1.3316519546027744e-05, + "loss": 0.357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34996867179870605, + "step": 265 + }, + { + "epoch": 0.23851590106007067, + "grad_norm": 0.6985581517219543, + "learning_rate": 1.3568726355611603e-05, + "loss": 0.4148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44080445170402527, + "step": 270 + }, + { + "epoch": 0.24293286219081273, + "grad_norm": 0.8112905025482178, + "learning_rate": 1.382093316519546e-05, + "loss": 0.387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46948182582855225, + "step": 275 + }, + { + "epoch": 0.24734982332155478, + "grad_norm": 0.7835463881492615, + "learning_rate": 1.407313997477932e-05, + "loss": 0.4286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39757293462753296, + "step": 280 + }, + { + "epoch": 0.25176678445229683, + "grad_norm": 0.8474117517471313, + "learning_rate": 1.4325346784363179e-05, + "loss": 0.4184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.386111855506897, + "step": 285 + }, + { + "epoch": 0.25618374558303886, + "grad_norm": 0.7113578915596008, + "learning_rate": 1.4577553593947038e-05, + "loss": 0.3587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32697296142578125, + "step": 290 + }, + { + "epoch": 0.26060070671378094, + "grad_norm": 0.8683375716209412, + "learning_rate": 1.4829760403530898e-05, + "loss": 0.3728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3836482763290405, + "step": 295 + }, + { + "epoch": 0.26501766784452296, + "grad_norm": 0.732476532459259, + "learning_rate": 1.5081967213114754e-05, + "loss": 0.4082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39730846881866455, + "step": 300 + }, + { + "epoch": 0.26943462897526504, + "grad_norm": 0.8139944076538086, + "learning_rate": 1.5334174022698615e-05, + "loss": 0.4319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44922178983688354, + "step": 305 + }, + { + "epoch": 0.27385159010600707, + "grad_norm": 0.7223174571990967, + "learning_rate": 1.5586380832282474e-05, + "loss": 0.3937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39554718136787415, + "step": 310 + }, + { + "epoch": 0.2782685512367491, + "grad_norm": 0.7935890555381775, + "learning_rate": 1.5838587641866333e-05, + "loss": 0.3971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38846322894096375, + "step": 315 + }, + { + "epoch": 0.2826855123674912, + "grad_norm": 0.8385109305381775, + "learning_rate": 1.6090794451450193e-05, + "loss": 0.3842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3368198275566101, + "step": 320 + }, + { + "epoch": 0.2871024734982332, + "grad_norm": 0.7849225401878357, + "learning_rate": 1.634300126103405e-05, + "loss": 0.4017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4689519703388214, + "step": 325 + }, + { + "epoch": 0.2915194346289753, + "grad_norm": 0.9184194207191467, + "learning_rate": 1.6595208070617908e-05, + "loss": 0.4222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.451241135597229, + "step": 330 + }, + { + "epoch": 0.2959363957597173, + "grad_norm": 0.7168762683868408, + "learning_rate": 1.6847414880201767e-05, + "loss": 0.3989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3456907868385315, + "step": 335 + }, + { + "epoch": 0.3003533568904594, + "grad_norm": 0.7282963395118713, + "learning_rate": 1.7099621689785626e-05, + "loss": 0.4091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3524041771888733, + "step": 340 + }, + { + "epoch": 0.3047703180212014, + "grad_norm": 0.6994873285293579, + "learning_rate": 1.7351828499369486e-05, + "loss": 0.4219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3284667134284973, + "step": 345 + }, + { + "epoch": 0.30918727915194344, + "grad_norm": 0.6103523969650269, + "learning_rate": 1.760403530895334e-05, + "loss": 0.4224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3484126925468445, + "step": 350 + }, + { + "epoch": 0.3136042402826855, + "grad_norm": 0.7844368815422058, + "learning_rate": 1.78562421185372e-05, + "loss": 0.4439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47252100706100464, + "step": 355 + }, + { + "epoch": 0.31802120141342755, + "grad_norm": 0.6682479381561279, + "learning_rate": 1.810844892812106e-05, + "loss": 0.3943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4642447829246521, + "step": 360 + }, + { + "epoch": 0.3224381625441696, + "grad_norm": 0.7882423996925354, + "learning_rate": 1.836065573770492e-05, + "loss": 0.3837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4364447593688965, + "step": 365 + }, + { + "epoch": 0.32685512367491165, + "grad_norm": 0.6892913579940796, + "learning_rate": 1.861286254728878e-05, + "loss": 0.403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3730366826057434, + "step": 370 + }, + { + "epoch": 0.33127208480565373, + "grad_norm": 0.890687882900238, + "learning_rate": 1.8865069356872635e-05, + "loss": 0.4094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4724777638912201, + "step": 375 + }, + { + "epoch": 0.33568904593639576, + "grad_norm": 0.7592324018478394, + "learning_rate": 1.9117276166456494e-05, + "loss": 0.4166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38363948464393616, + "step": 380 + }, + { + "epoch": 0.3401060070671378, + "grad_norm": 0.8239976763725281, + "learning_rate": 1.9369482976040353e-05, + "loss": 0.3977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37950828671455383, + "step": 385 + }, + { + "epoch": 0.34452296819787986, + "grad_norm": 1.0034205913543701, + "learning_rate": 1.9621689785624213e-05, + "loss": 0.3811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31529074907302856, + "step": 390 + }, + { + "epoch": 0.3489399293286219, + "grad_norm": 0.7281728982925415, + "learning_rate": 1.9873896595208072e-05, + "loss": 0.4184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35957640409469604, + "step": 395 + }, + { + "epoch": 0.35335689045936397, + "grad_norm": 0.8345057368278503, + "learning_rate": 2.012610340479193e-05, + "loss": 0.3603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38588133454322815, + "step": 400 + }, + { + "epoch": 0.357773851590106, + "grad_norm": 0.881252110004425, + "learning_rate": 2.037831021437579e-05, + "loss": 0.4153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.385145366191864, + "step": 405 + }, + { + "epoch": 0.3621908127208481, + "grad_norm": 0.7450293302536011, + "learning_rate": 2.063051702395965e-05, + "loss": 0.43, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4830145835876465, + "step": 410 + }, + { + "epoch": 0.3666077738515901, + "grad_norm": 0.7826328873634338, + "learning_rate": 2.0882723833543506e-05, + "loss": 0.3932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34232282638549805, + "step": 415 + }, + { + "epoch": 0.3710247349823322, + "grad_norm": 0.7054056525230408, + "learning_rate": 2.113493064312737e-05, + "loss": 0.3558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2948620617389679, + "step": 420 + }, + { + "epoch": 0.3754416961130742, + "grad_norm": 0.770078718662262, + "learning_rate": 2.1387137452711224e-05, + "loss": 0.382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4264480471611023, + "step": 425 + }, + { + "epoch": 0.37985865724381623, + "grad_norm": 0.7419948577880859, + "learning_rate": 2.1639344262295087e-05, + "loss": 0.3874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4408531188964844, + "step": 430 + }, + { + "epoch": 0.3842756183745583, + "grad_norm": 0.7609454989433289, + "learning_rate": 2.1891551071878943e-05, + "loss": 0.3913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35307884216308594, + "step": 435 + }, + { + "epoch": 0.38869257950530034, + "grad_norm": 1.1566354036331177, + "learning_rate": 2.21437578814628e-05, + "loss": 0.3921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2975735664367676, + "step": 440 + }, + { + "epoch": 0.3931095406360424, + "grad_norm": 0.8143091201782227, + "learning_rate": 2.239596469104666e-05, + "loss": 0.377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3160606920719147, + "step": 445 + }, + { + "epoch": 0.39752650176678445, + "grad_norm": 0.6456040143966675, + "learning_rate": 2.2648171500630518e-05, + "loss": 0.3703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37065446376800537, + "step": 450 + }, + { + "epoch": 0.4019434628975265, + "grad_norm": 0.6718341708183289, + "learning_rate": 2.290037831021438e-05, + "loss": 0.3584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32244637608528137, + "step": 455 + }, + { + "epoch": 0.40636042402826855, + "grad_norm": 0.862759530544281, + "learning_rate": 2.3152585119798236e-05, + "loss": 0.4145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4153875708580017, + "step": 460 + }, + { + "epoch": 0.4107773851590106, + "grad_norm": 0.7327967882156372, + "learning_rate": 2.3404791929382092e-05, + "loss": 0.3628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3565199077129364, + "step": 465 + }, + { + "epoch": 0.41519434628975266, + "grad_norm": 0.863936722278595, + "learning_rate": 2.3656998738965955e-05, + "loss": 0.405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40698155760765076, + "step": 470 + }, + { + "epoch": 0.4196113074204947, + "grad_norm": 0.8501296639442444, + "learning_rate": 2.390920554854981e-05, + "loss": 0.3567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36806195974349976, + "step": 475 + }, + { + "epoch": 0.42402826855123676, + "grad_norm": 1.3909848928451538, + "learning_rate": 2.4161412358133673e-05, + "loss": 0.4706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4072020649909973, + "step": 480 + }, + { + "epoch": 0.4284452296819788, + "grad_norm": 0.7283811569213867, + "learning_rate": 2.441361916771753e-05, + "loss": 0.3921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37325534224510193, + "step": 485 + }, + { + "epoch": 0.43286219081272087, + "grad_norm": 0.7579320669174194, + "learning_rate": 2.466582597730139e-05, + "loss": 0.3424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33884212374687195, + "step": 490 + }, + { + "epoch": 0.4372791519434629, + "grad_norm": 0.8202218413352966, + "learning_rate": 2.4918032786885248e-05, + "loss": 0.3737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34762677550315857, + "step": 495 + }, + { + "epoch": 0.4416961130742049, + "grad_norm": 0.7608699798583984, + "learning_rate": 2.5170239596469107e-05, + "loss": 0.3843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.306643545627594, + "step": 500 + }, + { + "epoch": 0.446113074204947, + "grad_norm": 0.7544461488723755, + "learning_rate": 2.5422446406052967e-05, + "loss": 0.3745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39631327986717224, + "step": 505 + }, + { + "epoch": 0.450530035335689, + "grad_norm": 0.6755616664886475, + "learning_rate": 2.5674653215636826e-05, + "loss": 0.3803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3355293869972229, + "step": 510 + }, + { + "epoch": 0.4549469964664311, + "grad_norm": 0.6714569330215454, + "learning_rate": 2.5926860025220682e-05, + "loss": 0.349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.344030499458313, + "step": 515 + }, + { + "epoch": 0.45936395759717313, + "grad_norm": 0.7619994878768921, + "learning_rate": 2.6179066834804544e-05, + "loss": 0.3474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.331425279378891, + "step": 520 + }, + { + "epoch": 0.4637809187279152, + "grad_norm": 0.745580792427063, + "learning_rate": 2.64312736443884e-05, + "loss": 0.3563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39198458194732666, + "step": 525 + }, + { + "epoch": 0.46819787985865724, + "grad_norm": 0.823861837387085, + "learning_rate": 2.668348045397226e-05, + "loss": 0.3697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3569245934486389, + "step": 530 + }, + { + "epoch": 0.4726148409893993, + "grad_norm": 0.7193405628204346, + "learning_rate": 2.693568726355612e-05, + "loss": 0.3745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4549163579940796, + "step": 535 + }, + { + "epoch": 0.47703180212014135, + "grad_norm": 0.7041448354721069, + "learning_rate": 2.7187894073139975e-05, + "loss": 0.3817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43728315830230713, + "step": 540 + }, + { + "epoch": 0.48144876325088337, + "grad_norm": 0.8459624648094177, + "learning_rate": 2.7440100882723838e-05, + "loss": 0.3744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3709990084171295, + "step": 545 + }, + { + "epoch": 0.48586572438162545, + "grad_norm": 0.8668114542961121, + "learning_rate": 2.7692307692307694e-05, + "loss": 0.3612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32689058780670166, + "step": 550 + }, + { + "epoch": 0.4902826855123675, + "grad_norm": 0.668462872505188, + "learning_rate": 2.7944514501891556e-05, + "loss": 0.3848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30886298418045044, + "step": 555 + }, + { + "epoch": 0.49469964664310956, + "grad_norm": 0.7367919683456421, + "learning_rate": 2.8196721311475412e-05, + "loss": 0.3591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3122621476650238, + "step": 560 + }, + { + "epoch": 0.4991166077738516, + "grad_norm": 0.7553625106811523, + "learning_rate": 2.8448928121059268e-05, + "loss": 0.362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3086245059967041, + "step": 565 + }, + { + "epoch": 0.5035335689045937, + "grad_norm": 0.6816399097442627, + "learning_rate": 2.870113493064313e-05, + "loss": 0.3565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3278921842575073, + "step": 570 + }, + { + "epoch": 0.5079505300353356, + "grad_norm": 0.7400028109550476, + "learning_rate": 2.8953341740226987e-05, + "loss": 0.3903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33623063564300537, + "step": 575 + }, + { + "epoch": 0.5123674911660777, + "grad_norm": 0.6778237819671631, + "learning_rate": 2.920554854981085e-05, + "loss": 0.3745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3270686864852905, + "step": 580 + }, + { + "epoch": 0.5167844522968198, + "grad_norm": 0.7293447256088257, + "learning_rate": 2.9457755359394705e-05, + "loss": 0.427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47877180576324463, + "step": 585 + }, + { + "epoch": 0.5212014134275619, + "grad_norm": 0.7676773071289062, + "learning_rate": 2.9709962168978565e-05, + "loss": 0.4404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4877261817455292, + "step": 590 + }, + { + "epoch": 0.5256183745583038, + "grad_norm": 0.6538991332054138, + "learning_rate": 2.9962168978562424e-05, + "loss": 0.3604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3789966106414795, + "step": 595 + }, + { + "epoch": 0.5300353356890459, + "grad_norm": 0.7705276012420654, + "learning_rate": 3.0214375788146283e-05, + "loss": 0.3947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33382245898246765, + "step": 600 + }, + { + "epoch": 0.534452296819788, + "grad_norm": 0.7710214853286743, + "learning_rate": 3.0466582597730143e-05, + "loss": 0.3543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38197773694992065, + "step": 605 + }, + { + "epoch": 0.5388692579505301, + "grad_norm": 0.67430579662323, + "learning_rate": 3.0718789407314e-05, + "loss": 0.4353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4609474241733551, + "step": 610 + }, + { + "epoch": 0.5432862190812721, + "grad_norm": 0.7167083621025085, + "learning_rate": 3.097099621689786e-05, + "loss": 0.3326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2994512915611267, + "step": 615 + }, + { + "epoch": 0.5477031802120141, + "grad_norm": 0.9038445949554443, + "learning_rate": 3.122320302648172e-05, + "loss": 0.3872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36574289202690125, + "step": 620 + }, + { + "epoch": 0.5521201413427562, + "grad_norm": 0.7607389092445374, + "learning_rate": 3.1475409836065576e-05, + "loss": 0.384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4068770706653595, + "step": 625 + }, + { + "epoch": 0.5565371024734982, + "grad_norm": 0.8022470474243164, + "learning_rate": 3.1727616645649436e-05, + "loss": 0.3579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34602364897727966, + "step": 630 + }, + { + "epoch": 0.5609540636042403, + "grad_norm": 0.7999392747879028, + "learning_rate": 3.1979823455233295e-05, + "loss": 0.3784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4875880479812622, + "step": 635 + }, + { + "epoch": 0.5653710247349824, + "grad_norm": 0.7946346402168274, + "learning_rate": 3.2232030264817154e-05, + "loss": 0.3777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4051263630390167, + "step": 640 + }, + { + "epoch": 0.5697879858657244, + "grad_norm": 0.6947000622749329, + "learning_rate": 3.2484237074401014e-05, + "loss": 0.3565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4333820044994354, + "step": 645 + }, + { + "epoch": 0.5742049469964664, + "grad_norm": 0.7858723402023315, + "learning_rate": 3.273644388398487e-05, + "loss": 0.3576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33583977818489075, + "step": 650 + }, + { + "epoch": 0.5786219081272085, + "grad_norm": 0.817613959312439, + "learning_rate": 3.298865069356873e-05, + "loss": 0.3129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27572858333587646, + "step": 655 + }, + { + "epoch": 0.5830388692579506, + "grad_norm": 0.7588901519775391, + "learning_rate": 3.324085750315259e-05, + "loss": 0.367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29186704754829407, + "step": 660 + }, + { + "epoch": 0.5874558303886925, + "grad_norm": 0.7261371612548828, + "learning_rate": 3.3493064312736444e-05, + "loss": 0.3133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3395135998725891, + "step": 665 + }, + { + "epoch": 0.5918727915194346, + "grad_norm": 0.6673470735549927, + "learning_rate": 3.37452711223203e-05, + "loss": 0.404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41102349758148193, + "step": 670 + }, + { + "epoch": 0.5962897526501767, + "grad_norm": 0.6788419485092163, + "learning_rate": 3.399747793190416e-05, + "loss": 0.3931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3825211226940155, + "step": 675 + }, + { + "epoch": 0.6007067137809188, + "grad_norm": 0.861670970916748, + "learning_rate": 3.424968474148802e-05, + "loss": 0.3875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3033771514892578, + "step": 680 + }, + { + "epoch": 0.6051236749116607, + "grad_norm": 0.6976490616798401, + "learning_rate": 3.450189155107188e-05, + "loss": 0.3206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3370228409767151, + "step": 685 + }, + { + "epoch": 0.6095406360424028, + "grad_norm": 0.6630620956420898, + "learning_rate": 3.475409836065574e-05, + "loss": 0.3294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34097903966903687, + "step": 690 + }, + { + "epoch": 0.6139575971731449, + "grad_norm": 0.6962843537330627, + "learning_rate": 3.50063051702396e-05, + "loss": 0.3731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40441685914993286, + "step": 695 + }, + { + "epoch": 0.6183745583038869, + "grad_norm": 0.6727367639541626, + "learning_rate": 3.525851197982346e-05, + "loss": 0.3226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32211965322494507, + "step": 700 + }, + { + "epoch": 0.622791519434629, + "grad_norm": 0.7818762063980103, + "learning_rate": 3.551071878940732e-05, + "loss": 0.3474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4583301544189453, + "step": 705 + }, + { + "epoch": 0.627208480565371, + "grad_norm": 0.7723045349121094, + "learning_rate": 3.576292559899118e-05, + "loss": 0.3719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28167077898979187, + "step": 710 + }, + { + "epoch": 0.6316254416961131, + "grad_norm": 0.6072138547897339, + "learning_rate": 3.601513240857503e-05, + "loss": 0.3479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2791953682899475, + "step": 715 + }, + { + "epoch": 0.6360424028268551, + "grad_norm": 0.8396653532981873, + "learning_rate": 3.6267339218158896e-05, + "loss": 0.4271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5739034414291382, + "step": 720 + }, + { + "epoch": 0.6404593639575972, + "grad_norm": 0.7916384935379028, + "learning_rate": 3.651954602774275e-05, + "loss": 0.4105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42675089836120605, + "step": 725 + }, + { + "epoch": 0.6448763250883393, + "grad_norm": 0.7917523980140686, + "learning_rate": 3.677175283732661e-05, + "loss": 0.3365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30214524269104004, + "step": 730 + }, + { + "epoch": 0.6492932862190812, + "grad_norm": 0.6911900639533997, + "learning_rate": 3.702395964691047e-05, + "loss": 0.3602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3707190155982971, + "step": 735 + }, + { + "epoch": 0.6537102473498233, + "grad_norm": 0.7061692476272583, + "learning_rate": 3.727616645649433e-05, + "loss": 0.3479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28617987036705017, + "step": 740 + }, + { + "epoch": 0.6581272084805654, + "grad_norm": 0.6829811334609985, + "learning_rate": 3.7528373266078186e-05, + "loss": 0.4159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4385426640510559, + "step": 745 + }, + { + "epoch": 0.6625441696113075, + "grad_norm": 0.688046395778656, + "learning_rate": 3.7780580075662045e-05, + "loss": 0.3727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3491179943084717, + "step": 750 + }, + { + "epoch": 0.6669611307420494, + "grad_norm": 0.6785129308700562, + "learning_rate": 3.8032786885245905e-05, + "loss": 0.3583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.369488000869751, + "step": 755 + }, + { + "epoch": 0.6713780918727915, + "grad_norm": 0.7109005451202393, + "learning_rate": 3.8284993694829764e-05, + "loss": 0.3344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.354184627532959, + "step": 760 + }, + { + "epoch": 0.6757950530035336, + "grad_norm": 0.7314112782478333, + "learning_rate": 3.853720050441362e-05, + "loss": 0.3582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37824511528015137, + "step": 765 + }, + { + "epoch": 0.6802120141342756, + "grad_norm": 1.165858268737793, + "learning_rate": 3.878940731399748e-05, + "loss": 0.384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32820677757263184, + "step": 770 + }, + { + "epoch": 0.6846289752650176, + "grad_norm": 0.8004192113876343, + "learning_rate": 3.904161412358134e-05, + "loss": 0.3607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43373507261276245, + "step": 775 + }, + { + "epoch": 0.6890459363957597, + "grad_norm": 0.6773238182067871, + "learning_rate": 3.9293820933165195e-05, + "loss": 0.3786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3990272581577301, + "step": 780 + }, + { + "epoch": 0.6934628975265018, + "grad_norm": 0.676603376865387, + "learning_rate": 3.954602774274906e-05, + "loss": 0.3336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34566766023635864, + "step": 785 + }, + { + "epoch": 0.6978798586572438, + "grad_norm": 0.7312802672386169, + "learning_rate": 3.979823455233291e-05, + "loss": 0.3405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37781059741973877, + "step": 790 + }, + { + "epoch": 0.7022968197879859, + "grad_norm": 0.7477230429649353, + "learning_rate": 3.99999980591192e-05, + "loss": 0.345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29296875, + "step": 795 + }, + { + "epoch": 0.7067137809187279, + "grad_norm": 0.6933770179748535, + "learning_rate": 3.99999301283305e-05, + "loss": 0.4408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4305647313594818, + "step": 800 + }, + { + "epoch": 0.7111307420494699, + "grad_norm": 0.6644602417945862, + "learning_rate": 3.999976515387813e-05, + "loss": 0.3571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2757279574871063, + "step": 805 + }, + { + "epoch": 0.715547703180212, + "grad_norm": 0.6703394651412964, + "learning_rate": 3.9999503136562586e-05, + "loss": 0.3417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3425188660621643, + "step": 810 + }, + { + "epoch": 0.7199646643109541, + "grad_norm": 0.6245801448822021, + "learning_rate": 3.999914407765523e-05, + "loss": 0.3524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29982197284698486, + "step": 815 + }, + { + "epoch": 0.7243816254416962, + "grad_norm": 0.701495885848999, + "learning_rate": 3.999868797889828e-05, + "loss": 0.3204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3113703429698944, + "step": 820 + }, + { + "epoch": 0.7287985865724381, + "grad_norm": 0.8265374302864075, + "learning_rate": 3.999813484250483e-05, + "loss": 0.3488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3571431338787079, + "step": 825 + }, + { + "epoch": 0.7332155477031802, + "grad_norm": 0.8132041096687317, + "learning_rate": 3.99974846711588e-05, + "loss": 0.3718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30748432874679565, + "step": 830 + }, + { + "epoch": 0.7376325088339223, + "grad_norm": 0.6265267133712769, + "learning_rate": 3.9996737468014954e-05, + "loss": 0.3123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3108974099159241, + "step": 835 + }, + { + "epoch": 0.7420494699646644, + "grad_norm": 0.7385701537132263, + "learning_rate": 3.999589323669887e-05, + "loss": 0.359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40513014793395996, + "step": 840 + }, + { + "epoch": 0.7464664310954063, + "grad_norm": 0.6594541668891907, + "learning_rate": 3.9994951981306926e-05, + "loss": 0.3511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2959279716014862, + "step": 845 + }, + { + "epoch": 0.7508833922261484, + "grad_norm": 0.7326868176460266, + "learning_rate": 3.9993913706406287e-05, + "loss": 0.349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31566479802131653, + "step": 850 + }, + { + "epoch": 0.7553003533568905, + "grad_norm": 0.798692524433136, + "learning_rate": 3.999277841703486e-05, + "loss": 0.347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31945452094078064, + "step": 855 + }, + { + "epoch": 0.7597173144876325, + "grad_norm": 0.6340591907501221, + "learning_rate": 3.999154611870131e-05, + "loss": 0.3524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3852103352546692, + "step": 860 + }, + { + "epoch": 0.7641342756183745, + "grad_norm": 0.7896412014961243, + "learning_rate": 3.999021681738499e-05, + "loss": 0.3417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32461148500442505, + "step": 865 + }, + { + "epoch": 0.7685512367491166, + "grad_norm": 0.6427087187767029, + "learning_rate": 3.998879051953593e-05, + "loss": 0.3073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28621137142181396, + "step": 870 + }, + { + "epoch": 0.7729681978798587, + "grad_norm": 0.6806996464729309, + "learning_rate": 3.9987267232074816e-05, + "loss": 0.3812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3365304470062256, + "step": 875 + }, + { + "epoch": 0.7773851590106007, + "grad_norm": 0.6693117618560791, + "learning_rate": 3.998564696239295e-05, + "loss": 0.3718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3166996240615845, + "step": 880 + }, + { + "epoch": 0.7818021201413428, + "grad_norm": 0.719115674495697, + "learning_rate": 3.99839297183522e-05, + "loss": 0.3356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33166027069091797, + "step": 885 + }, + { + "epoch": 0.7862190812720848, + "grad_norm": 0.6326349973678589, + "learning_rate": 3.998211550828497e-05, + "loss": 0.3528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3603453040122986, + "step": 890 + }, + { + "epoch": 0.7906360424028268, + "grad_norm": 0.8190131187438965, + "learning_rate": 3.998020434099418e-05, + "loss": 0.3497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38982582092285156, + "step": 895 + }, + { + "epoch": 0.7950530035335689, + "grad_norm": 0.6838703751564026, + "learning_rate": 3.997819622575319e-05, + "loss": 0.3586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3148457705974579, + "step": 900 + }, + { + "epoch": 0.799469964664311, + "grad_norm": 0.6027899384498596, + "learning_rate": 3.9976091172305794e-05, + "loss": 0.3576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3718492388725281, + "step": 905 + }, + { + "epoch": 0.803886925795053, + "grad_norm": 1.1394686698913574, + "learning_rate": 3.9973889190866105e-05, + "loss": 0.3383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32566916942596436, + "step": 910 + }, + { + "epoch": 0.808303886925795, + "grad_norm": 0.6600670218467712, + "learning_rate": 3.99715902921186e-05, + "loss": 0.355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32827770709991455, + "step": 915 + }, + { + "epoch": 0.8127208480565371, + "grad_norm": 0.8769943714141846, + "learning_rate": 3.9969194487217987e-05, + "loss": 0.3669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3776477575302124, + "step": 920 + }, + { + "epoch": 0.8171378091872792, + "grad_norm": 0.6823641657829285, + "learning_rate": 3.9966701787789194e-05, + "loss": 0.3431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31834328174591064, + "step": 925 + }, + { + "epoch": 0.8215547703180212, + "grad_norm": 0.7511164546012878, + "learning_rate": 3.996411220592729e-05, + "loss": 0.3553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3393649458885193, + "step": 930 + }, + { + "epoch": 0.8259717314487632, + "grad_norm": 0.6989418268203735, + "learning_rate": 3.996142575419745e-05, + "loss": 0.3087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36577892303466797, + "step": 935 + }, + { + "epoch": 0.8303886925795053, + "grad_norm": 0.6358893513679504, + "learning_rate": 3.995864244563487e-05, + "loss": 0.3472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29227280616760254, + "step": 940 + }, + { + "epoch": 0.8348056537102474, + "grad_norm": 0.6637855768203735, + "learning_rate": 3.9955762293744735e-05, + "loss": 0.3563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3354935348033905, + "step": 945 + }, + { + "epoch": 0.8392226148409894, + "grad_norm": 1.028828740119934, + "learning_rate": 3.9952785312502107e-05, + "loss": 0.3675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3541829586029053, + "step": 950 + }, + { + "epoch": 0.8436395759717314, + "grad_norm": 0.660925030708313, + "learning_rate": 3.99497115163519e-05, + "loss": 0.4159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3134467601776123, + "step": 955 + }, + { + "epoch": 0.8480565371024735, + "grad_norm": 0.6419395208358765, + "learning_rate": 3.994654092020877e-05, + "loss": 0.3492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36793047189712524, + "step": 960 + }, + { + "epoch": 0.8524734982332155, + "grad_norm": 0.6670768857002258, + "learning_rate": 3.994327353945712e-05, + "loss": 0.3413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35276514291763306, + "step": 965 + }, + { + "epoch": 0.8568904593639576, + "grad_norm": 0.7899559736251831, + "learning_rate": 3.9939909389950894e-05, + "loss": 0.3682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4707202911376953, + "step": 970 + }, + { + "epoch": 0.8613074204946997, + "grad_norm": 0.8200883865356445, + "learning_rate": 3.9936448488013646e-05, + "loss": 0.3363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33861371874809265, + "step": 975 + }, + { + "epoch": 0.8657243816254417, + "grad_norm": 0.7544311285018921, + "learning_rate": 3.9932890850438356e-05, + "loss": 0.3754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4074310064315796, + "step": 980 + }, + { + "epoch": 0.8701413427561837, + "grad_norm": 0.8232197165489197, + "learning_rate": 3.9929236494487395e-05, + "loss": 0.359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39264535903930664, + "step": 985 + }, + { + "epoch": 0.8745583038869258, + "grad_norm": 0.6976638436317444, + "learning_rate": 3.9925485437892434e-05, + "loss": 0.3726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34916025400161743, + "step": 990 + }, + { + "epoch": 0.8789752650176679, + "grad_norm": 0.7832766771316528, + "learning_rate": 3.992163769885435e-05, + "loss": 0.3198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33082062005996704, + "step": 995 + }, + { + "epoch": 0.8833922261484098, + "grad_norm": 0.6496185064315796, + "learning_rate": 3.9917693296043124e-05, + "loss": 0.3586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3823724091053009, + "step": 1000 + }, + { + "epoch": 0.8878091872791519, + "grad_norm": 1.394060492515564, + "learning_rate": 3.9913652248597806e-05, + "loss": 0.3653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3977188169956207, + "step": 1005 + }, + { + "epoch": 0.892226148409894, + "grad_norm": 2.0863211154937744, + "learning_rate": 3.990951457612637e-05, + "loss": 0.3364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3450695276260376, + "step": 1010 + }, + { + "epoch": 0.8966431095406361, + "grad_norm": 0.9185066223144531, + "learning_rate": 3.9905280298705624e-05, + "loss": 0.3569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3249596357345581, + "step": 1015 + }, + { + "epoch": 0.901060070671378, + "grad_norm": 0.9138262271881104, + "learning_rate": 3.9900949436881126e-05, + "loss": 0.3507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3235431909561157, + "step": 1020 + }, + { + "epoch": 0.9054770318021201, + "grad_norm": 0.663921058177948, + "learning_rate": 3.989652201166709e-05, + "loss": 0.3224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30882376432418823, + "step": 1025 + }, + { + "epoch": 0.9098939929328622, + "grad_norm": 1.4138214588165283, + "learning_rate": 3.989199804454627e-05, + "loss": 0.3297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41032299399375916, + "step": 1030 + }, + { + "epoch": 0.9143109540636042, + "grad_norm": 0.7750231027603149, + "learning_rate": 3.988737755746986e-05, + "loss": 0.3366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35097378492355347, + "step": 1035 + }, + { + "epoch": 0.9187279151943463, + "grad_norm": 0.691072940826416, + "learning_rate": 3.9882660572857375e-05, + "loss": 0.3495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.318118155002594, + "step": 1040 + }, + { + "epoch": 0.9231448763250883, + "grad_norm": 1.0037578344345093, + "learning_rate": 3.987784711359658e-05, + "loss": 0.3272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2796628773212433, + "step": 1045 + }, + { + "epoch": 0.9275618374558304, + "grad_norm": 0.667972981929779, + "learning_rate": 3.987293720304335e-05, + "loss": 0.3611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41761964559555054, + "step": 1050 + }, + { + "epoch": 0.9319787985865724, + "grad_norm": 0.6739106178283691, + "learning_rate": 3.9867930865021535e-05, + "loss": 0.3379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28799429535865784, + "step": 1055 + }, + { + "epoch": 0.9363957597173145, + "grad_norm": 0.8230948448181152, + "learning_rate": 3.9862828123822905e-05, + "loss": 0.3756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3900887966156006, + "step": 1060 + }, + { + "epoch": 0.9408127208480566, + "grad_norm": 0.6671487092971802, + "learning_rate": 3.985762900420698e-05, + "loss": 0.3687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36852848529815674, + "step": 1065 + }, + { + "epoch": 0.9452296819787986, + "grad_norm": 0.6791719198226929, + "learning_rate": 3.985233353140092e-05, + "loss": 0.2972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28970006108283997, + "step": 1070 + }, + { + "epoch": 0.9496466431095406, + "grad_norm": 0.6565694212913513, + "learning_rate": 3.984694173109942e-05, + "loss": 0.3508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3562009036540985, + "step": 1075 + }, + { + "epoch": 0.9540636042402827, + "grad_norm": 0.6499453186988831, + "learning_rate": 3.984145362946458e-05, + "loss": 0.361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4719471335411072, + "step": 1080 + }, + { + "epoch": 0.9584805653710248, + "grad_norm": 0.6347289085388184, + "learning_rate": 3.983586925312576e-05, + "loss": 0.3525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30270689725875854, + "step": 1085 + }, + { + "epoch": 0.9628975265017667, + "grad_norm": 0.7031768560409546, + "learning_rate": 3.983018862917948e-05, + "loss": 0.3245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28910696506500244, + "step": 1090 + }, + { + "epoch": 0.9673144876325088, + "grad_norm": 0.6593021750450134, + "learning_rate": 3.9824411785189264e-05, + "loss": 0.3461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2953903079032898, + "step": 1095 + }, + { + "epoch": 0.9717314487632509, + "grad_norm": 0.7052675485610962, + "learning_rate": 3.9818538749185506e-05, + "loss": 0.3357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38916873931884766, + "step": 1100 + }, + { + "epoch": 0.976148409893993, + "grad_norm": 0.781073808670044, + "learning_rate": 3.981256954966536e-05, + "loss": 0.3559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42383044958114624, + "step": 1105 + }, + { + "epoch": 0.980565371024735, + "grad_norm": 0.8780611157417297, + "learning_rate": 3.9806504215592575e-05, + "loss": 0.345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3717604875564575, + "step": 1110 + }, + { + "epoch": 0.984982332155477, + "grad_norm": 0.6577640771865845, + "learning_rate": 3.980034277639737e-05, + "loss": 0.3427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3323565423488617, + "step": 1115 + }, + { + "epoch": 0.9893992932862191, + "grad_norm": 0.6743144392967224, + "learning_rate": 3.979408526197628e-05, + "loss": 0.3845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37875691056251526, + "step": 1120 + }, + { + "epoch": 0.9938162544169611, + "grad_norm": 0.6501055359840393, + "learning_rate": 3.9787731702692004e-05, + "loss": 0.3406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3506585955619812, + "step": 1125 + }, + { + "epoch": 0.9982332155477032, + "grad_norm": 0.7022161483764648, + "learning_rate": 3.9781282129373294e-05, + "loss": 0.3353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41503027081489563, + "step": 1130 + }, + { + "epoch": 1.0026501766784452, + "grad_norm": 0.6163008213043213, + "learning_rate": 3.9774736573314774e-05, + "loss": 0.3603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41754186153411865, + "step": 1135 + }, + { + "epoch": 1.0070671378091873, + "grad_norm": 0.7156196236610413, + "learning_rate": 3.9768095066276794e-05, + "loss": 0.3576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.275761216878891, + "step": 1140 + }, + { + "epoch": 1.0114840989399294, + "grad_norm": 0.6655539274215698, + "learning_rate": 3.9761357640485255e-05, + "loss": 0.3423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33661192655563354, + "step": 1145 + }, + { + "epoch": 1.0159010600706713, + "grad_norm": 0.673190712928772, + "learning_rate": 3.975452432863152e-05, + "loss": 0.317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3377433717250824, + "step": 1150 + }, + { + "epoch": 1.0203180212014133, + "grad_norm": 0.6982813477516174, + "learning_rate": 3.974759516387216e-05, + "loss": 0.3091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2920030951499939, + "step": 1155 + }, + { + "epoch": 1.0247349823321554, + "grad_norm": 0.6755205988883972, + "learning_rate": 3.9740570179828905e-05, + "loss": 0.3357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28413355350494385, + "step": 1160 + }, + { + "epoch": 1.0291519434628975, + "grad_norm": 0.7137518525123596, + "learning_rate": 3.9733449410588354e-05, + "loss": 0.3105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2865508198738098, + "step": 1165 + }, + { + "epoch": 1.0335689045936396, + "grad_norm": 0.6695932149887085, + "learning_rate": 3.972623289070191e-05, + "loss": 0.329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3236057758331299, + "step": 1170 + }, + { + "epoch": 1.0379858657243817, + "grad_norm": 0.8682721257209778, + "learning_rate": 3.971892065518557e-05, + "loss": 0.2882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2601502239704132, + "step": 1175 + }, + { + "epoch": 1.0424028268551238, + "grad_norm": 0.6531527638435364, + "learning_rate": 3.971151273951979e-05, + "loss": 0.2812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30173492431640625, + "step": 1180 + }, + { + "epoch": 1.0468197879858656, + "grad_norm": 0.752144992351532, + "learning_rate": 3.970400917964922e-05, + "loss": 0.354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2773906886577606, + "step": 1185 + }, + { + "epoch": 1.0512367491166077, + "grad_norm": 0.6246238350868225, + "learning_rate": 3.969641001198266e-05, + "loss": 0.32, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2532247304916382, + "step": 1190 + }, + { + "epoch": 1.0556537102473498, + "grad_norm": 0.7625958919525146, + "learning_rate": 3.9688715273392785e-05, + "loss": 0.294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28142765164375305, + "step": 1195 + }, + { + "epoch": 1.0600706713780919, + "grad_norm": 0.6404998302459717, + "learning_rate": 3.9680925001216e-05, + "loss": 0.3253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30740100145339966, + "step": 1200 + }, + { + "epoch": 1.064487632508834, + "grad_norm": 0.7395120859146118, + "learning_rate": 3.967303923325228e-05, + "loss": 0.3327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34588027000427246, + "step": 1205 + }, + { + "epoch": 1.068904593639576, + "grad_norm": 0.6187570691108704, + "learning_rate": 3.966505800776493e-05, + "loss": 0.3793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33034655451774597, + "step": 1210 + }, + { + "epoch": 1.073321554770318, + "grad_norm": 0.6744672656059265, + "learning_rate": 3.965698136348048e-05, + "loss": 0.3273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41641291975975037, + "step": 1215 + }, + { + "epoch": 1.0777385159010602, + "grad_norm": 0.6118738055229187, + "learning_rate": 3.96488093395884e-05, + "loss": 0.3072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.306530237197876, + "step": 1220 + }, + { + "epoch": 1.082155477031802, + "grad_norm": 0.5981642603874207, + "learning_rate": 3.964054197574099e-05, + "loss": 0.3266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34041914343833923, + "step": 1225 + }, + { + "epoch": 1.0865724381625441, + "grad_norm": 0.649811863899231, + "learning_rate": 3.963217931205317e-05, + "loss": 0.3013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25741928815841675, + "step": 1230 + }, + { + "epoch": 1.0909893992932862, + "grad_norm": 0.6270660161972046, + "learning_rate": 3.962372138910223e-05, + "loss": 0.32, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3462643325328827, + "step": 1235 + }, + { + "epoch": 1.0954063604240283, + "grad_norm": 0.75999915599823, + "learning_rate": 3.9615168247927735e-05, + "loss": 0.3129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31946852803230286, + "step": 1240 + }, + { + "epoch": 1.0998233215547704, + "grad_norm": 0.7573233246803284, + "learning_rate": 3.9606519930031225e-05, + "loss": 0.3373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3408510386943817, + "step": 1245 + }, + { + "epoch": 1.1042402826855124, + "grad_norm": 0.6537328362464905, + "learning_rate": 3.959777647737606e-05, + "loss": 0.3615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34509575366973877, + "step": 1250 + }, + { + "epoch": 1.1086572438162543, + "grad_norm": 0.6237077713012695, + "learning_rate": 3.958893793238723e-05, + "loss": 0.3505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.365169882774353, + "step": 1255 + }, + { + "epoch": 1.1130742049469964, + "grad_norm": 0.6630376577377319, + "learning_rate": 3.958000433795113e-05, + "loss": 0.3865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35122057795524597, + "step": 1260 + }, + { + "epoch": 1.1174911660777385, + "grad_norm": 0.6241370439529419, + "learning_rate": 3.957097573741534e-05, + "loss": 0.3463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30081939697265625, + "step": 1265 + }, + { + "epoch": 1.1219081272084805, + "grad_norm": 0.7064021229743958, + "learning_rate": 3.956185217458843e-05, + "loss": 0.3429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2906290888786316, + "step": 1270 + }, + { + "epoch": 1.1263250883392226, + "grad_norm": 0.7379579544067383, + "learning_rate": 3.955263369373977e-05, + "loss": 0.306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2594120502471924, + "step": 1275 + }, + { + "epoch": 1.1307420494699647, + "grad_norm": 0.6167639493942261, + "learning_rate": 3.9543320339599266e-05, + "loss": 0.3344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27128392457962036, + "step": 1280 + }, + { + "epoch": 1.1351590106007068, + "grad_norm": 0.6793010234832764, + "learning_rate": 3.953391215735718e-05, + "loss": 0.3495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3618358373641968, + "step": 1285 + }, + { + "epoch": 1.1395759717314489, + "grad_norm": 0.8995165824890137, + "learning_rate": 3.952440919266389e-05, + "loss": 0.3221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43430811166763306, + "step": 1290 + }, + { + "epoch": 1.1439929328621907, + "grad_norm": 0.746021568775177, + "learning_rate": 3.951481149162968e-05, + "loss": 0.3149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27596035599708557, + "step": 1295 + }, + { + "epoch": 1.1484098939929328, + "grad_norm": 0.6076446175575256, + "learning_rate": 3.950511910082452e-05, + "loss": 0.3011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2746957242488861, + "step": 1300 + }, + { + "epoch": 1.1528268551236749, + "grad_norm": 0.692255973815918, + "learning_rate": 3.949533206727784e-05, + "loss": 0.3092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3611806333065033, + "step": 1305 + }, + { + "epoch": 1.157243816254417, + "grad_norm": 0.7213220000267029, + "learning_rate": 3.948545043847826e-05, + "loss": 0.3042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3065589368343353, + "step": 1310 + }, + { + "epoch": 1.161660777385159, + "grad_norm": 0.6529719829559326, + "learning_rate": 3.947547426237344e-05, + "loss": 0.3432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3932499289512634, + "step": 1315 + }, + { + "epoch": 1.1660777385159011, + "grad_norm": 0.683671236038208, + "learning_rate": 3.9465403587369784e-05, + "loss": 0.3098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.274165540933609, + "step": 1320 + }, + { + "epoch": 1.170494699646643, + "grad_norm": 0.8071700930595398, + "learning_rate": 3.945523846233222e-05, + "loss": 0.3043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3191944658756256, + "step": 1325 + }, + { + "epoch": 1.174911660777385, + "grad_norm": 0.6285055875778198, + "learning_rate": 3.944497893658396e-05, + "loss": 0.3261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3105619251728058, + "step": 1330 + }, + { + "epoch": 1.1793286219081272, + "grad_norm": 0.8717606663703918, + "learning_rate": 3.943462505990629e-05, + "loss": 0.3588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3164404630661011, + "step": 1335 + }, + { + "epoch": 1.1837455830388692, + "grad_norm": 0.7031919956207275, + "learning_rate": 3.942417688253827e-05, + "loss": 0.3394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3558098077774048, + "step": 1340 + }, + { + "epoch": 1.1881625441696113, + "grad_norm": 0.5943769216537476, + "learning_rate": 3.9413634455176584e-05, + "loss": 0.3199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31143832206726074, + "step": 1345 + }, + { + "epoch": 1.1925795053003534, + "grad_norm": 0.6784660220146179, + "learning_rate": 3.940299782897517e-05, + "loss": 0.3039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31072232127189636, + "step": 1350 + }, + { + "epoch": 1.1969964664310955, + "grad_norm": 0.6783735752105713, + "learning_rate": 3.939226705554507e-05, + "loss": 0.3124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3142765462398529, + "step": 1355 + }, + { + "epoch": 1.2014134275618376, + "grad_norm": 0.6520729660987854, + "learning_rate": 3.9381442186954155e-05, + "loss": 0.3508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2647053599357605, + "step": 1360 + }, + { + "epoch": 1.2058303886925794, + "grad_norm": 0.6096318960189819, + "learning_rate": 3.9370523275726844e-05, + "loss": 0.3369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27471020817756653, + "step": 1365 + }, + { + "epoch": 1.2102473498233215, + "grad_norm": 0.6824337840080261, + "learning_rate": 3.935951037484388e-05, + "loss": 0.3035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3124973177909851, + "step": 1370 + }, + { + "epoch": 1.2146643109540636, + "grad_norm": 0.7245553135871887, + "learning_rate": 3.934840353774208e-05, + "loss": 0.3162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27780479192733765, + "step": 1375 + }, + { + "epoch": 1.2190812720848057, + "grad_norm": 0.8077725172042847, + "learning_rate": 3.9337202818314016e-05, + "loss": 0.2926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36201488971710205, + "step": 1380 + }, + { + "epoch": 1.2234982332155477, + "grad_norm": 0.6361654996871948, + "learning_rate": 3.932590827090783e-05, + "loss": 0.3642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40312081575393677, + "step": 1385 + }, + { + "epoch": 1.2279151943462898, + "grad_norm": 0.6152886748313904, + "learning_rate": 3.931451995032693e-05, + "loss": 0.3168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3805939853191376, + "step": 1390 + }, + { + "epoch": 1.232332155477032, + "grad_norm": 0.7459203004837036, + "learning_rate": 3.930303791182972e-05, + "loss": 0.3519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34478020668029785, + "step": 1395 + }, + { + "epoch": 1.2367491166077738, + "grad_norm": 0.8537634015083313, + "learning_rate": 3.929146221112936e-05, + "loss": 0.3215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.281982421875, + "step": 1400 + }, + { + "epoch": 1.2411660777385158, + "grad_norm": 0.6000507473945618, + "learning_rate": 3.927979290439346e-05, + "loss": 0.3281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2675206661224365, + "step": 1405 + }, + { + "epoch": 1.245583038869258, + "grad_norm": 0.640716016292572, + "learning_rate": 3.926803004824382e-05, + "loss": 0.3312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2979744076728821, + "step": 1410 + }, + { + "epoch": 1.25, + "grad_norm": 0.7331620454788208, + "learning_rate": 3.925617369975619e-05, + "loss": 0.3385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.335235059261322, + "step": 1415 + }, + { + "epoch": 1.254416961130742, + "grad_norm": 0.6532949805259705, + "learning_rate": 3.924422391645994e-05, + "loss": 0.3509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3822665214538574, + "step": 1420 + }, + { + "epoch": 1.2588339222614842, + "grad_norm": 0.7220327854156494, + "learning_rate": 3.923218075633781e-05, + "loss": 0.3268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3238363265991211, + "step": 1425 + }, + { + "epoch": 1.2632508833922262, + "grad_norm": 0.727918803691864, + "learning_rate": 3.9220044277825615e-05, + "loss": 0.3149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31373223662376404, + "step": 1430 + }, + { + "epoch": 1.2676678445229683, + "grad_norm": 0.633305013179779, + "learning_rate": 3.920781453981199e-05, + "loss": 0.2994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3962780237197876, + "step": 1435 + }, + { + "epoch": 1.2720848056537102, + "grad_norm": 0.6449732184410095, + "learning_rate": 3.919549160163806e-05, + "loss": 0.3217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2957836985588074, + "step": 1440 + }, + { + "epoch": 1.2765017667844523, + "grad_norm": 0.8494489789009094, + "learning_rate": 3.91830755230972e-05, + "loss": 0.3579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47561344504356384, + "step": 1445 + }, + { + "epoch": 1.2809187279151943, + "grad_norm": 0.6150957942008972, + "learning_rate": 3.91705663644347e-05, + "loss": 0.3226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3158280849456787, + "step": 1450 + }, + { + "epoch": 1.2853356890459364, + "grad_norm": 0.5343297719955444, + "learning_rate": 3.91579641863475e-05, + "loss": 0.323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30349069833755493, + "step": 1455 + }, + { + "epoch": 1.2897526501766785, + "grad_norm": 0.8276622295379639, + "learning_rate": 3.91452690499839e-05, + "loss": 0.3446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4098215103149414, + "step": 1460 + }, + { + "epoch": 1.2941696113074204, + "grad_norm": 0.6456217765808105, + "learning_rate": 3.913248101694323e-05, + "loss": 0.333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40201887488365173, + "step": 1465 + }, + { + "epoch": 1.2985865724381624, + "grad_norm": 0.5984911322593689, + "learning_rate": 3.911960014927559e-05, + "loss": 0.3269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3077358603477478, + "step": 1470 + }, + { + "epoch": 1.3030035335689045, + "grad_norm": 0.6234849691390991, + "learning_rate": 3.910662650948153e-05, + "loss": 0.3081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3293282687664032, + "step": 1475 + }, + { + "epoch": 1.3074204946996466, + "grad_norm": 0.6392715573310852, + "learning_rate": 3.9093560160511746e-05, + "loss": 0.3063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34439730644226074, + "step": 1480 + }, + { + "epoch": 1.3118374558303887, + "grad_norm": 0.5880979299545288, + "learning_rate": 3.9080401165766776e-05, + "loss": 0.316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2801549434661865, + "step": 1485 + }, + { + "epoch": 1.3162544169611308, + "grad_norm": 0.5844652652740479, + "learning_rate": 3.9067149589096695e-05, + "loss": 0.2849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2638784646987915, + "step": 1490 + }, + { + "epoch": 1.3206713780918728, + "grad_norm": 0.5854594707489014, + "learning_rate": 3.905380549480081e-05, + "loss": 0.3029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30802667140960693, + "step": 1495 + }, + { + "epoch": 1.325088339222615, + "grad_norm": 0.5132575035095215, + "learning_rate": 3.904036894762734e-05, + "loss": 0.3015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2757795453071594, + "step": 1500 + }, + { + "epoch": 1.329505300353357, + "grad_norm": 0.790227472782135, + "learning_rate": 3.9026840012773094e-05, + "loss": 0.3119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3685351610183716, + "step": 1505 + }, + { + "epoch": 1.3339222614840989, + "grad_norm": 0.5928642153739929, + "learning_rate": 3.901321875588317e-05, + "loss": 0.3241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3497644364833832, + "step": 1510 + }, + { + "epoch": 1.338339222614841, + "grad_norm": 0.6434882283210754, + "learning_rate": 3.899950524305064e-05, + "loss": 0.3218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3292595148086548, + "step": 1515 + }, + { + "epoch": 1.342756183745583, + "grad_norm": 0.7256221771240234, + "learning_rate": 3.898569954081621e-05, + "loss": 0.3332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3023999333381653, + "step": 1520 + }, + { + "epoch": 1.3471731448763251, + "grad_norm": 0.7445887327194214, + "learning_rate": 3.897180171616791e-05, + "loss": 0.3047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27664875984191895, + "step": 1525 + }, + { + "epoch": 1.3515901060070672, + "grad_norm": 0.6363182663917542, + "learning_rate": 3.895781183654076e-05, + "loss": 0.348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2896704077720642, + "step": 1530 + }, + { + "epoch": 1.356007067137809, + "grad_norm": 0.7220079898834229, + "learning_rate": 3.894372996981647e-05, + "loss": 0.3056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3187897503376007, + "step": 1535 + }, + { + "epoch": 1.3604240282685511, + "grad_norm": 0.9932358264923096, + "learning_rate": 3.892955618432306e-05, + "loss": 0.2863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3777332007884979, + "step": 1540 + }, + { + "epoch": 1.3648409893992932, + "grad_norm": 0.6612488627433777, + "learning_rate": 3.891529054883458e-05, + "loss": 0.3671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32506632804870605, + "step": 1545 + }, + { + "epoch": 1.3692579505300353, + "grad_norm": 0.809368371963501, + "learning_rate": 3.8900933132570755e-05, + "loss": 0.3164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2754046618938446, + "step": 1550 + }, + { + "epoch": 1.3736749116607774, + "grad_norm": 0.6561906933784485, + "learning_rate": 3.888648400519663e-05, + "loss": 0.364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27140358090400696, + "step": 1555 + }, + { + "epoch": 1.3780918727915195, + "grad_norm": 0.6149983406066895, + "learning_rate": 3.8871943236822274e-05, + "loss": 0.2918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3111530542373657, + "step": 1560 + }, + { + "epoch": 1.3825088339222615, + "grad_norm": 0.788455605506897, + "learning_rate": 3.88573108980024e-05, + "loss": 0.3015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31705474853515625, + "step": 1565 + }, + { + "epoch": 1.3869257950530036, + "grad_norm": 0.8068515062332153, + "learning_rate": 3.8842587059736054e-05, + "loss": 0.2891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27609729766845703, + "step": 1570 + }, + { + "epoch": 1.3913427561837457, + "grad_norm": 0.5502995252609253, + "learning_rate": 3.882777179346622e-05, + "loss": 0.3524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.286032497882843, + "step": 1575 + }, + { + "epoch": 1.3957597173144876, + "grad_norm": 0.5802372694015503, + "learning_rate": 3.881286517107957e-05, + "loss": 0.343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24839898943901062, + "step": 1580 + }, + { + "epoch": 1.4001766784452296, + "grad_norm": 0.614653468132019, + "learning_rate": 3.879786726490599e-05, + "loss": 0.3196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29347753524780273, + "step": 1585 + }, + { + "epoch": 1.4045936395759717, + "grad_norm": 0.5936715006828308, + "learning_rate": 3.8782778147718335e-05, + "loss": 0.329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3896068036556244, + "step": 1590 + }, + { + "epoch": 1.4090106007067138, + "grad_norm": 5.28199577331543, + "learning_rate": 3.876759789273202e-05, + "loss": 0.3, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26368066668510437, + "step": 1595 + }, + { + "epoch": 1.4134275618374559, + "grad_norm": 0.7651248574256897, + "learning_rate": 3.8752326573604684e-05, + "loss": 0.3075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30806493759155273, + "step": 1600 + }, + { + "epoch": 1.417844522968198, + "grad_norm": 0.6337783932685852, + "learning_rate": 3.873696426443581e-05, + "loss": 0.3195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34544122219085693, + "step": 1605 + }, + { + "epoch": 1.4222614840989398, + "grad_norm": 0.7464025616645813, + "learning_rate": 3.872151103976642e-05, + "loss": 0.3251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3165499269962311, + "step": 1610 + }, + { + "epoch": 1.426678445229682, + "grad_norm": 0.5613967180252075, + "learning_rate": 3.870596697457863e-05, + "loss": 0.3442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27536633610725403, + "step": 1615 + }, + { + "epoch": 1.431095406360424, + "grad_norm": 0.6180069446563721, + "learning_rate": 3.8690332144295375e-05, + "loss": 0.3426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39397120475769043, + "step": 1620 + }, + { + "epoch": 1.435512367491166, + "grad_norm": 0.628078818321228, + "learning_rate": 3.867460662477996e-05, + "loss": 0.332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3936801254749298, + "step": 1625 + }, + { + "epoch": 1.4399293286219081, + "grad_norm": 0.6523563265800476, + "learning_rate": 3.865879049233577e-05, + "loss": 0.3076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31615036725997925, + "step": 1630 + }, + { + "epoch": 1.4443462897526502, + "grad_norm": 0.6801185607910156, + "learning_rate": 3.864288382370584e-05, + "loss": 0.3124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27253082394599915, + "step": 1635 + }, + { + "epoch": 1.4487632508833923, + "grad_norm": 0.6551727056503296, + "learning_rate": 3.8626886696072495e-05, + "loss": 0.3393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3144484758377075, + "step": 1640 + }, + { + "epoch": 1.4531802120141344, + "grad_norm": 0.6799625158309937, + "learning_rate": 3.8610799187057025e-05, + "loss": 0.3086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3325912356376648, + "step": 1645 + }, + { + "epoch": 1.4575971731448762, + "grad_norm": 0.6419410705566406, + "learning_rate": 3.8594621374719226e-05, + "loss": 0.3026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28254279494285583, + "step": 1650 + }, + { + "epoch": 1.4620141342756183, + "grad_norm": 0.6062602996826172, + "learning_rate": 3.857835333755709e-05, + "loss": 0.3182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32770591974258423, + "step": 1655 + }, + { + "epoch": 1.4664310954063604, + "grad_norm": 0.6399083137512207, + "learning_rate": 3.856199515450638e-05, + "loss": 0.3236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38535135984420776, + "step": 1660 + }, + { + "epoch": 1.4708480565371025, + "grad_norm": 0.6881480813026428, + "learning_rate": 3.8545546904940285e-05, + "loss": 0.3233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35274213552474976, + "step": 1665 + }, + { + "epoch": 1.4752650176678446, + "grad_norm": 0.708899199962616, + "learning_rate": 3.8529008668668996e-05, + "loss": 0.3243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29779568314552307, + "step": 1670 + }, + { + "epoch": 1.4796819787985867, + "grad_norm": 0.5962197780609131, + "learning_rate": 3.851238052593935e-05, + "loss": 0.3054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3215206265449524, + "step": 1675 + }, + { + "epoch": 1.4840989399293285, + "grad_norm": 0.6873301863670349, + "learning_rate": 3.849566255743442e-05, + "loss": 0.3252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29667988419532776, + "step": 1680 + }, + { + "epoch": 1.4885159010600706, + "grad_norm": 0.7549837827682495, + "learning_rate": 3.8478854844273134e-05, + "loss": 0.3139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32202067971229553, + "step": 1685 + }, + { + "epoch": 1.4929328621908127, + "grad_norm": 0.6532432436943054, + "learning_rate": 3.846195746800988e-05, + "loss": 0.2846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2808791995048523, + "step": 1690 + }, + { + "epoch": 1.4973498233215548, + "grad_norm": 0.6817176342010498, + "learning_rate": 3.8444970510634124e-05, + "loss": 0.3371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29034101963043213, + "step": 1695 + }, + { + "epoch": 1.5017667844522968, + "grad_norm": 0.6129101514816284, + "learning_rate": 3.842789405456996e-05, + "loss": 0.3295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3373889923095703, + "step": 1700 + }, + { + "epoch": 1.506183745583039, + "grad_norm": 0.5987979769706726, + "learning_rate": 3.841072818267578e-05, + "loss": 0.3237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36195623874664307, + "step": 1705 + }, + { + "epoch": 1.510600706713781, + "grad_norm": 0.6188389658927917, + "learning_rate": 3.839347297824383e-05, + "loss": 0.3196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28901487588882446, + "step": 1710 + }, + { + "epoch": 1.515017667844523, + "grad_norm": 0.605717658996582, + "learning_rate": 3.837612852499982e-05, + "loss": 0.352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31816282868385315, + "step": 1715 + }, + { + "epoch": 1.5194346289752652, + "grad_norm": 0.5934823751449585, + "learning_rate": 3.8358694907102504e-05, + "loss": 0.3625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3863886594772339, + "step": 1720 + }, + { + "epoch": 1.523851590106007, + "grad_norm": 0.578649640083313, + "learning_rate": 3.834117220914328e-05, + "loss": 0.3449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2828061580657959, + "step": 1725 + }, + { + "epoch": 1.528268551236749, + "grad_norm": 0.6291191577911377, + "learning_rate": 3.832356051614579e-05, + "loss": 0.307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28863656520843506, + "step": 1730 + }, + { + "epoch": 1.5326855123674912, + "grad_norm": 0.6461930274963379, + "learning_rate": 3.8305859913565505e-05, + "loss": 0.3011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3028411865234375, + "step": 1735 + }, + { + "epoch": 1.5371024734982333, + "grad_norm": 0.7606573104858398, + "learning_rate": 3.8288070487289274e-05, + "loss": 0.3087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3565424084663391, + "step": 1740 + }, + { + "epoch": 1.5415194346289751, + "grad_norm": 0.6665891408920288, + "learning_rate": 3.827019232363496e-05, + "loss": 0.3183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3757513165473938, + "step": 1745 + }, + { + "epoch": 1.5459363957597172, + "grad_norm": 0.6307830214500427, + "learning_rate": 3.8252225509350985e-05, + "loss": 0.3252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.322690486907959, + "step": 1750 + }, + { + "epoch": 1.5503533568904593, + "grad_norm": 0.6449663043022156, + "learning_rate": 3.823417013161594e-05, + "loss": 0.3276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3264492154121399, + "step": 1755 + }, + { + "epoch": 1.5547703180212014, + "grad_norm": 0.6309463381767273, + "learning_rate": 3.821602627803813e-05, + "loss": 0.3399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3071059584617615, + "step": 1760 + }, + { + "epoch": 1.5591872791519434, + "grad_norm": 0.5758494138717651, + "learning_rate": 3.819779403665515e-05, + "loss": 0.3248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3266148567199707, + "step": 1765 + }, + { + "epoch": 1.5636042402826855, + "grad_norm": 0.6446824669837952, + "learning_rate": 3.8179473495933497e-05, + "loss": 0.3323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3177984952926636, + "step": 1770 + }, + { + "epoch": 1.5680212014134276, + "grad_norm": 0.5853697657585144, + "learning_rate": 3.8161064744768096e-05, + "loss": 0.2712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24256934225559235, + "step": 1775 + }, + { + "epoch": 1.5724381625441697, + "grad_norm": 0.722345232963562, + "learning_rate": 3.814256787248189e-05, + "loss": 0.3833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4055064916610718, + "step": 1780 + }, + { + "epoch": 1.5768551236749118, + "grad_norm": 0.7835103869438171, + "learning_rate": 3.81239829688254e-05, + "loss": 0.3099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3148956596851349, + "step": 1785 + }, + { + "epoch": 1.5812720848056538, + "grad_norm": 0.6101419925689697, + "learning_rate": 3.810531012397632e-05, + "loss": 0.3416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3856297433376312, + "step": 1790 + }, + { + "epoch": 1.585689045936396, + "grad_norm": 0.607524037361145, + "learning_rate": 3.8086549428539016e-05, + "loss": 0.3393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3431060314178467, + "step": 1795 + }, + { + "epoch": 1.5901060070671378, + "grad_norm": 0.700236976146698, + "learning_rate": 3.806770097354413e-05, + "loss": 0.2922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30610954761505127, + "step": 1800 + }, + { + "epoch": 1.5945229681978799, + "grad_norm": 0.5721810460090637, + "learning_rate": 3.8048764850448146e-05, + "loss": 0.3178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2738623023033142, + "step": 1805 + }, + { + "epoch": 1.598939929328622, + "grad_norm": 0.591039776802063, + "learning_rate": 3.802974115113292e-05, + "loss": 0.3071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3155909776687622, + "step": 1810 + }, + { + "epoch": 1.6033568904593638, + "grad_norm": 0.6222483515739441, + "learning_rate": 3.801062996790526e-05, + "loss": 0.3603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40520355105400085, + "step": 1815 + }, + { + "epoch": 1.6077738515901059, + "grad_norm": 0.6669710874557495, + "learning_rate": 3.7991431393496435e-05, + "loss": 0.3065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3122641444206238, + "step": 1820 + }, + { + "epoch": 1.612190812720848, + "grad_norm": 0.628384530544281, + "learning_rate": 3.797214552106178e-05, + "loss": 0.2951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29349058866500854, + "step": 1825 + }, + { + "epoch": 1.61660777385159, + "grad_norm": 0.6003797054290771, + "learning_rate": 3.7952772444180205e-05, + "loss": 0.3327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28449106216430664, + "step": 1830 + }, + { + "epoch": 1.6210247349823321, + "grad_norm": 0.6813213229179382, + "learning_rate": 3.793331225685376e-05, + "loss": 0.3209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.247023805975914, + "step": 1835 + }, + { + "epoch": 1.6254416961130742, + "grad_norm": 0.6331183314323425, + "learning_rate": 3.791376505350716e-05, + "loss": 0.2859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2805844843387604, + "step": 1840 + }, + { + "epoch": 1.6298586572438163, + "grad_norm": 0.5931798815727234, + "learning_rate": 3.789413092898735e-05, + "loss": 0.2862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2882406711578369, + "step": 1845 + }, + { + "epoch": 1.6342756183745584, + "grad_norm": 0.6270744204521179, + "learning_rate": 3.7874409978563045e-05, + "loss": 0.2997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27691859006881714, + "step": 1850 + }, + { + "epoch": 1.6386925795053005, + "grad_norm": 0.6561160087585449, + "learning_rate": 3.785460229792422e-05, + "loss": 0.2763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2742835283279419, + "step": 1855 + }, + { + "epoch": 1.6431095406360425, + "grad_norm": 0.6197009086608887, + "learning_rate": 3.783470798318173e-05, + "loss": 0.3189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2708655297756195, + "step": 1860 + }, + { + "epoch": 1.6475265017667846, + "grad_norm": 0.7104551196098328, + "learning_rate": 3.7814727130866756e-05, + "loss": 0.3393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3953385353088379, + "step": 1865 + }, + { + "epoch": 1.6519434628975265, + "grad_norm": 0.6067651510238647, + "learning_rate": 3.779465983793039e-05, + "loss": 0.3433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31828904151916504, + "step": 1870 + }, + { + "epoch": 1.6563604240282686, + "grad_norm": 0.6345065236091614, + "learning_rate": 3.7774506201743175e-05, + "loss": 0.3252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3190929591655731, + "step": 1875 + }, + { + "epoch": 1.6607773851590106, + "grad_norm": 0.5989964604377747, + "learning_rate": 3.775426632009456e-05, + "loss": 0.3066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2885761857032776, + "step": 1880 + }, + { + "epoch": 1.6651943462897525, + "grad_norm": 0.6218124032020569, + "learning_rate": 3.7733940291192516e-05, + "loss": 0.3205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2709357738494873, + "step": 1885 + }, + { + "epoch": 1.6696113074204946, + "grad_norm": 0.6491353511810303, + "learning_rate": 3.771352821366301e-05, + "loss": 0.3574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31962040066719055, + "step": 1890 + }, + { + "epoch": 1.6740282685512367, + "grad_norm": 0.6486796736717224, + "learning_rate": 3.769303018654951e-05, + "loss": 0.3129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27561670541763306, + "step": 1895 + }, + { + "epoch": 1.6784452296819787, + "grad_norm": 0.8267950415611267, + "learning_rate": 3.7672446309312554e-05, + "loss": 0.3588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2873364984989166, + "step": 1900 + }, + { + "epoch": 1.6828621908127208, + "grad_norm": 0.6081082820892334, + "learning_rate": 3.765177668182923e-05, + "loss": 0.3609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35057562589645386, + "step": 1905 + }, + { + "epoch": 1.687279151943463, + "grad_norm": 0.5976192951202393, + "learning_rate": 3.763102140439272e-05, + "loss": 0.3016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2694868743419647, + "step": 1910 + }, + { + "epoch": 1.691696113074205, + "grad_norm": 0.7123092412948608, + "learning_rate": 3.7610180577711774e-05, + "loss": 0.2874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2789933681488037, + "step": 1915 + }, + { + "epoch": 1.696113074204947, + "grad_norm": 0.741333544254303, + "learning_rate": 3.758925430291025e-05, + "loss": 0.3251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35715097188949585, + "step": 1920 + }, + { + "epoch": 1.7005300353356891, + "grad_norm": 0.6544567942619324, + "learning_rate": 3.756824268152663e-05, + "loss": 0.326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2953934669494629, + "step": 1925 + }, + { + "epoch": 1.7049469964664312, + "grad_norm": 1.1364296674728394, + "learning_rate": 3.7547145815513504e-05, + "loss": 0.3568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43960511684417725, + "step": 1930 + }, + { + "epoch": 1.7093639575971733, + "grad_norm": 0.5804359912872314, + "learning_rate": 3.752596380723709e-05, + "loss": 0.3531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4059427082538605, + "step": 1935 + }, + { + "epoch": 1.7137809187279152, + "grad_norm": 0.6707079410552979, + "learning_rate": 3.750469675947672e-05, + "loss": 0.3044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.308398962020874, + "step": 1940 + }, + { + "epoch": 1.7181978798586572, + "grad_norm": 0.6289849281311035, + "learning_rate": 3.7483344775424376e-05, + "loss": 0.3225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35997170209884644, + "step": 1945 + }, + { + "epoch": 1.7226148409893993, + "grad_norm": 0.5992223620414734, + "learning_rate": 3.746190795868416e-05, + "loss": 0.3168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36869436502456665, + "step": 1950 + }, + { + "epoch": 1.7270318021201412, + "grad_norm": 0.5985012054443359, + "learning_rate": 3.7440386413271796e-05, + "loss": 0.2932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3474159240722656, + "step": 1955 + }, + { + "epoch": 1.7314487632508833, + "grad_norm": 0.6145229339599609, + "learning_rate": 3.741878024361412e-05, + "loss": 0.3082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34130439162254333, + "step": 1960 + }, + { + "epoch": 1.7358657243816253, + "grad_norm": 0.5978900790214539, + "learning_rate": 3.7397089554548606e-05, + "loss": 0.2994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3122338354587555, + "step": 1965 + }, + { + "epoch": 1.7402826855123674, + "grad_norm": 0.6474099159240723, + "learning_rate": 3.73753144513228e-05, + "loss": 0.2718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2959476113319397, + "step": 1970 + }, + { + "epoch": 1.7446996466431095, + "grad_norm": 0.5508759021759033, + "learning_rate": 3.735345503959388e-05, + "loss": 0.3195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25343257188796997, + "step": 1975 + }, + { + "epoch": 1.7491166077738516, + "grad_norm": 0.5979631543159485, + "learning_rate": 3.7331511425428075e-05, + "loss": 0.307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3401501476764679, + "step": 1980 + }, + { + "epoch": 1.7535335689045937, + "grad_norm": 0.6378216743469238, + "learning_rate": 3.73094837153002e-05, + "loss": 0.3163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3658217787742615, + "step": 1985 + }, + { + "epoch": 1.7579505300353357, + "grad_norm": 0.623346745967865, + "learning_rate": 3.7287372016093106e-05, + "loss": 0.3476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3634029030799866, + "step": 1990 + }, + { + "epoch": 1.7623674911660778, + "grad_norm": 0.548507034778595, + "learning_rate": 3.726517643509718e-05, + "loss": 0.3238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34876665472984314, + "step": 1995 + }, + { + "epoch": 1.76678445229682, + "grad_norm": 0.7020362615585327, + "learning_rate": 3.724289708000984e-05, + "loss": 0.313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29458093643188477, + "step": 2000 + }, + { + "epoch": 1.771201413427562, + "grad_norm": 1.0174129009246826, + "learning_rate": 3.722053405893495e-05, + "loss": 0.3045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2385835349559784, + "step": 2005 + }, + { + "epoch": 1.7756183745583038, + "grad_norm": 0.6126503348350525, + "learning_rate": 3.7198087480382386e-05, + "loss": 0.3038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24906566739082336, + "step": 2010 + }, + { + "epoch": 1.780035335689046, + "grad_norm": 0.6186851263046265, + "learning_rate": 3.7175557453267435e-05, + "loss": 0.3153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36772221326828003, + "step": 2015 + }, + { + "epoch": 1.784452296819788, + "grad_norm": 0.5845491886138916, + "learning_rate": 3.715294408691029e-05, + "loss": 0.3231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31444716453552246, + "step": 2020 + }, + { + "epoch": 1.78886925795053, + "grad_norm": 0.5485044121742249, + "learning_rate": 3.713024749103554e-05, + "loss": 0.3279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28981852531433105, + "step": 2025 + }, + { + "epoch": 1.793286219081272, + "grad_norm": 0.7004613280296326, + "learning_rate": 3.71074677757716e-05, + "loss": 0.3089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2541000247001648, + "step": 2030 + }, + { + "epoch": 1.797703180212014, + "grad_norm": 0.7733515501022339, + "learning_rate": 3.708460505165021e-05, + "loss": 0.3438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3459468185901642, + "step": 2035 + }, + { + "epoch": 1.802120141342756, + "grad_norm": 0.5577183961868286, + "learning_rate": 3.706165942960589e-05, + "loss": 0.3271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3305257558822632, + "step": 2040 + }, + { + "epoch": 1.8065371024734982, + "grad_norm": 0.6522884964942932, + "learning_rate": 3.703863102097538e-05, + "loss": 0.3168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3232169449329376, + "step": 2045 + }, + { + "epoch": 1.8109540636042403, + "grad_norm": 0.6660712361335754, + "learning_rate": 3.701551993749714e-05, + "loss": 0.3165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2644830346107483, + "step": 2050 + }, + { + "epoch": 1.8153710247349824, + "grad_norm": 0.7599813938140869, + "learning_rate": 3.6992326291310764e-05, + "loss": 0.3048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30041027069091797, + "step": 2055 + }, + { + "epoch": 1.8197879858657244, + "grad_norm": 0.6655240058898926, + "learning_rate": 3.696905019495647e-05, + "loss": 0.2975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2828328609466553, + "step": 2060 + }, + { + "epoch": 1.8242049469964665, + "grad_norm": 0.608040988445282, + "learning_rate": 3.6945691761374535e-05, + "loss": 0.3234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42636048793792725, + "step": 2065 + }, + { + "epoch": 1.8286219081272086, + "grad_norm": 0.6305931806564331, + "learning_rate": 3.692225110390474e-05, + "loss": 0.3236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3188191056251526, + "step": 2070 + }, + { + "epoch": 1.8330388692579507, + "grad_norm": 0.7485166788101196, + "learning_rate": 3.689872833628587e-05, + "loss": 0.3203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42439842224121094, + "step": 2075 + }, + { + "epoch": 1.8374558303886925, + "grad_norm": 0.5816894769668579, + "learning_rate": 3.687512357265509e-05, + "loss": 0.3268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37391436100006104, + "step": 2080 + }, + { + "epoch": 1.8418727915194346, + "grad_norm": 0.6705328822135925, + "learning_rate": 3.685143692754743e-05, + "loss": 0.3141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.361337274312973, + "step": 2085 + }, + { + "epoch": 1.8462897526501767, + "grad_norm": 0.6142212152481079, + "learning_rate": 3.6827668515895234e-05, + "loss": 0.3092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3198060691356659, + "step": 2090 + }, + { + "epoch": 1.8507067137809188, + "grad_norm": 0.6588479280471802, + "learning_rate": 3.68038184530276e-05, + "loss": 0.3024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3107324540615082, + "step": 2095 + }, + { + "epoch": 1.8551236749116606, + "grad_norm": 0.6271264553070068, + "learning_rate": 3.6779886854669815e-05, + "loss": 0.2935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3267250657081604, + "step": 2100 + }, + { + "epoch": 1.8595406360424027, + "grad_norm": 0.6405203938484192, + "learning_rate": 3.6755873836942756e-05, + "loss": 0.3282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32828402519226074, + "step": 2105 + }, + { + "epoch": 1.8639575971731448, + "grad_norm": 0.6557453274726868, + "learning_rate": 3.673177951636242e-05, + "loss": 0.3506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28758811950683594, + "step": 2110 + }, + { + "epoch": 1.8683745583038869, + "grad_norm": 1.1797688007354736, + "learning_rate": 3.670760400983925e-05, + "loss": 0.3433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39800888299942017, + "step": 2115 + }, + { + "epoch": 1.872791519434629, + "grad_norm": 0.6395815014839172, + "learning_rate": 3.6683347434677654e-05, + "loss": 0.3342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3299906253814697, + "step": 2120 + }, + { + "epoch": 1.877208480565371, + "grad_norm": 0.6777175664901733, + "learning_rate": 3.6659009908575394e-05, + "loss": 0.2953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2692110538482666, + "step": 2125 + }, + { + "epoch": 1.8816254416961131, + "grad_norm": 0.5334305763244629, + "learning_rate": 3.663459154962301e-05, + "loss": 0.3263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3176087439060211, + "step": 2130 + }, + { + "epoch": 1.8860424028268552, + "grad_norm": 0.5658344626426697, + "learning_rate": 3.661009247630326e-05, + "loss": 0.3149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36185193061828613, + "step": 2135 + }, + { + "epoch": 1.8904593639575973, + "grad_norm": 0.6579681634902954, + "learning_rate": 3.658551280749055e-05, + "loss": 0.2887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25398847460746765, + "step": 2140 + }, + { + "epoch": 1.8948763250883394, + "grad_norm": 0.6341381072998047, + "learning_rate": 3.656085266245038e-05, + "loss": 0.2892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3196604251861572, + "step": 2145 + }, + { + "epoch": 1.8992932862190812, + "grad_norm": 0.6079564094543457, + "learning_rate": 3.653611216083867e-05, + "loss": 0.3093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41236889362335205, + "step": 2150 + }, + { + "epoch": 1.9037102473498233, + "grad_norm": 0.5752854347229004, + "learning_rate": 3.651129142270132e-05, + "loss": 0.3111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32420551776885986, + "step": 2155 + }, + { + "epoch": 1.9081272084805654, + "grad_norm": 0.7557041645050049, + "learning_rate": 3.6486390568473494e-05, + "loss": 0.3236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40055620670318604, + "step": 2160 + }, + { + "epoch": 1.9125441696113075, + "grad_norm": 0.6140500903129578, + "learning_rate": 3.646140971897914e-05, + "loss": 0.2967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3187151551246643, + "step": 2165 + }, + { + "epoch": 1.9169611307420493, + "grad_norm": 0.5381097793579102, + "learning_rate": 3.6436348995430314e-05, + "loss": 0.3371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20654772222042084, + "step": 2170 + }, + { + "epoch": 1.9213780918727914, + "grad_norm": 0.6211098432540894, + "learning_rate": 3.641120851942669e-05, + "loss": 0.3121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27154093980789185, + "step": 2175 + }, + { + "epoch": 1.9257950530035335, + "grad_norm": 0.6953615546226501, + "learning_rate": 3.638598841295487e-05, + "loss": 0.3264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31072691082954407, + "step": 2180 + }, + { + "epoch": 1.9302120141342756, + "grad_norm": 0.579765796661377, + "learning_rate": 3.6360688798387865e-05, + "loss": 0.3463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3020220398902893, + "step": 2185 + }, + { + "epoch": 1.9346289752650176, + "grad_norm": 0.6398031115531921, + "learning_rate": 3.633530979848446e-05, + "loss": 0.2941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2853238880634308, + "step": 2190 + }, + { + "epoch": 1.9390459363957597, + "grad_norm": 0.6654336452484131, + "learning_rate": 3.6309851536388664e-05, + "loss": 0.3671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35676461458206177, + "step": 2195 + }, + { + "epoch": 1.9434628975265018, + "grad_norm": 0.5575515627861023, + "learning_rate": 3.6284314135629036e-05, + "loss": 0.3231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4140787720680237, + "step": 2200 + }, + { + "epoch": 1.947879858657244, + "grad_norm": 0.6344410181045532, + "learning_rate": 3.625869772011816e-05, + "loss": 0.3538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34461867809295654, + "step": 2205 + }, + { + "epoch": 1.952296819787986, + "grad_norm": 0.6593054533004761, + "learning_rate": 3.6233002414152025e-05, + "loss": 0.3141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3200452923774719, + "step": 2210 + }, + { + "epoch": 1.956713780918728, + "grad_norm": 0.5528004765510559, + "learning_rate": 3.620722834240939e-05, + "loss": 0.3353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31394898891448975, + "step": 2215 + }, + { + "epoch": 1.96113074204947, + "grad_norm": 0.5560632944107056, + "learning_rate": 3.61813756299512e-05, + "loss": 0.3328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3282029330730438, + "step": 2220 + }, + { + "epoch": 1.965547703180212, + "grad_norm": 0.6172696352005005, + "learning_rate": 3.6155444402219995e-05, + "loss": 0.3315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2924641966819763, + "step": 2225 + }, + { + "epoch": 1.969964664310954, + "grad_norm": 0.6023048758506775, + "learning_rate": 3.612943478503929e-05, + "loss": 0.313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33808523416519165, + "step": 2230 + }, + { + "epoch": 1.9743816254416962, + "grad_norm": 0.5986247658729553, + "learning_rate": 3.610334690461295e-05, + "loss": 0.2988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3043588399887085, + "step": 2235 + }, + { + "epoch": 1.978798586572438, + "grad_norm": 0.6460138559341431, + "learning_rate": 3.6077180887524584e-05, + "loss": 0.3008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3652576506137848, + "step": 2240 + }, + { + "epoch": 1.98321554770318, + "grad_norm": 0.6833009123802185, + "learning_rate": 3.605093686073694e-05, + "loss": 0.3143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3827962875366211, + "step": 2245 + }, + { + "epoch": 1.9876325088339222, + "grad_norm": 0.5557314157485962, + "learning_rate": 3.602461495159131e-05, + "loss": 0.3122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28441256284713745, + "step": 2250 + }, + { + "epoch": 1.9920494699646643, + "grad_norm": 0.6646502017974854, + "learning_rate": 3.5998215287806845e-05, + "loss": 0.3075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2972768545150757, + "step": 2255 + }, + { + "epoch": 1.9964664310954063, + "grad_norm": 0.6511381268501282, + "learning_rate": 3.597173799748001e-05, + "loss": 0.3088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3291865587234497, + "step": 2260 + }, + { + "epoch": 2.001766784452297, + "grad_norm": 0.5447190999984741, + "learning_rate": 3.594518320908391e-05, + "loss": 0.2974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28889337182044983, + "step": 2265 + }, + { + "epoch": 2.006183745583039, + "grad_norm": 0.6198117733001709, + "learning_rate": 3.591855105146769e-05, + "loss": 0.2677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23140525817871094, + "step": 2270 + }, + { + "epoch": 2.010600706713781, + "grad_norm": 0.5488595366477966, + "learning_rate": 3.589184165385592e-05, + "loss": 0.2922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23664027452468872, + "step": 2275 + }, + { + "epoch": 2.015017667844523, + "grad_norm": 0.5935027599334717, + "learning_rate": 3.586505514584793e-05, + "loss": 0.2791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3317275643348694, + "step": 2280 + }, + { + "epoch": 2.019434628975265, + "grad_norm": 0.5912279486656189, + "learning_rate": 3.583819165741722e-05, + "loss": 0.3277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3340924382209778, + "step": 2285 + }, + { + "epoch": 2.0238515901060072, + "grad_norm": 0.6450473070144653, + "learning_rate": 3.581125131891082e-05, + "loss": 0.2556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2265785187482834, + "step": 2290 + }, + { + "epoch": 2.0282685512367493, + "grad_norm": 0.605987548828125, + "learning_rate": 3.578423426104864e-05, + "loss": 0.3267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38664746284484863, + "step": 2295 + }, + { + "epoch": 2.032685512367491, + "grad_norm": 0.6688462495803833, + "learning_rate": 3.5757140614922846e-05, + "loss": 0.2794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29907870292663574, + "step": 2300 + }, + { + "epoch": 2.037102473498233, + "grad_norm": 0.6601260900497437, + "learning_rate": 3.572997051199724e-05, + "loss": 0.3039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4096679091453552, + "step": 2305 + }, + { + "epoch": 2.041519434628975, + "grad_norm": 0.6559783220291138, + "learning_rate": 3.5702724084106596e-05, + "loss": 0.2865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2515143156051636, + "step": 2310 + }, + { + "epoch": 2.045936395759717, + "grad_norm": 0.8877668380737305, + "learning_rate": 3.567540146345604e-05, + "loss": 0.3152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.324682354927063, + "step": 2315 + }, + { + "epoch": 2.0503533568904593, + "grad_norm": 0.6168680787086487, + "learning_rate": 3.5648002782620375e-05, + "loss": 0.3071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28638938069343567, + "step": 2320 + }, + { + "epoch": 2.0547703180212014, + "grad_norm": 0.6127181649208069, + "learning_rate": 3.562052817454351e-05, + "loss": 0.2835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3344689905643463, + "step": 2325 + }, + { + "epoch": 2.0591872791519434, + "grad_norm": 0.6770476698875427, + "learning_rate": 3.5592977772537734e-05, + "loss": 0.2967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.257055401802063, + "step": 2330 + }, + { + "epoch": 2.0636042402826855, + "grad_norm": 0.5903377532958984, + "learning_rate": 3.55653517102831e-05, + "loss": 0.2975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32297080755233765, + "step": 2335 + }, + { + "epoch": 2.0680212014134276, + "grad_norm": 0.6872009634971619, + "learning_rate": 3.5537650121826804e-05, + "loss": 0.2931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3144655227661133, + "step": 2340 + }, + { + "epoch": 2.0724381625441697, + "grad_norm": 0.6215624213218689, + "learning_rate": 3.550987314158249e-05, + "loss": 0.3258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35112127661705017, + "step": 2345 + }, + { + "epoch": 2.0768551236749118, + "grad_norm": 0.6877519488334656, + "learning_rate": 3.5482020904329635e-05, + "loss": 0.2963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26411569118499756, + "step": 2350 + }, + { + "epoch": 2.081272084805654, + "grad_norm": 0.8698825240135193, + "learning_rate": 3.545409354521286e-05, + "loss": 0.3224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33345749974250793, + "step": 2355 + }, + { + "epoch": 2.085689045936396, + "grad_norm": 0.6604434251785278, + "learning_rate": 3.542609119974129e-05, + "loss": 0.2875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3097860515117645, + "step": 2360 + }, + { + "epoch": 2.090106007067138, + "grad_norm": 0.5723004341125488, + "learning_rate": 3.539801400378793e-05, + "loss": 0.2737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23669950664043427, + "step": 2365 + }, + { + "epoch": 2.0945229681978796, + "grad_norm": 0.7183104157447815, + "learning_rate": 3.5369862093588946e-05, + "loss": 0.2733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3131876289844513, + "step": 2370 + }, + { + "epoch": 2.0989399293286217, + "grad_norm": 0.649411141872406, + "learning_rate": 3.534163560574304e-05, + "loss": 0.3283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31314218044281006, + "step": 2375 + }, + { + "epoch": 2.103356890459364, + "grad_norm": 0.659487247467041, + "learning_rate": 3.531333467721078e-05, + "loss": 0.3206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31890353560447693, + "step": 2380 + }, + { + "epoch": 2.107773851590106, + "grad_norm": 0.6342830657958984, + "learning_rate": 3.5284959445313945e-05, + "loss": 0.2959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2846304178237915, + "step": 2385 + }, + { + "epoch": 2.112190812720848, + "grad_norm": 0.6259862780570984, + "learning_rate": 3.525651004773481e-05, + "loss": 0.3114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35213717818260193, + "step": 2390 + }, + { + "epoch": 2.11660777385159, + "grad_norm": 0.6828689575195312, + "learning_rate": 3.522798662251558e-05, + "loss": 0.3066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35810428857803345, + "step": 2395 + }, + { + "epoch": 2.121024734982332, + "grad_norm": 0.621423602104187, + "learning_rate": 3.51993893080576e-05, + "loss": 0.288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23874174058437347, + "step": 2400 + }, + { + "epoch": 2.125441696113074, + "grad_norm": 0.6099359393119812, + "learning_rate": 3.517071824312077e-05, + "loss": 0.3052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2990003228187561, + "step": 2405 + }, + { + "epoch": 2.1298586572438163, + "grad_norm": 0.6000734567642212, + "learning_rate": 3.5141973566822843e-05, + "loss": 0.2777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2534373700618744, + "step": 2410 + }, + { + "epoch": 2.1342756183745584, + "grad_norm": 0.5893080830574036, + "learning_rate": 3.511315541863873e-05, + "loss": 0.2757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2416633665561676, + "step": 2415 + }, + { + "epoch": 2.1386925795053005, + "grad_norm": 0.6631132960319519, + "learning_rate": 3.508426393839986e-05, + "loss": 0.3008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28256604075431824, + "step": 2420 + }, + { + "epoch": 2.1431095406360425, + "grad_norm": 0.5691207647323608, + "learning_rate": 3.505529926629348e-05, + "loss": 0.2822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2603251338005066, + "step": 2425 + }, + { + "epoch": 2.1475265017667846, + "grad_norm": 0.632331371307373, + "learning_rate": 3.502626154286196e-05, + "loss": 0.2722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25206872820854187, + "step": 2430 + }, + { + "epoch": 2.1519434628975267, + "grad_norm": 0.5492742657661438, + "learning_rate": 3.4997150909002156e-05, + "loss": 0.2772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21242624521255493, + "step": 2435 + }, + { + "epoch": 2.1563604240282688, + "grad_norm": 0.6887109279632568, + "learning_rate": 3.496796750596469e-05, + "loss": 0.3005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33346956968307495, + "step": 2440 + }, + { + "epoch": 2.1607773851590104, + "grad_norm": 0.6558607816696167, + "learning_rate": 3.4938711475353286e-05, + "loss": 0.262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22680602967739105, + "step": 2445 + }, + { + "epoch": 2.1651943462897525, + "grad_norm": 0.6331962943077087, + "learning_rate": 3.490938295912404e-05, + "loss": 0.3254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.319640576839447, + "step": 2450 + }, + { + "epoch": 2.1696113074204946, + "grad_norm": 0.6158129572868347, + "learning_rate": 3.487998209958479e-05, + "loss": 0.2815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31720587611198425, + "step": 2455 + }, + { + "epoch": 2.1740282685512367, + "grad_norm": 0.6651401519775391, + "learning_rate": 3.485050903939439e-05, + "loss": 0.2918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33852121233940125, + "step": 2460 + }, + { + "epoch": 2.1784452296819787, + "grad_norm": 0.7507199048995972, + "learning_rate": 3.482096392156203e-05, + "loss": 0.3105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3842792212963104, + "step": 2465 + }, + { + "epoch": 2.182862190812721, + "grad_norm": 0.5813544988632202, + "learning_rate": 3.4791346889446536e-05, + "loss": 0.2912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2993150055408478, + "step": 2470 + }, + { + "epoch": 2.187279151943463, + "grad_norm": 0.5870715975761414, + "learning_rate": 3.476165808675567e-05, + "loss": 0.2811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22926348447799683, + "step": 2475 + }, + { + "epoch": 2.191696113074205, + "grad_norm": 0.6749504208564758, + "learning_rate": 3.473189765754544e-05, + "loss": 0.3342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2661892771720886, + "step": 2480 + }, + { + "epoch": 2.196113074204947, + "grad_norm": 0.7036541104316711, + "learning_rate": 3.4702065746219416e-05, + "loss": 0.3031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28539180755615234, + "step": 2485 + }, + { + "epoch": 2.200530035335689, + "grad_norm": 0.7815271019935608, + "learning_rate": 3.467216249752799e-05, + "loss": 0.3215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35631412267684937, + "step": 2490 + }, + { + "epoch": 2.204946996466431, + "grad_norm": 0.6628273129463196, + "learning_rate": 3.4642188056567726e-05, + "loss": 0.2966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27998021245002747, + "step": 2495 + }, + { + "epoch": 2.2093639575971733, + "grad_norm": 0.968639075756073, + "learning_rate": 3.461214256878059e-05, + "loss": 0.2672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23727664351463318, + "step": 2500 + }, + { + "epoch": 2.2137809187279154, + "grad_norm": 0.6167461276054382, + "learning_rate": 3.458202617995332e-05, + "loss": 0.293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30183541774749756, + "step": 2505 + }, + { + "epoch": 2.218197879858657, + "grad_norm": 0.6023343801498413, + "learning_rate": 3.4551839036216645e-05, + "loss": 0.2677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2949235141277313, + "step": 2510 + }, + { + "epoch": 2.222614840989399, + "grad_norm": 0.6811277866363525, + "learning_rate": 3.452158128404465e-05, + "loss": 0.3118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3465280532836914, + "step": 2515 + }, + { + "epoch": 2.227031802120141, + "grad_norm": 0.6505920886993408, + "learning_rate": 3.449125307025399e-05, + "loss": 0.2811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2278938889503479, + "step": 2520 + }, + { + "epoch": 2.2314487632508833, + "grad_norm": 0.5918983221054077, + "learning_rate": 3.446085454200322e-05, + "loss": 0.2657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2987041771411896, + "step": 2525 + }, + { + "epoch": 2.2358657243816253, + "grad_norm": 0.5385729670524597, + "learning_rate": 3.44303858467921e-05, + "loss": 0.294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25492385029792786, + "step": 2530 + }, + { + "epoch": 2.2402826855123674, + "grad_norm": 0.6151769757270813, + "learning_rate": 3.4399847132460826e-05, + "loss": 0.3009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2648699879646301, + "step": 2535 + }, + { + "epoch": 2.2446996466431095, + "grad_norm": 0.6031373739242554, + "learning_rate": 3.436923854718935e-05, + "loss": 0.2864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23283502459526062, + "step": 2540 + }, + { + "epoch": 2.2491166077738516, + "grad_norm": 0.6123268008232117, + "learning_rate": 3.433856023949666e-05, + "loss": 0.3324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2949381172657013, + "step": 2545 + }, + { + "epoch": 2.2535335689045937, + "grad_norm": 0.63086998462677, + "learning_rate": 3.430781235824006e-05, + "loss": 0.3372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3198104500770569, + "step": 2550 + }, + { + "epoch": 2.2579505300353357, + "grad_norm": 0.5796027779579163, + "learning_rate": 3.427699505261439e-05, + "loss": 0.276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2592904567718506, + "step": 2555 + }, + { + "epoch": 2.262367491166078, + "grad_norm": 0.6514537930488586, + "learning_rate": 3.4246108472151404e-05, + "loss": 0.3106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24681276082992554, + "step": 2560 + }, + { + "epoch": 2.26678445229682, + "grad_norm": 0.6188172698020935, + "learning_rate": 3.421515276671897e-05, + "loss": 0.3131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25234729051589966, + "step": 2565 + }, + { + "epoch": 2.271201413427562, + "grad_norm": 0.614671528339386, + "learning_rate": 3.418412808652037e-05, + "loss": 0.2993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2817681133747101, + "step": 2570 + }, + { + "epoch": 2.275618374558304, + "grad_norm": 0.6262189745903015, + "learning_rate": 3.4153034582093546e-05, + "loss": 0.2756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2680363059043884, + "step": 2575 + }, + { + "epoch": 2.280035335689046, + "grad_norm": 0.6415190100669861, + "learning_rate": 3.412187240431043e-05, + "loss": 0.3148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40983670949935913, + "step": 2580 + }, + { + "epoch": 2.2844522968197882, + "grad_norm": 0.5524982213973999, + "learning_rate": 3.409064170437612e-05, + "loss": 0.3066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26829278469085693, + "step": 2585 + }, + { + "epoch": 2.28886925795053, + "grad_norm": 0.5553908944129944, + "learning_rate": 3.405934263382824e-05, + "loss": 0.2891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24250522255897522, + "step": 2590 + }, + { + "epoch": 2.293286219081272, + "grad_norm": 0.5957083106040955, + "learning_rate": 3.4027975344536125e-05, + "loss": 0.2759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2847854495048523, + "step": 2595 + }, + { + "epoch": 2.297703180212014, + "grad_norm": 0.5808223485946655, + "learning_rate": 3.399653998870016e-05, + "loss": 0.3083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3035164773464203, + "step": 2600 + }, + { + "epoch": 2.302120141342756, + "grad_norm": 0.5876525640487671, + "learning_rate": 3.396503671885098e-05, + "loss": 0.2856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3480638265609741, + "step": 2605 + }, + { + "epoch": 2.306537102473498, + "grad_norm": 0.5925856232643127, + "learning_rate": 3.3933465687848745e-05, + "loss": 0.2716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2727441191673279, + "step": 2610 + }, + { + "epoch": 2.3109540636042403, + "grad_norm": 0.6211825013160706, + "learning_rate": 3.390182704888242e-05, + "loss": 0.2747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26210927963256836, + "step": 2615 + }, + { + "epoch": 2.3153710247349824, + "grad_norm": 0.6318957805633545, + "learning_rate": 3.387012095546903e-05, + "loss": 0.2731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2508987486362457, + "step": 2620 + }, + { + "epoch": 2.3197879858657244, + "grad_norm": 0.5794128775596619, + "learning_rate": 3.3838347561452854e-05, + "loss": 0.2676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2536904811859131, + "step": 2625 + }, + { + "epoch": 2.3242049469964665, + "grad_norm": 0.7687904238700867, + "learning_rate": 3.380650702100478e-05, + "loss": 0.3206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2745317220687866, + "step": 2630 + }, + { + "epoch": 2.3286219081272086, + "grad_norm": 0.6696126461029053, + "learning_rate": 3.3774599488621477e-05, + "loss": 0.2588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34656822681427, + "step": 2635 + }, + { + "epoch": 2.3330388692579507, + "grad_norm": 0.6203848719596863, + "learning_rate": 3.374262511912468e-05, + "loss": 0.287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27805572748184204, + "step": 2640 + }, + { + "epoch": 2.3374558303886928, + "grad_norm": 0.6597334742546082, + "learning_rate": 3.371058406766043e-05, + "loss": 0.3141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2802439332008362, + "step": 2645 + }, + { + "epoch": 2.3418727915194344, + "grad_norm": 0.5888747572898865, + "learning_rate": 3.3678476489698316e-05, + "loss": 0.2838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28658509254455566, + "step": 2650 + }, + { + "epoch": 2.3462897526501765, + "grad_norm": 0.6427140831947327, + "learning_rate": 3.364630254103073e-05, + "loss": 0.2916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2904638648033142, + "step": 2655 + }, + { + "epoch": 2.3507067137809186, + "grad_norm": 0.6213993430137634, + "learning_rate": 3.3614062377772124e-05, + "loss": 0.2578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25421738624572754, + "step": 2660 + }, + { + "epoch": 2.3551236749116606, + "grad_norm": 0.6171494722366333, + "learning_rate": 3.358175615635821e-05, + "loss": 0.2999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2982363998889923, + "step": 2665 + }, + { + "epoch": 2.3595406360424027, + "grad_norm": 0.6194586157798767, + "learning_rate": 3.354938403354524e-05, + "loss": 0.2791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27266404032707214, + "step": 2670 + }, + { + "epoch": 2.363957597173145, + "grad_norm": 0.5762106776237488, + "learning_rate": 3.351694616640924e-05, + "loss": 0.2736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.264001727104187, + "step": 2675 + }, + { + "epoch": 2.368374558303887, + "grad_norm": 0.7091301679611206, + "learning_rate": 3.348444271234523e-05, + "loss": 0.2929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24949441850185394, + "step": 2680 + }, + { + "epoch": 2.372791519434629, + "grad_norm": 0.5496478080749512, + "learning_rate": 3.3451873829066474e-05, + "loss": 0.2975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23808279633522034, + "step": 2685 + }, + { + "epoch": 2.377208480565371, + "grad_norm": 0.6888132095336914, + "learning_rate": 3.341923967460371e-05, + "loss": 0.2851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24560776352882385, + "step": 2690 + }, + { + "epoch": 2.381625441696113, + "grad_norm": 0.5535579323768616, + "learning_rate": 3.338654040730439e-05, + "loss": 0.3136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30382639169692993, + "step": 2695 + }, + { + "epoch": 2.386042402826855, + "grad_norm": 0.5726889967918396, + "learning_rate": 3.335377618583191e-05, + "loss": 0.3478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.355141818523407, + "step": 2700 + }, + { + "epoch": 2.3904593639575973, + "grad_norm": 0.5942392349243164, + "learning_rate": 3.332094716916481e-05, + "loss": 0.2869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2685794234275818, + "step": 2705 + }, + { + "epoch": 2.3948763250883394, + "grad_norm": 0.6522179245948792, + "learning_rate": 3.328805351659606e-05, + "loss": 0.2872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26475298404693604, + "step": 2710 + }, + { + "epoch": 2.3992932862190814, + "grad_norm": 0.6337575912475586, + "learning_rate": 3.3255095387732245e-05, + "loss": 0.2787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.304879754781723, + "step": 2715 + }, + { + "epoch": 2.4037102473498235, + "grad_norm": 0.6819047331809998, + "learning_rate": 3.3222072942492807e-05, + "loss": 0.283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2330174744129181, + "step": 2720 + }, + { + "epoch": 2.4081272084805656, + "grad_norm": 0.7528138160705566, + "learning_rate": 3.318898634110925e-05, + "loss": 0.3262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43806248903274536, + "step": 2725 + }, + { + "epoch": 2.4125441696113072, + "grad_norm": 0.6072879433631897, + "learning_rate": 3.31558357441244e-05, + "loss": 0.2902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2504928410053253, + "step": 2730 + }, + { + "epoch": 2.4169611307420493, + "grad_norm": 0.6802332401275635, + "learning_rate": 3.312262131239157e-05, + "loss": 0.3299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26598912477493286, + "step": 2735 + }, + { + "epoch": 2.4213780918727914, + "grad_norm": 0.6019970774650574, + "learning_rate": 3.308934320707385e-05, + "loss": 0.2914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2811354398727417, + "step": 2740 + }, + { + "epoch": 2.4257950530035335, + "grad_norm": 0.5732802748680115, + "learning_rate": 3.305600158964325e-05, + "loss": 0.3168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36934155225753784, + "step": 2745 + }, + { + "epoch": 2.4302120141342756, + "grad_norm": 0.6369587182998657, + "learning_rate": 3.3022596621879976e-05, + "loss": 0.3298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3334657549858093, + "step": 2750 + }, + { + "epoch": 2.4346289752650176, + "grad_norm": 0.5502805709838867, + "learning_rate": 3.298912846587162e-05, + "loss": 0.2721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21960023045539856, + "step": 2755 + }, + { + "epoch": 2.4390459363957597, + "grad_norm": 0.5598191618919373, + "learning_rate": 3.2955597284012375e-05, + "loss": 0.304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28418734669685364, + "step": 2760 + }, + { + "epoch": 2.443462897526502, + "grad_norm": 1.0691636800765991, + "learning_rate": 3.2922003239002234e-05, + "loss": 0.322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4324992001056671, + "step": 2765 + }, + { + "epoch": 2.447879858657244, + "grad_norm": 0.5500887036323547, + "learning_rate": 3.288834649384624e-05, + "loss": 0.2833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30279844999313354, + "step": 2770 + }, + { + "epoch": 2.452296819787986, + "grad_norm": 0.6789073348045349, + "learning_rate": 3.2854627211853656e-05, + "loss": 0.329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3540169894695282, + "step": 2775 + }, + { + "epoch": 2.456713780918728, + "grad_norm": 0.6984388828277588, + "learning_rate": 3.2820845556637173e-05, + "loss": 0.3262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26223987340927124, + "step": 2780 + }, + { + "epoch": 2.46113074204947, + "grad_norm": 0.6759300827980042, + "learning_rate": 3.278700169211216e-05, + "loss": 0.2892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2541850209236145, + "step": 2785 + }, + { + "epoch": 2.4655477031802118, + "grad_norm": 0.5834370255470276, + "learning_rate": 3.275309578249581e-05, + "loss": 0.2874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27517110109329224, + "step": 2790 + }, + { + "epoch": 2.469964664310954, + "grad_norm": 0.6928835511207581, + "learning_rate": 3.2719127992306386e-05, + "loss": 0.2761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34850040078163147, + "step": 2795 + }, + { + "epoch": 2.474381625441696, + "grad_norm": 0.5541810989379883, + "learning_rate": 3.26850984863624e-05, + "loss": 0.2861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25448691844940186, + "step": 2800 + }, + { + "epoch": 2.478798586572438, + "grad_norm": 0.5982546210289001, + "learning_rate": 3.265100742978183e-05, + "loss": 0.2931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21053552627563477, + "step": 2805 + }, + { + "epoch": 2.48321554770318, + "grad_norm": 0.5980389714241028, + "learning_rate": 3.261685498798131e-05, + "loss": 0.2993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2833639681339264, + "step": 2810 + }, + { + "epoch": 2.487632508833922, + "grad_norm": 0.5997810959815979, + "learning_rate": 3.258264132667531e-05, + "loss": 0.2439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2284521758556366, + "step": 2815 + }, + { + "epoch": 2.4920494699646643, + "grad_norm": 0.5549051761627197, + "learning_rate": 3.254836661187537e-05, + "loss": 0.2679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31983861327171326, + "step": 2820 + }, + { + "epoch": 2.4964664310954063, + "grad_norm": 0.5757570862770081, + "learning_rate": 3.2514031009889264e-05, + "loss": 0.2843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27677464485168457, + "step": 2825 + }, + { + "epoch": 2.5008833922261484, + "grad_norm": 0.5566675662994385, + "learning_rate": 3.247963468732021e-05, + "loss": 0.2926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23760895431041718, + "step": 2830 + }, + { + "epoch": 2.5053003533568905, + "grad_norm": 0.5576279163360596, + "learning_rate": 3.244517781106604e-05, + "loss": 0.2898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3000289797782898, + "step": 2835 + }, + { + "epoch": 2.5097173144876326, + "grad_norm": 0.5720181465148926, + "learning_rate": 3.241066054831842e-05, + "loss": 0.2766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2972480058670044, + "step": 2840 + }, + { + "epoch": 2.5141342756183747, + "grad_norm": 0.5405129194259644, + "learning_rate": 3.237608306656201e-05, + "loss": 0.2677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23669353127479553, + "step": 2845 + }, + { + "epoch": 2.5185512367491167, + "grad_norm": 0.5755829215049744, + "learning_rate": 3.234144553357368e-05, + "loss": 0.3027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3481435775756836, + "step": 2850 + }, + { + "epoch": 2.522968197879859, + "grad_norm": 0.7123222947120667, + "learning_rate": 3.230674811742167e-05, + "loss": 0.2627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2425704300403595, + "step": 2855 + }, + { + "epoch": 2.527385159010601, + "grad_norm": 0.6053310632705688, + "learning_rate": 3.227199098646479e-05, + "loss": 0.2944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3263585567474365, + "step": 2860 + }, + { + "epoch": 2.531802120141343, + "grad_norm": 0.6052203178405762, + "learning_rate": 3.223717430935158e-05, + "loss": 0.3646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30958011746406555, + "step": 2865 + }, + { + "epoch": 2.536219081272085, + "grad_norm": 0.6099016666412354, + "learning_rate": 3.2202298255019546e-05, + "loss": 0.2613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2909597158432007, + "step": 2870 + }, + { + "epoch": 2.5406360424028267, + "grad_norm": 0.6662100553512573, + "learning_rate": 3.216736299269427e-05, + "loss": 0.2772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2982367277145386, + "step": 2875 + }, + { + "epoch": 2.545053003533569, + "grad_norm": 0.7286942005157471, + "learning_rate": 3.213236869188864e-05, + "loss": 0.3025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2656930088996887, + "step": 2880 + }, + { + "epoch": 2.549469964664311, + "grad_norm": 0.5937227606773376, + "learning_rate": 3.209731552240201e-05, + "loss": 0.2999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34423115849494934, + "step": 2885 + }, + { + "epoch": 2.553886925795053, + "grad_norm": 0.6218231320381165, + "learning_rate": 3.206220365431937e-05, + "loss": 0.3097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3695404529571533, + "step": 2890 + }, + { + "epoch": 2.558303886925795, + "grad_norm": 0.6536095142364502, + "learning_rate": 3.202703325801054e-05, + "loss": 0.2994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36360839009284973, + "step": 2895 + }, + { + "epoch": 2.562720848056537, + "grad_norm": 0.6371568441390991, + "learning_rate": 3.19918045041293e-05, + "loss": 0.3254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3120114803314209, + "step": 2900 + }, + { + "epoch": 2.567137809187279, + "grad_norm": 0.6116867065429688, + "learning_rate": 3.1956517563612645e-05, + "loss": 0.3266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31710511445999146, + "step": 2905 + }, + { + "epoch": 2.5715547703180213, + "grad_norm": 0.6612317562103271, + "learning_rate": 3.1921172607679846e-05, + "loss": 0.3028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3122076690196991, + "step": 2910 + }, + { + "epoch": 2.5759717314487633, + "grad_norm": 0.6160558462142944, + "learning_rate": 3.1885769807831714e-05, + "loss": 0.29, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27517688274383545, + "step": 2915 + }, + { + "epoch": 2.5803886925795054, + "grad_norm": 0.600016176700592, + "learning_rate": 3.185030933584972e-05, + "loss": 0.3317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3114628791809082, + "step": 2920 + }, + { + "epoch": 2.5848056537102475, + "grad_norm": 1.3786065578460693, + "learning_rate": 3.181479136379518e-05, + "loss": 0.314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39571282267570496, + "step": 2925 + }, + { + "epoch": 2.589222614840989, + "grad_norm": 0.6324872970581055, + "learning_rate": 3.177921606400838e-05, + "loss": 0.2583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26827818155288696, + "step": 2930 + }, + { + "epoch": 2.5936395759717312, + "grad_norm": 0.7281625270843506, + "learning_rate": 3.1743583609107815e-05, + "loss": 0.2935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2841928005218506, + "step": 2935 + }, + { + "epoch": 2.5980565371024733, + "grad_norm": 0.5587540864944458, + "learning_rate": 3.1707894171989266e-05, + "loss": 0.2884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2770228981971741, + "step": 2940 + }, + { + "epoch": 2.6024734982332154, + "grad_norm": 0.6678293347358704, + "learning_rate": 3.167214792582505e-05, + "loss": 0.3147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30559659004211426, + "step": 2945 + }, + { + "epoch": 2.6068904593639575, + "grad_norm": 0.6080886125564575, + "learning_rate": 3.163634504406309e-05, + "loss": 0.2943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2588590383529663, + "step": 2950 + }, + { + "epoch": 2.6113074204946995, + "grad_norm": 0.5840783715248108, + "learning_rate": 3.160048570042614e-05, + "loss": 0.2724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2337406575679779, + "step": 2955 + }, + { + "epoch": 2.6157243816254416, + "grad_norm": 0.5808059573173523, + "learning_rate": 3.1564570068910905e-05, + "loss": 0.2943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.304470419883728, + "step": 2960 + }, + { + "epoch": 2.6201413427561837, + "grad_norm": 0.5165686011314392, + "learning_rate": 3.152859832378723e-05, + "loss": 0.2963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3160091042518616, + "step": 2965 + }, + { + "epoch": 2.624558303886926, + "grad_norm": 0.5811315774917603, + "learning_rate": 3.1492570639597216e-05, + "loss": 0.2916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22409474849700928, + "step": 2970 + }, + { + "epoch": 2.628975265017668, + "grad_norm": 0.901731014251709, + "learning_rate": 3.145648719115439e-05, + "loss": 0.2875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2565080225467682, + "step": 2975 + }, + { + "epoch": 2.63339222614841, + "grad_norm": 0.6577340364456177, + "learning_rate": 3.1420348153542875e-05, + "loss": 0.3208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33042359352111816, + "step": 2980 + }, + { + "epoch": 2.637809187279152, + "grad_norm": 0.5609688758850098, + "learning_rate": 3.138415370211651e-05, + "loss": 0.3028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30433666706085205, + "step": 2985 + }, + { + "epoch": 2.642226148409894, + "grad_norm": 0.5504027605056763, + "learning_rate": 3.1347904012498015e-05, + "loss": 0.2762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27242234349250793, + "step": 2990 + }, + { + "epoch": 2.646643109540636, + "grad_norm": 0.679768443107605, + "learning_rate": 3.1311599260578144e-05, + "loss": 0.2736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26140671968460083, + "step": 2995 + }, + { + "epoch": 2.6510600706713783, + "grad_norm": 1.4311226606369019, + "learning_rate": 3.1275239622514805e-05, + "loss": 0.2793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25825434923171997, + "step": 3000 + }, + { + "epoch": 2.6554770318021204, + "grad_norm": 0.6120150685310364, + "learning_rate": 3.123882527473226e-05, + "loss": 0.2988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33173418045043945, + "step": 3005 + }, + { + "epoch": 2.6598939929328624, + "grad_norm": 0.7447654008865356, + "learning_rate": 3.1202356393920205e-05, + "loss": 0.2891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2618862986564636, + "step": 3010 + }, + { + "epoch": 2.664310954063604, + "grad_norm": 0.6641417145729065, + "learning_rate": 3.1165833157032945e-05, + "loss": 0.3026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32111161947250366, + "step": 3015 + }, + { + "epoch": 2.668727915194346, + "grad_norm": 0.5896281599998474, + "learning_rate": 3.112925574128853e-05, + "loss": 0.3406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3454618453979492, + "step": 3020 + }, + { + "epoch": 2.6731448763250882, + "grad_norm": 0.5626996159553528, + "learning_rate": 3.109262432416791e-05, + "loss": 0.2728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25294023752212524, + "step": 3025 + }, + { + "epoch": 2.6775618374558303, + "grad_norm": 0.6241391897201538, + "learning_rate": 3.105593908341405e-05, + "loss": 0.3298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4321364164352417, + "step": 3030 + }, + { + "epoch": 2.6819787985865724, + "grad_norm": 0.5742759704589844, + "learning_rate": 3.1019200197031074e-05, + "loss": 0.2969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29776108264923096, + "step": 3035 + }, + { + "epoch": 2.6863957597173145, + "grad_norm": 0.5870106220245361, + "learning_rate": 3.098240784328342e-05, + "loss": 0.2909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3504473567008972, + "step": 3040 + }, + { + "epoch": 2.6908127208480566, + "grad_norm": 0.7754055261611938, + "learning_rate": 3.094556220069495e-05, + "loss": 0.296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3014984726905823, + "step": 3045 + }, + { + "epoch": 2.6952296819787986, + "grad_norm": 0.6273770928382874, + "learning_rate": 3.09086634480481e-05, + "loss": 0.3134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26273688673973083, + "step": 3050 + }, + { + "epoch": 2.6996466431095407, + "grad_norm": 0.6451196074485779, + "learning_rate": 3.087171176438299e-05, + "loss": 0.2997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32904472947120667, + "step": 3055 + }, + { + "epoch": 2.704063604240283, + "grad_norm": 0.5997614860534668, + "learning_rate": 3.083470732899659e-05, + "loss": 0.2636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27674105763435364, + "step": 3060 + }, + { + "epoch": 2.708480565371025, + "grad_norm": 0.6042181253433228, + "learning_rate": 3.0797650321441836e-05, + "loss": 0.2955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3244817852973938, + "step": 3065 + }, + { + "epoch": 2.7128975265017665, + "grad_norm": 0.6343804597854614, + "learning_rate": 3.076054092152673e-05, + "loss": 0.3018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25237613916397095, + "step": 3070 + }, + { + "epoch": 2.7173144876325086, + "grad_norm": 0.5651307106018066, + "learning_rate": 3.072337930931351e-05, + "loss": 0.3081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2834140658378601, + "step": 3075 + }, + { + "epoch": 2.7217314487632507, + "grad_norm": 0.5973877906799316, + "learning_rate": 3.068616566511777e-05, + "loss": 0.2835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3114064633846283, + "step": 3080 + }, + { + "epoch": 2.7261484098939928, + "grad_norm": 0.6461395621299744, + "learning_rate": 3.0648900169507546e-05, + "loss": 0.3086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3663422465324402, + "step": 3085 + }, + { + "epoch": 2.730565371024735, + "grad_norm": 0.7704973220825195, + "learning_rate": 3.0611583003302483e-05, + "loss": 0.2973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.324812650680542, + "step": 3090 + }, + { + "epoch": 2.734982332155477, + "grad_norm": 0.6625204086303711, + "learning_rate": 3.0574214347572944e-05, + "loss": 0.2868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2915918827056885, + "step": 3095 + }, + { + "epoch": 2.739399293286219, + "grad_norm": 0.7523000240325928, + "learning_rate": 3.0536794383639124e-05, + "loss": 0.315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.267653226852417, + "step": 3100 + }, + { + "epoch": 2.743816254416961, + "grad_norm": 0.6316024661064148, + "learning_rate": 3.0499323293070168e-05, + "loss": 0.2744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2455906867980957, + "step": 3105 + }, + { + "epoch": 2.748233215547703, + "grad_norm": 0.5501362681388855, + "learning_rate": 3.0461801257683316e-05, + "loss": 0.2514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24123415350914001, + "step": 3110 + }, + { + "epoch": 2.7526501766784452, + "grad_norm": 0.5820156335830688, + "learning_rate": 3.0424228459542996e-05, + "loss": 0.3227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2901195287704468, + "step": 3115 + }, + { + "epoch": 2.7570671378091873, + "grad_norm": 0.5763217806816101, + "learning_rate": 3.0386605080959933e-05, + "loss": 0.3368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2486405223608017, + "step": 3120 + }, + { + "epoch": 2.7614840989399294, + "grad_norm": 0.6018761396408081, + "learning_rate": 3.0348931304490308e-05, + "loss": 0.3192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3718198835849762, + "step": 3125 + }, + { + "epoch": 2.7659010600706715, + "grad_norm": 0.6464835405349731, + "learning_rate": 3.0311207312934802e-05, + "loss": 0.3052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2733161747455597, + "step": 3130 + }, + { + "epoch": 2.7703180212014136, + "grad_norm": 0.6074162125587463, + "learning_rate": 3.0273433289337782e-05, + "loss": 0.3438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3934619128704071, + "step": 3135 + }, + { + "epoch": 2.7747349823321557, + "grad_norm": 0.6861649751663208, + "learning_rate": 3.0235609416986382e-05, + "loss": 0.3097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3455376625061035, + "step": 3140 + }, + { + "epoch": 2.7791519434628977, + "grad_norm": 0.546413242816925, + "learning_rate": 3.0197735879409582e-05, + "loss": 0.2465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3003658056259155, + "step": 3145 + }, + { + "epoch": 2.78356890459364, + "grad_norm": 0.5838289260864258, + "learning_rate": 3.015981286037737e-05, + "loss": 0.2401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24027827382087708, + "step": 3150 + }, + { + "epoch": 2.787985865724382, + "grad_norm": 0.6270075440406799, + "learning_rate": 3.0121840543899828e-05, + "loss": 0.2884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26620614528656006, + "step": 3155 + }, + { + "epoch": 2.7924028268551235, + "grad_norm": 0.6166526079177856, + "learning_rate": 3.008381911422624e-05, + "loss": 0.3056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2833024263381958, + "step": 3160 + }, + { + "epoch": 2.7968197879858656, + "grad_norm": 0.7598279714584351, + "learning_rate": 3.0045748755844183e-05, + "loss": 0.2683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23250682651996613, + "step": 3165 + }, + { + "epoch": 2.8012367491166077, + "grad_norm": 0.5732672810554504, + "learning_rate": 3.000762965347866e-05, + "loss": 0.3035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28198108077049255, + "step": 3170 + }, + { + "epoch": 2.8056537102473498, + "grad_norm": 0.6021406650543213, + "learning_rate": 2.9969461992091187e-05, + "loss": 0.3052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4037885367870331, + "step": 3175 + }, + { + "epoch": 2.810070671378092, + "grad_norm": 0.5615018606185913, + "learning_rate": 2.9931245956878892e-05, + "loss": 0.2972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35477539896965027, + "step": 3180 + }, + { + "epoch": 2.814487632508834, + "grad_norm": 0.6810048818588257, + "learning_rate": 2.9892981733273622e-05, + "loss": 0.2809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32894670963287354, + "step": 3185 + }, + { + "epoch": 2.818904593639576, + "grad_norm": 0.6966601014137268, + "learning_rate": 2.9854669506941056e-05, + "loss": 0.3045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2800629138946533, + "step": 3190 + }, + { + "epoch": 2.823321554770318, + "grad_norm": 0.6326582431793213, + "learning_rate": 2.9816309463779777e-05, + "loss": 0.3209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2518727481365204, + "step": 3195 + }, + { + "epoch": 2.82773851590106, + "grad_norm": 0.6759814023971558, + "learning_rate": 2.9777901789920393e-05, + "loss": 0.3045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3975909948348999, + "step": 3200 + }, + { + "epoch": 2.8321554770318023, + "grad_norm": 0.6340915560722351, + "learning_rate": 2.9739446671724633e-05, + "loss": 0.2956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25144162774086, + "step": 3205 + }, + { + "epoch": 2.836572438162544, + "grad_norm": 0.6134033203125, + "learning_rate": 2.9700944295784416e-05, + "loss": 0.3104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3317497670650482, + "step": 3210 + }, + { + "epoch": 2.840989399293286, + "grad_norm": 0.5460602045059204, + "learning_rate": 2.9662394848920976e-05, + "loss": 0.247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27905362844467163, + "step": 3215 + }, + { + "epoch": 2.845406360424028, + "grad_norm": 0.6375061273574829, + "learning_rate": 2.962379851818396e-05, + "loss": 0.3235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36598873138427734, + "step": 3220 + }, + { + "epoch": 2.84982332155477, + "grad_norm": 0.5567300319671631, + "learning_rate": 2.9585155490850463e-05, + "loss": 0.2588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18971426784992218, + "step": 3225 + }, + { + "epoch": 2.854240282685512, + "grad_norm": 0.6028186678886414, + "learning_rate": 2.954646595442421e-05, + "loss": 0.2845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3407897353172302, + "step": 3230 + }, + { + "epoch": 2.8586572438162543, + "grad_norm": 0.5787585377693176, + "learning_rate": 2.9507730096634558e-05, + "loss": 0.2964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3281695246696472, + "step": 3235 + }, + { + "epoch": 2.8630742049469964, + "grad_norm": 0.6009767055511475, + "learning_rate": 2.9468948105435652e-05, + "loss": 0.2745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2663469910621643, + "step": 3240 + }, + { + "epoch": 2.8674911660777385, + "grad_norm": 0.5785790085792542, + "learning_rate": 2.943012016900548e-05, + "loss": 0.2944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23294416069984436, + "step": 3245 + }, + { + "epoch": 2.8719081272084805, + "grad_norm": 0.5688700079917908, + "learning_rate": 2.9391246475744952e-05, + "loss": 0.303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23604430258274078, + "step": 3250 + }, + { + "epoch": 2.8763250883392226, + "grad_norm": 0.5440213084220886, + "learning_rate": 2.9352327214277002e-05, + "loss": 0.3017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29112064838409424, + "step": 3255 + }, + { + "epoch": 2.8807420494699647, + "grad_norm": 0.5312842726707458, + "learning_rate": 2.931336257344569e-05, + "loss": 0.2971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25097882747650146, + "step": 3260 + }, + { + "epoch": 2.885159010600707, + "grad_norm": 0.658263623714447, + "learning_rate": 2.9274352742315234e-05, + "loss": 0.2663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2677016854286194, + "step": 3265 + }, + { + "epoch": 2.889575971731449, + "grad_norm": 0.6275796890258789, + "learning_rate": 2.923529791016916e-05, + "loss": 0.2654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26214462518692017, + "step": 3270 + }, + { + "epoch": 2.893992932862191, + "grad_norm": 0.6369099020957947, + "learning_rate": 2.919619826650932e-05, + "loss": 0.3023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27498164772987366, + "step": 3275 + }, + { + "epoch": 2.898409893992933, + "grad_norm": 0.6062490344047546, + "learning_rate": 2.9157054001055007e-05, + "loss": 0.2484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35193073749542236, + "step": 3280 + }, + { + "epoch": 2.902826855123675, + "grad_norm": 0.6960145831108093, + "learning_rate": 2.9117865303742043e-05, + "loss": 0.2768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1947738081216812, + "step": 3285 + }, + { + "epoch": 2.907243816254417, + "grad_norm": 0.610952615737915, + "learning_rate": 2.9078632364721813e-05, + "loss": 0.2925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3309261202812195, + "step": 3290 + }, + { + "epoch": 2.9116607773851593, + "grad_norm": 0.696971595287323, + "learning_rate": 2.903935537436041e-05, + "loss": 0.3046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3597220778465271, + "step": 3295 + }, + { + "epoch": 2.916077738515901, + "grad_norm": 0.5838300585746765, + "learning_rate": 2.900003452323764e-05, + "loss": 0.2693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2813507318496704, + "step": 3300 + }, + { + "epoch": 2.920494699646643, + "grad_norm": 0.5652858018875122, + "learning_rate": 2.8960670002146138e-05, + "loss": 0.2469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2683357000350952, + "step": 3305 + }, + { + "epoch": 2.924911660777385, + "grad_norm": 0.6993016004562378, + "learning_rate": 2.8921262002090443e-05, + "loss": 0.2897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26220783591270447, + "step": 3310 + }, + { + "epoch": 2.929328621908127, + "grad_norm": 0.6500904560089111, + "learning_rate": 2.888181071428607e-05, + "loss": 0.2631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27039170265197754, + "step": 3315 + }, + { + "epoch": 2.9337455830388692, + "grad_norm": 0.6717191934585571, + "learning_rate": 2.884231633015854e-05, + "loss": 0.3335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31322067975997925, + "step": 3320 + }, + { + "epoch": 2.9381625441696113, + "grad_norm": 0.6734561920166016, + "learning_rate": 2.8802779041342527e-05, + "loss": 0.2989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28675904870033264, + "step": 3325 + }, + { + "epoch": 2.9425795053003534, + "grad_norm": 0.5548768043518066, + "learning_rate": 2.876319903968086e-05, + "loss": 0.2627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23984147608280182, + "step": 3330 + }, + { + "epoch": 2.9469964664310955, + "grad_norm": 0.5678575038909912, + "learning_rate": 2.8723576517223635e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26975440979003906, + "step": 3335 + }, + { + "epoch": 2.9514134275618376, + "grad_norm": 0.5872315764427185, + "learning_rate": 2.8683911666227254e-05, + "loss": 0.2604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24881170690059662, + "step": 3340 + }, + { + "epoch": 2.9558303886925796, + "grad_norm": 0.5842449069023132, + "learning_rate": 2.864420467915352e-05, + "loss": 0.2799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24471619725227356, + "step": 3345 + }, + { + "epoch": 2.9602473498233217, + "grad_norm": 0.6217790842056274, + "learning_rate": 2.8604455748668675e-05, + "loss": 0.2745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3837500810623169, + "step": 3350 + }, + { + "epoch": 2.9646643109540634, + "grad_norm": 0.7379738092422485, + "learning_rate": 2.8564665067642485e-05, + "loss": 0.3111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3087494373321533, + "step": 3355 + }, + { + "epoch": 2.9690812720848054, + "grad_norm": 0.6993508338928223, + "learning_rate": 2.8524832829147297e-05, + "loss": 0.3157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2241922914981842, + "step": 3360 + }, + { + "epoch": 2.9734982332155475, + "grad_norm": 0.6891131401062012, + "learning_rate": 2.8484959226457115e-05, + "loss": 0.2835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25775182247161865, + "step": 3365 + }, + { + "epoch": 2.9779151943462896, + "grad_norm": 0.5844883322715759, + "learning_rate": 2.8445044453046624e-05, + "loss": 0.2942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.285645067691803, + "step": 3370 + }, + { + "epoch": 2.9823321554770317, + "grad_norm": 0.546260416507721, + "learning_rate": 2.8405088702590296e-05, + "loss": 0.2498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26289626955986023, + "step": 3375 + }, + { + "epoch": 2.9867491166077738, + "grad_norm": 0.682931125164032, + "learning_rate": 2.8365092168961442e-05, + "loss": 0.2906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23991906642913818, + "step": 3380 + }, + { + "epoch": 2.991166077738516, + "grad_norm": 0.600168764591217, + "learning_rate": 2.8325055046231232e-05, + "loss": 0.2954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37289607524871826, + "step": 3385 + }, + { + "epoch": 2.995583038869258, + "grad_norm": 0.6167921423912048, + "learning_rate": 2.8284977528667806e-05, + "loss": 0.3104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2748357951641083, + "step": 3390 + }, + { + "epoch": 3.0008833922261484, + "grad_norm": 0.5644031763076782, + "learning_rate": 2.8244859810735304e-05, + "loss": 0.2734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31137675046920776, + "step": 3395 + }, + { + "epoch": 3.0053003533568905, + "grad_norm": 0.5274640917778015, + "learning_rate": 2.8204702087092907e-05, + "loss": 0.2752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24842819571495056, + "step": 3400 + }, + { + "epoch": 3.0097173144876326, + "grad_norm": 0.6265926361083984, + "learning_rate": 2.8164504552593946e-05, + "loss": 0.2768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27338293194770813, + "step": 3405 + }, + { + "epoch": 3.0141342756183747, + "grad_norm": 0.6399211287498474, + "learning_rate": 2.8124267402284892e-05, + "loss": 0.2868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3242674469947815, + "step": 3410 + }, + { + "epoch": 3.0185512367491167, + "grad_norm": 0.5678868889808655, + "learning_rate": 2.808399083140445e-05, + "loss": 0.2472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2346222996711731, + "step": 3415 + }, + { + "epoch": 3.022968197879859, + "grad_norm": 0.6776866912841797, + "learning_rate": 2.804367503538261e-05, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3134090304374695, + "step": 3420 + }, + { + "epoch": 3.027385159010601, + "grad_norm": 0.6301928758621216, + "learning_rate": 2.800332020983968e-05, + "loss": 0.297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26638519763946533, + "step": 3425 + }, + { + "epoch": 3.0318021201413425, + "grad_norm": 0.6153246164321899, + "learning_rate": 2.796292655058535e-05, + "loss": 0.2712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2534346878528595, + "step": 3430 + }, + { + "epoch": 3.0362190812720846, + "grad_norm": 0.6437485814094543, + "learning_rate": 2.792249425361773e-05, + "loss": 0.2873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3017566204071045, + "step": 3435 + }, + { + "epoch": 3.0406360424028267, + "grad_norm": 0.5970696210861206, + "learning_rate": 2.788202351512243e-05, + "loss": 0.2605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27321383357048035, + "step": 3440 + }, + { + "epoch": 3.045053003533569, + "grad_norm": 0.6142945885658264, + "learning_rate": 2.7841514531471574e-05, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2881008982658386, + "step": 3445 + }, + { + "epoch": 3.049469964664311, + "grad_norm": 0.6773906350135803, + "learning_rate": 2.7800967499222845e-05, + "loss": 0.262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2629165053367615, + "step": 3450 + }, + { + "epoch": 3.053886925795053, + "grad_norm": 0.545464038848877, + "learning_rate": 2.7760382615118562e-05, + "loss": 0.25, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27945563197135925, + "step": 3455 + }, + { + "epoch": 3.058303886925795, + "grad_norm": 0.6101946234703064, + "learning_rate": 2.7719760076084713e-05, + "loss": 0.2938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.268838107585907, + "step": 3460 + }, + { + "epoch": 3.062720848056537, + "grad_norm": 2.510591506958008, + "learning_rate": 2.7679100079229982e-05, + "loss": 0.2674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20418402552604675, + "step": 3465 + }, + { + "epoch": 3.067137809187279, + "grad_norm": 0.5778970122337341, + "learning_rate": 2.7638402821844808e-05, + "loss": 0.281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33357682824134827, + "step": 3470 + }, + { + "epoch": 3.0715547703180213, + "grad_norm": 0.630087673664093, + "learning_rate": 2.7597668501400436e-05, + "loss": 0.2591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28215503692626953, + "step": 3475 + }, + { + "epoch": 3.0759717314487633, + "grad_norm": 0.6290969848632812, + "learning_rate": 2.7556897315547934e-05, + "loss": 0.29, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2777438461780548, + "step": 3480 + }, + { + "epoch": 3.0803886925795054, + "grad_norm": 0.5843200087547302, + "learning_rate": 2.7516089462117265e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.195445254445076, + "step": 3485 + }, + { + "epoch": 3.0848056537102475, + "grad_norm": 0.6632335186004639, + "learning_rate": 2.747524513911629e-05, + "loss": 0.259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3661026358604431, + "step": 3490 + }, + { + "epoch": 3.0892226148409896, + "grad_norm": 0.6285350918769836, + "learning_rate": 2.7434364544729844e-05, + "loss": 0.2747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32909315824508667, + "step": 3495 + }, + { + "epoch": 3.0936395759717312, + "grad_norm": 0.5703840255737305, + "learning_rate": 2.7393447877318756e-05, + "loss": 0.2907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34857630729675293, + "step": 3500 + }, + { + "epoch": 3.0980565371024733, + "grad_norm": 0.5537542104721069, + "learning_rate": 2.735249533541888e-05, + "loss": 0.2578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3032943606376648, + "step": 3505 + }, + { + "epoch": 3.1024734982332154, + "grad_norm": 0.6345096826553345, + "learning_rate": 2.7311507117740138e-05, + "loss": 0.261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2263367921113968, + "step": 3510 + }, + { + "epoch": 3.1068904593639575, + "grad_norm": 0.671589195728302, + "learning_rate": 2.7270483423165578e-05, + "loss": 0.2604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23606635630130768, + "step": 3515 + }, + { + "epoch": 3.1113074204946995, + "grad_norm": 0.6208762526512146, + "learning_rate": 2.7229424450750378e-05, + "loss": 0.2418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26206400990486145, + "step": 3520 + }, + { + "epoch": 3.1157243816254416, + "grad_norm": 0.6254077553749084, + "learning_rate": 2.7188330399720883e-05, + "loss": 0.2593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21936869621276855, + "step": 3525 + }, + { + "epoch": 3.1201413427561837, + "grad_norm": 0.6420286893844604, + "learning_rate": 2.7147201469473645e-05, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28934288024902344, + "step": 3530 + }, + { + "epoch": 3.124558303886926, + "grad_norm": 0.8817894458770752, + "learning_rate": 2.7106037859574482e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23228083550930023, + "step": 3535 + }, + { + "epoch": 3.128975265017668, + "grad_norm": 0.686221718788147, + "learning_rate": 2.706483976975746e-05, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2681628465652466, + "step": 3540 + }, + { + "epoch": 3.13339222614841, + "grad_norm": 0.6326223611831665, + "learning_rate": 2.702360739992395e-05, + "loss": 0.2812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2811989486217499, + "step": 3545 + }, + { + "epoch": 3.137809187279152, + "grad_norm": 0.7377673387527466, + "learning_rate": 2.698234095014167e-05, + "loss": 0.2648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26155680418014526, + "step": 3550 + }, + { + "epoch": 3.142226148409894, + "grad_norm": 0.6575599312782288, + "learning_rate": 2.6941040620643685e-05, + "loss": 0.2887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31103795766830444, + "step": 3555 + }, + { + "epoch": 3.146643109540636, + "grad_norm": 0.6150068044662476, + "learning_rate": 2.689970661182747e-05, + "loss": 0.2926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25528043508529663, + "step": 3560 + }, + { + "epoch": 3.1510600706713783, + "grad_norm": 0.5725194215774536, + "learning_rate": 2.6858339124253902e-05, + "loss": 0.288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25137802958488464, + "step": 3565 + }, + { + "epoch": 3.1554770318021204, + "grad_norm": 0.5649983286857605, + "learning_rate": 2.681693835864631e-05, + "loss": 0.2707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2684163451194763, + "step": 3570 + }, + { + "epoch": 3.159893992932862, + "grad_norm": 0.6405424475669861, + "learning_rate": 2.6775504515889498e-05, + "loss": 0.2665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27231472730636597, + "step": 3575 + }, + { + "epoch": 3.164310954063604, + "grad_norm": 0.5784630179405212, + "learning_rate": 2.6734037797028764e-05, + "loss": 0.298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25174811482429504, + "step": 3580 + }, + { + "epoch": 3.168727915194346, + "grad_norm": 0.572640061378479, + "learning_rate": 2.6692538403268916e-05, + "loss": 0.2867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22904187440872192, + "step": 3585 + }, + { + "epoch": 3.1731448763250882, + "grad_norm": 0.6837023496627808, + "learning_rate": 2.6651006535973327e-05, + "loss": 0.3015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2275552749633789, + "step": 3590 + }, + { + "epoch": 3.1775618374558303, + "grad_norm": 0.5804446935653687, + "learning_rate": 2.660944239666293e-05, + "loss": 0.2832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29134559631347656, + "step": 3595 + }, + { + "epoch": 3.1819787985865724, + "grad_norm": 0.6767000555992126, + "learning_rate": 2.6567846187015245e-05, + "loss": 0.2804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3343573808670044, + "step": 3600 + }, + { + "epoch": 3.1863957597173145, + "grad_norm": 0.615800142288208, + "learning_rate": 2.6526218108863408e-05, + "loss": 0.3103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42924535274505615, + "step": 3605 + }, + { + "epoch": 3.1908127208480566, + "grad_norm": 0.6070627570152283, + "learning_rate": 2.648455836419518e-05, + "loss": 0.262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2295074164867401, + "step": 3610 + }, + { + "epoch": 3.1952296819787986, + "grad_norm": 0.6150861382484436, + "learning_rate": 2.6442867155151984e-05, + "loss": 0.2611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24106386303901672, + "step": 3615 + }, + { + "epoch": 3.1996466431095407, + "grad_norm": 0.6103044748306274, + "learning_rate": 2.6401144684027915e-05, + "loss": 0.2458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26913753151893616, + "step": 3620 + }, + { + "epoch": 3.204063604240283, + "grad_norm": 0.6718603372573853, + "learning_rate": 2.635939115326874e-05, + "loss": 0.2722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28498226404190063, + "step": 3625 + }, + { + "epoch": 3.208480565371025, + "grad_norm": 0.6954282522201538, + "learning_rate": 2.631760676547096e-05, + "loss": 0.3019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22322580218315125, + "step": 3630 + }, + { + "epoch": 3.212897526501767, + "grad_norm": 0.6374308466911316, + "learning_rate": 2.6275791723380772e-05, + "loss": 0.2825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25081831216812134, + "step": 3635 + }, + { + "epoch": 3.2173144876325086, + "grad_norm": 0.6096289753913879, + "learning_rate": 2.6233946229893147e-05, + "loss": 0.2657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23818659782409668, + "step": 3640 + }, + { + "epoch": 3.2217314487632507, + "grad_norm": 0.6019396185874939, + "learning_rate": 2.6192070488050783e-05, + "loss": 0.2425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26665595173835754, + "step": 3645 + }, + { + "epoch": 3.2261484098939928, + "grad_norm": 0.6398298144340515, + "learning_rate": 2.615016470104316e-05, + "loss": 0.2519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21210452914237976, + "step": 3650 + }, + { + "epoch": 3.230565371024735, + "grad_norm": 0.5565531253814697, + "learning_rate": 2.6108229072205545e-05, + "loss": 0.31, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2653741240501404, + "step": 3655 + }, + { + "epoch": 3.234982332155477, + "grad_norm": 0.6310369968414307, + "learning_rate": 2.606626380501801e-05, + "loss": 0.2921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3396562337875366, + "step": 3660 + }, + { + "epoch": 3.239399293286219, + "grad_norm": 0.6153047680854797, + "learning_rate": 2.6024269103104417e-05, + "loss": 0.2716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28231340646743774, + "step": 3665 + }, + { + "epoch": 3.243816254416961, + "grad_norm": 0.667517900466919, + "learning_rate": 2.5982245170231467e-05, + "loss": 0.264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32096004486083984, + "step": 3670 + }, + { + "epoch": 3.248233215547703, + "grad_norm": 0.6767958998680115, + "learning_rate": 2.5940192210307697e-05, + "loss": 0.2544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3398870825767517, + "step": 3675 + }, + { + "epoch": 3.2526501766784452, + "grad_norm": 0.6170216798782349, + "learning_rate": 2.5898110427382487e-05, + "loss": 0.2543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2500012516975403, + "step": 3680 + }, + { + "epoch": 3.2570671378091873, + "grad_norm": 0.5803675055503845, + "learning_rate": 2.5856000025645065e-05, + "loss": 0.2803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2736228108406067, + "step": 3685 + }, + { + "epoch": 3.2614840989399294, + "grad_norm": 0.6387597322463989, + "learning_rate": 2.581386120942353e-05, + "loss": 0.3275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28693830966949463, + "step": 3690 + }, + { + "epoch": 3.2659010600706715, + "grad_norm": 0.6041744351387024, + "learning_rate": 2.577169418318385e-05, + "loss": 0.309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3677545189857483, + "step": 3695 + }, + { + "epoch": 3.2703180212014136, + "grad_norm": 0.591244637966156, + "learning_rate": 2.5729499151528877e-05, + "loss": 0.2956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3903142213821411, + "step": 3700 + }, + { + "epoch": 3.2747349823321557, + "grad_norm": 0.5570688247680664, + "learning_rate": 2.568727631919735e-05, + "loss": 0.3056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32988813519477844, + "step": 3705 + }, + { + "epoch": 3.2791519434628977, + "grad_norm": 0.7770934104919434, + "learning_rate": 2.5645025891062897e-05, + "loss": 0.2645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2805140018463135, + "step": 3710 + }, + { + "epoch": 3.28356890459364, + "grad_norm": 0.5827252864837646, + "learning_rate": 2.5602748072133054e-05, + "loss": 0.2863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25772303342819214, + "step": 3715 + }, + { + "epoch": 3.2879858657243815, + "grad_norm": 0.6013981103897095, + "learning_rate": 2.5560443067548263e-05, + "loss": 0.2814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24229644238948822, + "step": 3720 + }, + { + "epoch": 3.2924028268551235, + "grad_norm": 0.5573179721832275, + "learning_rate": 2.5518111082580873e-05, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2687339782714844, + "step": 3725 + }, + { + "epoch": 3.2968197879858656, + "grad_norm": 0.5730259418487549, + "learning_rate": 2.547575232263414e-05, + "loss": 0.2566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2892979681491852, + "step": 3730 + }, + { + "epoch": 3.3012367491166077, + "grad_norm": 0.578188955783844, + "learning_rate": 2.5433366993241252e-05, + "loss": 0.2739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25793546438217163, + "step": 3735 + }, + { + "epoch": 3.3056537102473498, + "grad_norm": 0.6442872881889343, + "learning_rate": 2.5390955300064306e-05, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23315435647964478, + "step": 3740 + }, + { + "epoch": 3.310070671378092, + "grad_norm": 0.6832812428474426, + "learning_rate": 2.5348517448893323e-05, + "loss": 0.283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28596487641334534, + "step": 3745 + }, + { + "epoch": 3.314487632508834, + "grad_norm": 0.5652126669883728, + "learning_rate": 2.530605364564526e-05, + "loss": 0.306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22419892251491547, + "step": 3750 + }, + { + "epoch": 3.318904593639576, + "grad_norm": 0.5962374806404114, + "learning_rate": 2.5263564096362972e-05, + "loss": 0.2711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23518124222755432, + "step": 3755 + }, + { + "epoch": 3.323321554770318, + "grad_norm": 0.5664314031600952, + "learning_rate": 2.5221049007214276e-05, + "loss": 0.2561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2558237910270691, + "step": 3760 + }, + { + "epoch": 3.32773851590106, + "grad_norm": 0.5694194436073303, + "learning_rate": 2.5178508584490882e-05, + "loss": 0.2672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2523389458656311, + "step": 3765 + }, + { + "epoch": 3.3321554770318023, + "grad_norm": 0.926784873008728, + "learning_rate": 2.5135943034607434e-05, + "loss": 0.3056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3541930913925171, + "step": 3770 + }, + { + "epoch": 3.3365724381625443, + "grad_norm": 0.65824294090271, + "learning_rate": 2.50933525641005e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19914916157722473, + "step": 3775 + }, + { + "epoch": 3.340989399293286, + "grad_norm": 0.62986159324646, + "learning_rate": 2.5050737379627575e-05, + "loss": 0.2822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.285235196352005, + "step": 3780 + }, + { + "epoch": 3.345406360424028, + "grad_norm": 0.5901169776916504, + "learning_rate": 2.5008097687966052e-05, + "loss": 0.2573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3077230453491211, + "step": 3785 + }, + { + "epoch": 3.34982332155477, + "grad_norm": 0.6803475618362427, + "learning_rate": 2.4965433696012255e-05, + "loss": 0.3068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3818191885948181, + "step": 3790 + }, + { + "epoch": 3.354240282685512, + "grad_norm": 0.5957804918289185, + "learning_rate": 2.49227456107804e-05, + "loss": 0.3033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3599034249782562, + "step": 3795 + }, + { + "epoch": 3.3586572438162543, + "grad_norm": 0.5898772478103638, + "learning_rate": 2.488003363940163e-05, + "loss": 0.2727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.275057315826416, + "step": 3800 + }, + { + "epoch": 3.3630742049469964, + "grad_norm": 0.6106370687484741, + "learning_rate": 2.4837297989122987e-05, + "loss": 0.2831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31545543670654297, + "step": 3805 + }, + { + "epoch": 3.3674911660777385, + "grad_norm": 0.5764063596725464, + "learning_rate": 2.4794538867306385e-05, + "loss": 0.2837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30026909708976746, + "step": 3810 + }, + { + "epoch": 3.3719081272084805, + "grad_norm": 0.5482817888259888, + "learning_rate": 2.4751756481427637e-05, + "loss": 0.2699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23257222771644592, + "step": 3815 + }, + { + "epoch": 3.3763250883392226, + "grad_norm": 0.6209834218025208, + "learning_rate": 2.4708951039075462e-05, + "loss": 0.2375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21792644262313843, + "step": 3820 + }, + { + "epoch": 3.3807420494699647, + "grad_norm": 0.5752567648887634, + "learning_rate": 2.4666122747950416e-05, + "loss": 0.257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2612408995628357, + "step": 3825 + }, + { + "epoch": 3.385159010600707, + "grad_norm": 0.594068706035614, + "learning_rate": 2.4623271815863943e-05, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26158034801483154, + "step": 3830 + }, + { + "epoch": 3.389575971731449, + "grad_norm": 0.6304320693016052, + "learning_rate": 2.4580398450737338e-05, + "loss": 0.3036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3264097571372986, + "step": 3835 + }, + { + "epoch": 3.393992932862191, + "grad_norm": 0.7010661959648132, + "learning_rate": 2.4537502860600754e-05, + "loss": 0.2842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2843078374862671, + "step": 3840 + }, + { + "epoch": 3.398409893992933, + "grad_norm": 0.6101694107055664, + "learning_rate": 2.4494585253592184e-05, + "loss": 0.2717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2982633113861084, + "step": 3845 + }, + { + "epoch": 3.402826855123675, + "grad_norm": 0.5913658738136292, + "learning_rate": 2.445164583795643e-05, + "loss": 0.2567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3931722640991211, + "step": 3850 + }, + { + "epoch": 3.407243816254417, + "grad_norm": 0.6326600313186646, + "learning_rate": 2.4408684822044152e-05, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1966112107038498, + "step": 3855 + }, + { + "epoch": 3.411660777385159, + "grad_norm": 0.5460976958274841, + "learning_rate": 2.4365702414310786e-05, + "loss": 0.2891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2495535910129547, + "step": 3860 + }, + { + "epoch": 3.416077738515901, + "grad_norm": 0.5842785835266113, + "learning_rate": 2.4322698823315572e-05, + "loss": 0.2936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22732970118522644, + "step": 3865 + }, + { + "epoch": 3.420494699646643, + "grad_norm": 0.5686548948287964, + "learning_rate": 2.4279674257720548e-05, + "loss": 0.2531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24848207831382751, + "step": 3870 + }, + { + "epoch": 3.424911660777385, + "grad_norm": 0.6319994926452637, + "learning_rate": 2.4236628926289506e-05, + "loss": 0.2879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2527256906032562, + "step": 3875 + }, + { + "epoch": 3.429328621908127, + "grad_norm": 0.9111738204956055, + "learning_rate": 2.4193563037887025e-05, + "loss": 0.2488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27606528997421265, + "step": 3880 + }, + { + "epoch": 3.4337455830388692, + "grad_norm": 0.9722045063972473, + "learning_rate": 2.4150476801477404e-05, + "loss": 0.2452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24321609735488892, + "step": 3885 + }, + { + "epoch": 3.4381625441696113, + "grad_norm": 0.5848979949951172, + "learning_rate": 2.4107370426123685e-05, + "loss": 0.2405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2286883294582367, + "step": 3890 + }, + { + "epoch": 3.4425795053003534, + "grad_norm": 0.7078244686126709, + "learning_rate": 2.406424412098664e-05, + "loss": 0.2926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2944197654724121, + "step": 3895 + }, + { + "epoch": 3.4469964664310955, + "grad_norm": 0.516459584236145, + "learning_rate": 2.4021098095323713e-05, + "loss": 0.2537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23038452863693237, + "step": 3900 + }, + { + "epoch": 3.4514134275618376, + "grad_norm": 0.6297348737716675, + "learning_rate": 2.3977932558488074e-05, + "loss": 0.2725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28293001651763916, + "step": 3905 + }, + { + "epoch": 3.4558303886925796, + "grad_norm": 0.6394320726394653, + "learning_rate": 2.3934747719927534e-05, + "loss": 0.2794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29240959882736206, + "step": 3910 + }, + { + "epoch": 3.4602473498233217, + "grad_norm": 0.5845627188682556, + "learning_rate": 2.3891543789183573e-05, + "loss": 0.2999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34419286251068115, + "step": 3915 + }, + { + "epoch": 3.464664310954064, + "grad_norm": 0.5385047197341919, + "learning_rate": 2.3848320975890316e-05, + "loss": 0.2683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23887717723846436, + "step": 3920 + }, + { + "epoch": 3.4690812720848054, + "grad_norm": 0.5908883810043335, + "learning_rate": 2.3805079489773508e-05, + "loss": 0.2442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2729918360710144, + "step": 3925 + }, + { + "epoch": 3.4734982332155475, + "grad_norm": 0.6180974841117859, + "learning_rate": 2.376181954064948e-05, + "loss": 0.2982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2929500937461853, + "step": 3930 + }, + { + "epoch": 3.4779151943462896, + "grad_norm": 0.5845738649368286, + "learning_rate": 2.3718541338424176e-05, + "loss": 0.2994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2808518409729004, + "step": 3935 + }, + { + "epoch": 3.4823321554770317, + "grad_norm": 0.7396969795227051, + "learning_rate": 2.3675245093092082e-05, + "loss": 0.2486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2241256982088089, + "step": 3940 + }, + { + "epoch": 3.4867491166077738, + "grad_norm": 0.6327768564224243, + "learning_rate": 2.3631931014735258e-05, + "loss": 0.2812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2858206033706665, + "step": 3945 + }, + { + "epoch": 3.491166077738516, + "grad_norm": 0.6281271576881409, + "learning_rate": 2.358859931352227e-05, + "loss": 0.2562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22891634702682495, + "step": 3950 + }, + { + "epoch": 3.495583038869258, + "grad_norm": 0.636397659778595, + "learning_rate": 2.3545250199707207e-05, + "loss": 0.2416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2959528863430023, + "step": 3955 + }, + { + "epoch": 3.5, + "grad_norm": 0.6934898495674133, + "learning_rate": 2.350188388362865e-05, + "loss": 0.2578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3115631937980652, + "step": 3960 + }, + { + "epoch": 3.504416961130742, + "grad_norm": 0.5889595150947571, + "learning_rate": 2.3458500575708642e-05, + "loss": 0.2683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31395983695983887, + "step": 3965 + }, + { + "epoch": 3.508833922261484, + "grad_norm": 0.6764131188392639, + "learning_rate": 2.341510048645167e-05, + "loss": 0.2803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.301558256149292, + "step": 3970 + }, + { + "epoch": 3.5132508833922262, + "grad_norm": 0.6070583462715149, + "learning_rate": 2.337168382644367e-05, + "loss": 0.3018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3173370063304901, + "step": 3975 + }, + { + "epoch": 3.5176678445229683, + "grad_norm": 0.5840939283370972, + "learning_rate": 2.332825080635094e-05, + "loss": 0.3153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3090516924858093, + "step": 3980 + }, + { + "epoch": 3.5220848056537104, + "grad_norm": 0.7202004790306091, + "learning_rate": 2.3284801636919205e-05, + "loss": 0.2874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41266167163848877, + "step": 3985 + }, + { + "epoch": 3.5265017667844525, + "grad_norm": 0.6619871258735657, + "learning_rate": 2.3241336528972522e-05, + "loss": 0.2659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2852621078491211, + "step": 3990 + }, + { + "epoch": 3.5309187279151946, + "grad_norm": 0.5971029996871948, + "learning_rate": 2.3197855693412295e-05, + "loss": 0.2901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24810791015625, + "step": 3995 + }, + { + "epoch": 3.5353356890459366, + "grad_norm": 0.5984452962875366, + "learning_rate": 2.3154359341216243e-05, + "loss": 0.2733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23727092146873474, + "step": 4000 + }, + { + "epoch": 3.5397526501766783, + "grad_norm": 0.5858767628669739, + "learning_rate": 2.311084768343737e-05, + "loss": 0.2487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27304431796073914, + "step": 4005 + }, + { + "epoch": 3.5441696113074204, + "grad_norm": 0.6337104439735413, + "learning_rate": 2.306732093120295e-05, + "loss": 0.2892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37451407313346863, + "step": 4010 + }, + { + "epoch": 3.5485865724381624, + "grad_norm": 0.6166836023330688, + "learning_rate": 2.3023779295713497e-05, + "loss": 0.2833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25973236560821533, + "step": 4015 + }, + { + "epoch": 3.5530035335689045, + "grad_norm": 0.6111301183700562, + "learning_rate": 2.2980222988241733e-05, + "loss": 0.2633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25971752405166626, + "step": 4020 + }, + { + "epoch": 3.5574204946996466, + "grad_norm": 0.6352372765541077, + "learning_rate": 2.293665222013158e-05, + "loss": 0.2422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21252036094665527, + "step": 4025 + }, + { + "epoch": 3.5618374558303887, + "grad_norm": 0.6189213395118713, + "learning_rate": 2.2893067202797136e-05, + "loss": 0.2314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22057734429836273, + "step": 4030 + }, + { + "epoch": 3.5662544169611308, + "grad_norm": 0.5966265201568604, + "learning_rate": 2.2849468147721615e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26105162501335144, + "step": 4035 + }, + { + "epoch": 3.570671378091873, + "grad_norm": 0.7433478832244873, + "learning_rate": 2.280585526645637e-05, + "loss": 0.272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2277737855911255, + "step": 4040 + }, + { + "epoch": 3.575088339222615, + "grad_norm": 0.6259749531745911, + "learning_rate": 2.2762228770619815e-05, + "loss": 0.2872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1985919028520584, + "step": 4045 + }, + { + "epoch": 3.579505300353357, + "grad_norm": 0.6301651000976562, + "learning_rate": 2.2718588871896454e-05, + "loss": 0.2657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3158514201641083, + "step": 4050 + }, + { + "epoch": 3.583922261484099, + "grad_norm": 0.556121289730072, + "learning_rate": 2.2674935782035804e-05, + "loss": 0.2521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23911920189857483, + "step": 4055 + }, + { + "epoch": 3.5883392226148407, + "grad_norm": 0.5992854833602905, + "learning_rate": 2.2631269712851385e-05, + "loss": 0.2349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23733431100845337, + "step": 4060 + }, + { + "epoch": 3.592756183745583, + "grad_norm": 0.6029264330863953, + "learning_rate": 2.258759087621971e-05, + "loss": 0.26, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30784931778907776, + "step": 4065 + }, + { + "epoch": 3.597173144876325, + "grad_norm": 0.6338687539100647, + "learning_rate": 2.2543899484079245e-05, + "loss": 0.299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26149436831474304, + "step": 4070 + }, + { + "epoch": 3.601590106007067, + "grad_norm": 0.5709251761436462, + "learning_rate": 2.2500195748429352e-05, + "loss": 0.2602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26341015100479126, + "step": 4075 + }, + { + "epoch": 3.606007067137809, + "grad_norm": 0.6511635780334473, + "learning_rate": 2.2456479881329315e-05, + "loss": 0.2457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2105947583913803, + "step": 4080 + }, + { + "epoch": 3.610424028268551, + "grad_norm": 0.5920666456222534, + "learning_rate": 2.2412752094897267e-05, + "loss": 0.2718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25122541189193726, + "step": 4085 + }, + { + "epoch": 3.614840989399293, + "grad_norm": 0.6184494495391846, + "learning_rate": 2.236901260130918e-05, + "loss": 0.2614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19458022713661194, + "step": 4090 + }, + { + "epoch": 3.6192579505300353, + "grad_norm": 0.5893781781196594, + "learning_rate": 2.2325261612797832e-05, + "loss": 0.2518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26221320033073425, + "step": 4095 + }, + { + "epoch": 3.6236749116607774, + "grad_norm": 0.6632538437843323, + "learning_rate": 2.2281499341651767e-05, + "loss": 0.268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2836889624595642, + "step": 4100 + }, + { + "epoch": 3.6280918727915195, + "grad_norm": 0.6132526993751526, + "learning_rate": 2.223772600021429e-05, + "loss": 0.2728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3212493658065796, + "step": 4105 + }, + { + "epoch": 3.6325088339222615, + "grad_norm": 0.5957457423210144, + "learning_rate": 2.2193941800882418e-05, + "loss": 0.3153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4252464175224304, + "step": 4110 + }, + { + "epoch": 3.6369257950530036, + "grad_norm": 0.6638500094413757, + "learning_rate": 2.2150146956105836e-05, + "loss": 0.3003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2801547646522522, + "step": 4115 + }, + { + "epoch": 3.6413427561837457, + "grad_norm": 0.6362109184265137, + "learning_rate": 2.210634167838591e-05, + "loss": 0.2801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36497941613197327, + "step": 4120 + }, + { + "epoch": 3.645759717314488, + "grad_norm": 0.7648757100105286, + "learning_rate": 2.2062526180274607e-05, + "loss": 0.2378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2305619865655899, + "step": 4125 + }, + { + "epoch": 3.65017667844523, + "grad_norm": 0.5891234278678894, + "learning_rate": 2.2018700674373487e-05, + "loss": 0.2642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23286129534244537, + "step": 4130 + }, + { + "epoch": 3.654593639575972, + "grad_norm": 0.5884703993797302, + "learning_rate": 2.1974865373332695e-05, + "loss": 0.281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2888106107711792, + "step": 4135 + }, + { + "epoch": 3.659010600706714, + "grad_norm": 0.5825828313827515, + "learning_rate": 2.1931020489849865e-05, + "loss": 0.2649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2594233453273773, + "step": 4140 + }, + { + "epoch": 3.663427561837456, + "grad_norm": 0.6837796568870544, + "learning_rate": 2.1887166236669154e-05, + "loss": 0.2716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23577167093753815, + "step": 4145 + }, + { + "epoch": 3.6678445229681977, + "grad_norm": 0.558169960975647, + "learning_rate": 2.184330282658018e-05, + "loss": 0.2425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22032378613948822, + "step": 4150 + }, + { + "epoch": 3.67226148409894, + "grad_norm": 0.5991285443305969, + "learning_rate": 2.1799430472416975e-05, + "loss": 0.31, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24205946922302246, + "step": 4155 + }, + { + "epoch": 3.676678445229682, + "grad_norm": 0.5339157581329346, + "learning_rate": 2.1755549387056997e-05, + "loss": 0.2795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24257515370845795, + "step": 4160 + }, + { + "epoch": 3.681095406360424, + "grad_norm": 0.5839236378669739, + "learning_rate": 2.1711659783420043e-05, + "loss": 0.2469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3155287802219391, + "step": 4165 + }, + { + "epoch": 3.685512367491166, + "grad_norm": 0.6157808303833008, + "learning_rate": 2.1667761874467256e-05, + "loss": 0.282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3029802739620209, + "step": 4170 + }, + { + "epoch": 3.689929328621908, + "grad_norm": 0.6158527731895447, + "learning_rate": 2.162385587320008e-05, + "loss": 0.2547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24819602072238922, + "step": 4175 + }, + { + "epoch": 3.6943462897526502, + "grad_norm": 0.6458650231361389, + "learning_rate": 2.1579941992659214e-05, + "loss": 0.2515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2597864866256714, + "step": 4180 + }, + { + "epoch": 3.6987632508833923, + "grad_norm": 0.6469552516937256, + "learning_rate": 2.1536020445923595e-05, + "loss": 0.2546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21913045644760132, + "step": 4185 + }, + { + "epoch": 3.7031802120141344, + "grad_norm": 0.6282652616500854, + "learning_rate": 2.1492091446109372e-05, + "loss": 0.2705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23371529579162598, + "step": 4190 + }, + { + "epoch": 3.7075971731448765, + "grad_norm": 0.5427141785621643, + "learning_rate": 2.1448155206368823e-05, + "loss": 0.2806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26559561491012573, + "step": 4195 + }, + { + "epoch": 3.712014134275618, + "grad_norm": 0.6435478329658508, + "learning_rate": 2.1404211939889392e-05, + "loss": 0.2435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22901174426078796, + "step": 4200 + }, + { + "epoch": 3.71643109540636, + "grad_norm": 0.553631067276001, + "learning_rate": 2.1360261859892594e-05, + "loss": 0.3053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3381679654121399, + "step": 4205 + }, + { + "epoch": 3.7208480565371023, + "grad_norm": 0.6866286993026733, + "learning_rate": 2.1316305179633016e-05, + "loss": 0.3006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3275683522224426, + "step": 4210 + }, + { + "epoch": 3.7252650176678443, + "grad_norm": 0.6093161106109619, + "learning_rate": 2.1272342112397272e-05, + "loss": 0.2419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21553654968738556, + "step": 4215 + }, + { + "epoch": 3.7296819787985864, + "grad_norm": 0.5793571472167969, + "learning_rate": 2.1228372871502955e-05, + "loss": 0.2842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24574121832847595, + "step": 4220 + }, + { + "epoch": 3.7340989399293285, + "grad_norm": 0.6067283153533936, + "learning_rate": 2.1184397670297624e-05, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2613888382911682, + "step": 4225 + }, + { + "epoch": 3.7385159010600706, + "grad_norm": 0.6006746292114258, + "learning_rate": 2.1140416722157765e-05, + "loss": 0.2854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38783538341522217, + "step": 4230 + }, + { + "epoch": 3.7429328621908127, + "grad_norm": 0.5855950117111206, + "learning_rate": 2.1096430240487723e-05, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2570192515850067, + "step": 4235 + }, + { + "epoch": 3.7473498233215548, + "grad_norm": 0.632363498210907, + "learning_rate": 2.105243843871873e-05, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24486814439296722, + "step": 4240 + }, + { + "epoch": 3.751766784452297, + "grad_norm": 0.6137506365776062, + "learning_rate": 2.100844153030779e-05, + "loss": 0.2863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21526217460632324, + "step": 4245 + }, + { + "epoch": 3.756183745583039, + "grad_norm": 0.567816972732544, + "learning_rate": 2.096443972873673e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21241986751556396, + "step": 4250 + }, + { + "epoch": 3.760600706713781, + "grad_norm": 0.6972650289535522, + "learning_rate": 2.0920433247511092e-05, + "loss": 0.2904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3464621901512146, + "step": 4255 + }, + { + "epoch": 3.765017667844523, + "grad_norm": 0.5928968191146851, + "learning_rate": 2.087642230015912e-05, + "loss": 0.2641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24683159589767456, + "step": 4260 + }, + { + "epoch": 3.769434628975265, + "grad_norm": 0.8208529353141785, + "learning_rate": 2.0832407100230747e-05, + "loss": 0.2684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30278170108795166, + "step": 4265 + }, + { + "epoch": 3.7738515901060072, + "grad_norm": 0.5523584485054016, + "learning_rate": 2.078838786129653e-05, + "loss": 0.276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28194743394851685, + "step": 4270 + }, + { + "epoch": 3.7782685512367493, + "grad_norm": 0.6361742615699768, + "learning_rate": 2.0744364796946624e-05, + "loss": 0.2752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3716488778591156, + "step": 4275 + }, + { + "epoch": 3.7826855123674914, + "grad_norm": 0.6412866711616516, + "learning_rate": 2.0700338120789754e-05, + "loss": 0.2855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24867978692054749, + "step": 4280 + }, + { + "epoch": 3.7871024734982335, + "grad_norm": 0.6166922450065613, + "learning_rate": 2.0656308046452157e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25477123260498047, + "step": 4285 + }, + { + "epoch": 3.791519434628975, + "grad_norm": 0.5771932601928711, + "learning_rate": 2.0612274787576565e-05, + "loss": 0.2917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2894698977470398, + "step": 4290 + }, + { + "epoch": 3.795936395759717, + "grad_norm": 0.645137369632721, + "learning_rate": 2.0568238557821175e-05, + "loss": 0.2617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2622634470462799, + "step": 4295 + }, + { + "epoch": 3.8003533568904593, + "grad_norm": 0.6164206266403198, + "learning_rate": 2.0524199570858573e-05, + "loss": 0.2591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28416186571121216, + "step": 4300 + }, + { + "epoch": 3.8047703180212014, + "grad_norm": 0.6087357401847839, + "learning_rate": 2.048015804037474e-05, + "loss": 0.2536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22203269600868225, + "step": 4305 + }, + { + "epoch": 3.8091872791519434, + "grad_norm": 0.6240975260734558, + "learning_rate": 2.0436114180068008e-05, + "loss": 0.2946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3069062829017639, + "step": 4310 + }, + { + "epoch": 3.8136042402826855, + "grad_norm": 0.5895731449127197, + "learning_rate": 2.039206820364798e-05, + "loss": 0.2882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3107955753803253, + "step": 4315 + }, + { + "epoch": 3.8180212014134276, + "grad_norm": 0.6357214450836182, + "learning_rate": 2.034802032483457e-05, + "loss": 0.2684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2024916559457779, + "step": 4320 + }, + { + "epoch": 3.8224381625441697, + "grad_norm": 0.6543165445327759, + "learning_rate": 2.0303970757356894e-05, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2537229657173157, + "step": 4325 + }, + { + "epoch": 3.8268551236749118, + "grad_norm": 0.6982588768005371, + "learning_rate": 2.025991971495226e-05, + "loss": 0.2544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2701827883720398, + "step": 4330 + }, + { + "epoch": 3.831272084805654, + "grad_norm": 0.5591413378715515, + "learning_rate": 2.021586741136516e-05, + "loss": 0.2919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3260684907436371, + "step": 4335 + }, + { + "epoch": 3.835689045936396, + "grad_norm": 0.7472929954528809, + "learning_rate": 2.017181406034617e-05, + "loss": 0.2375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20827150344848633, + "step": 4340 + }, + { + "epoch": 3.8401060070671376, + "grad_norm": 0.8127717971801758, + "learning_rate": 2.0127759875650974e-05, + "loss": 0.2724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18227070569992065, + "step": 4345 + }, + { + "epoch": 3.8445229681978796, + "grad_norm": 0.5871464610099792, + "learning_rate": 2.0083705071039297e-05, + "loss": 0.2649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.307786226272583, + "step": 4350 + }, + { + "epoch": 3.8489399293286217, + "grad_norm": 0.6023159623146057, + "learning_rate": 2.0039649860273855e-05, + "loss": 0.2563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27349045872688293, + "step": 4355 + }, + { + "epoch": 3.853356890459364, + "grad_norm": 1.6955772638320923, + "learning_rate": 1.9995594457119364e-05, + "loss": 0.2609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24877923727035522, + "step": 4360 + }, + { + "epoch": 3.857773851590106, + "grad_norm": 0.6980836987495422, + "learning_rate": 1.995153907534145e-05, + "loss": 0.3256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.317715585231781, + "step": 4365 + }, + { + "epoch": 3.862190812720848, + "grad_norm": 0.6564183831214905, + "learning_rate": 1.990748392870563e-05, + "loss": 0.2733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3203606605529785, + "step": 4370 + }, + { + "epoch": 3.86660777385159, + "grad_norm": 0.5388532876968384, + "learning_rate": 1.986342923097631e-05, + "loss": 0.3132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2857687771320343, + "step": 4375 + }, + { + "epoch": 3.871024734982332, + "grad_norm": 0.5364903211593628, + "learning_rate": 1.98193751959157e-05, + "loss": 0.2478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21258722245693207, + "step": 4380 + }, + { + "epoch": 3.875441696113074, + "grad_norm": 0.636022686958313, + "learning_rate": 1.977532203728278e-05, + "loss": 0.3042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.355568528175354, + "step": 4385 + }, + { + "epoch": 3.8798586572438163, + "grad_norm": 0.5986488461494446, + "learning_rate": 1.9731269968832305e-05, + "loss": 0.2595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2478679120540619, + "step": 4390 + }, + { + "epoch": 3.8842756183745584, + "grad_norm": 0.5643515586853027, + "learning_rate": 1.9687219204313717e-05, + "loss": 0.2887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21933361887931824, + "step": 4395 + }, + { + "epoch": 3.8886925795053005, + "grad_norm": 0.6250163316726685, + "learning_rate": 1.9643169957470157e-05, + "loss": 0.2878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29644590616226196, + "step": 4400 + }, + { + "epoch": 3.8931095406360425, + "grad_norm": 0.6132106781005859, + "learning_rate": 1.959912244203737e-05, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24946492910385132, + "step": 4405 + }, + { + "epoch": 3.8975265017667846, + "grad_norm": 0.596892774105072, + "learning_rate": 1.9555076871742734e-05, + "loss": 0.2952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24082526564598083, + "step": 4410 + }, + { + "epoch": 3.9019434628975267, + "grad_norm": 0.5782345533370972, + "learning_rate": 1.951103346030415e-05, + "loss": 0.2534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2679441571235657, + "step": 4415 + }, + { + "epoch": 3.9063604240282688, + "grad_norm": 0.5399941205978394, + "learning_rate": 1.9466992421429076e-05, + "loss": 0.2328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18268732726573944, + "step": 4420 + }, + { + "epoch": 3.910777385159011, + "grad_norm": 0.6017982959747314, + "learning_rate": 1.9422953968813454e-05, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27265119552612305, + "step": 4425 + }, + { + "epoch": 3.9151943462897525, + "grad_norm": 0.7943002581596375, + "learning_rate": 1.937891831614066e-05, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2352999448776245, + "step": 4430 + }, + { + "epoch": 3.9196113074204946, + "grad_norm": 0.6090646386146545, + "learning_rate": 1.93348856770805e-05, + "loss": 0.2937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3457292914390564, + "step": 4435 + }, + { + "epoch": 3.9240282685512367, + "grad_norm": 0.584635317325592, + "learning_rate": 1.929085626528814e-05, + "loss": 0.2579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30109745264053345, + "step": 4440 + }, + { + "epoch": 3.9284452296819787, + "grad_norm": 0.6093024611473083, + "learning_rate": 1.9246830294403108e-05, + "loss": 0.2978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3744407594203949, + "step": 4445 + }, + { + "epoch": 3.932862190812721, + "grad_norm": 0.594610333442688, + "learning_rate": 1.920280797804822e-05, + "loss": 0.2748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25274011492729187, + "step": 4450 + }, + { + "epoch": 3.937279151943463, + "grad_norm": 0.5948688387870789, + "learning_rate": 1.915878952982857e-05, + "loss": 0.3056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2250659167766571, + "step": 4455 + }, + { + "epoch": 3.941696113074205, + "grad_norm": 0.608898937702179, + "learning_rate": 1.911477516333048e-05, + "loss": 0.2758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2589750289916992, + "step": 4460 + }, + { + "epoch": 3.946113074204947, + "grad_norm": 0.710386335849762, + "learning_rate": 1.907076509212046e-05, + "loss": 0.2521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19801661372184753, + "step": 4465 + }, + { + "epoch": 3.950530035335689, + "grad_norm": 0.6093432307243347, + "learning_rate": 1.9026759529744187e-05, + "loss": 0.3113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26044270396232605, + "step": 4470 + }, + { + "epoch": 3.954946996466431, + "grad_norm": 0.546137809753418, + "learning_rate": 1.8982758689725447e-05, + "loss": 0.2627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19887375831604004, + "step": 4475 + }, + { + "epoch": 3.9593639575971733, + "grad_norm": 0.5550782084465027, + "learning_rate": 1.8938762785565137e-05, + "loss": 0.2416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2474510371685028, + "step": 4480 + }, + { + "epoch": 3.963780918727915, + "grad_norm": 0.5750576853752136, + "learning_rate": 1.8894772030740182e-05, + "loss": 0.2648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26418790221214294, + "step": 4485 + }, + { + "epoch": 3.968197879858657, + "grad_norm": 0.6318395137786865, + "learning_rate": 1.8850786638702528e-05, + "loss": 0.29, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2796365022659302, + "step": 4490 + }, + { + "epoch": 3.972614840989399, + "grad_norm": 0.6644579172134399, + "learning_rate": 1.88068068228781e-05, + "loss": 0.2798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20628443360328674, + "step": 4495 + }, + { + "epoch": 3.977031802120141, + "grad_norm": 0.6782865524291992, + "learning_rate": 1.876283279666576e-05, + "loss": 0.2907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3842836320400238, + "step": 4500 + }, + { + "epoch": 3.9814487632508833, + "grad_norm": 0.5653501152992249, + "learning_rate": 1.87188647734363e-05, + "loss": 0.2882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2926984429359436, + "step": 4505 + }, + { + "epoch": 3.9858657243816253, + "grad_norm": 0.6898596286773682, + "learning_rate": 1.8674902966531354e-05, + "loss": 0.2813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2633523941040039, + "step": 4510 + }, + { + "epoch": 3.9902826855123674, + "grad_norm": 0.5879707336425781, + "learning_rate": 1.8630947589262417e-05, + "loss": 0.2905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27371013164520264, + "step": 4515 + }, + { + "epoch": 3.9946996466431095, + "grad_norm": 0.6024365425109863, + "learning_rate": 1.858699885490977e-05, + "loss": 0.2682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26837724447250366, + "step": 4520 + }, + { + "epoch": 3.9991166077738516, + "grad_norm": 0.7211329340934753, + "learning_rate": 1.8543056976721472e-05, + "loss": 0.2526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.221034437417984, + "step": 4525 + }, + { + "epoch": 4.004416961130742, + "grad_norm": 0.5379669070243835, + "learning_rate": 1.84991221679123e-05, + "loss": 0.2219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1902405172586441, + "step": 4530 + }, + { + "epoch": 4.008833922261484, + "grad_norm": 0.546558141708374, + "learning_rate": 1.845519464166275e-05, + "loss": 0.2414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19735684990882874, + "step": 4535 + }, + { + "epoch": 4.013250883392226, + "grad_norm": 0.6087609529495239, + "learning_rate": 1.8411274611117974e-05, + "loss": 0.2617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2241283804178238, + "step": 4540 + }, + { + "epoch": 4.017667844522968, + "grad_norm": 0.5435627698898315, + "learning_rate": 1.836736228938674e-05, + "loss": 0.2354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20003776252269745, + "step": 4545 + }, + { + "epoch": 4.02208480565371, + "grad_norm": 0.7864016890525818, + "learning_rate": 1.832345788954043e-05, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20103882253170013, + "step": 4550 + }, + { + "epoch": 4.0265017667844525, + "grad_norm": 0.6416686773300171, + "learning_rate": 1.8279561624611962e-05, + "loss": 0.2612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22663789987564087, + "step": 4555 + }, + { + "epoch": 4.030918727915195, + "grad_norm": 0.5910441875457764, + "learning_rate": 1.8235673707594822e-05, + "loss": 0.2545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24896244704723358, + "step": 4560 + }, + { + "epoch": 4.035335689045937, + "grad_norm": 0.6123059988021851, + "learning_rate": 1.819179435144195e-05, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3180444836616516, + "step": 4565 + }, + { + "epoch": 4.039752650176679, + "grad_norm": 0.6581359505653381, + "learning_rate": 1.8147923769064776e-05, + "loss": 0.2517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23575004935264587, + "step": 4570 + }, + { + "epoch": 4.044169611307421, + "grad_norm": 0.6589792370796204, + "learning_rate": 1.8104062173332134e-05, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2652082145214081, + "step": 4575 + }, + { + "epoch": 4.048586572438163, + "grad_norm": 0.6208878755569458, + "learning_rate": 1.8060209777069267e-05, + "loss": 0.2391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2510373592376709, + "step": 4580 + }, + { + "epoch": 4.053003533568905, + "grad_norm": 0.7314044833183289, + "learning_rate": 1.801636679305679e-05, + "loss": 0.2537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23933479189872742, + "step": 4585 + }, + { + "epoch": 4.057420494699647, + "grad_norm": 0.7164866328239441, + "learning_rate": 1.797253343402962e-05, + "loss": 0.2101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2189275324344635, + "step": 4590 + }, + { + "epoch": 4.061837455830389, + "grad_norm": 0.7242477536201477, + "learning_rate": 1.7928709912676e-05, + "loss": 0.2666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23622474074363708, + "step": 4595 + }, + { + "epoch": 4.06625441696113, + "grad_norm": 0.6429215669631958, + "learning_rate": 1.788489644163642e-05, + "loss": 0.2344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2727344036102295, + "step": 4600 + }, + { + "epoch": 4.070671378091872, + "grad_norm": 0.6153692603111267, + "learning_rate": 1.784109323350261e-05, + "loss": 0.2724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3209673762321472, + "step": 4605 + }, + { + "epoch": 4.0750883392226145, + "grad_norm": 0.6157684922218323, + "learning_rate": 1.77973005008165e-05, + "loss": 0.2666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25213688611984253, + "step": 4610 + }, + { + "epoch": 4.079505300353357, + "grad_norm": 0.5954070687294006, + "learning_rate": 1.7753518456069198e-05, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23194274306297302, + "step": 4615 + }, + { + "epoch": 4.083922261484099, + "grad_norm": 0.6085749864578247, + "learning_rate": 1.770974731169995e-05, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23410743474960327, + "step": 4620 + }, + { + "epoch": 4.088339222614841, + "grad_norm": 0.5981534123420715, + "learning_rate": 1.76659872800951e-05, + "loss": 0.2454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2934369444847107, + "step": 4625 + }, + { + "epoch": 4.092756183745583, + "grad_norm": 0.6048264503479004, + "learning_rate": 1.7622238573587093e-05, + "loss": 0.2482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3395196795463562, + "step": 4630 + }, + { + "epoch": 4.097173144876325, + "grad_norm": 0.5977709293365479, + "learning_rate": 1.7578501404453388e-05, + "loss": 0.281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25631198287010193, + "step": 4635 + }, + { + "epoch": 4.101590106007067, + "grad_norm": 0.6589481234550476, + "learning_rate": 1.7534775984915503e-05, + "loss": 0.2383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2767007350921631, + "step": 4640 + }, + { + "epoch": 4.106007067137809, + "grad_norm": 0.6230020523071289, + "learning_rate": 1.7491062527137912e-05, + "loss": 0.2795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27050620317459106, + "step": 4645 + }, + { + "epoch": 4.110424028268551, + "grad_norm": 0.6690418720245361, + "learning_rate": 1.744736124322707e-05, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26453179121017456, + "step": 4650 + }, + { + "epoch": 4.114840989399293, + "grad_norm": 0.5822317004203796, + "learning_rate": 1.7403672345230342e-05, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24094851315021515, + "step": 4655 + }, + { + "epoch": 4.119257950530035, + "grad_norm": 0.6514151692390442, + "learning_rate": 1.7359996045135007e-05, + "loss": 0.2192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1770108938217163, + "step": 4660 + }, + { + "epoch": 4.123674911660777, + "grad_norm": 0.5683082938194275, + "learning_rate": 1.7316332554867224e-05, + "loss": 0.2665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2293209284543991, + "step": 4665 + }, + { + "epoch": 4.1280918727915195, + "grad_norm": 0.6644694805145264, + "learning_rate": 1.7272682086290982e-05, + "loss": 0.2602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.411828875541687, + "step": 4670 + }, + { + "epoch": 4.1325088339222615, + "grad_norm": 0.6769957542419434, + "learning_rate": 1.722904485120709e-05, + "loss": 0.2498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23401038348674774, + "step": 4675 + }, + { + "epoch": 4.136925795053004, + "grad_norm": 0.615993857383728, + "learning_rate": 1.7185421061352135e-05, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2185857892036438, + "step": 4680 + }, + { + "epoch": 4.141342756183746, + "grad_norm": 0.680887758731842, + "learning_rate": 1.7141810928397495e-05, + "loss": 0.2512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2617225646972656, + "step": 4685 + }, + { + "epoch": 4.145759717314488, + "grad_norm": 0.6120012998580933, + "learning_rate": 1.7098214663948243e-05, + "loss": 0.2467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24213114380836487, + "step": 4690 + }, + { + "epoch": 4.15017667844523, + "grad_norm": 0.655899703502655, + "learning_rate": 1.7054632479542196e-05, + "loss": 0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2701648473739624, + "step": 4695 + }, + { + "epoch": 4.154593639575972, + "grad_norm": 0.6427193880081177, + "learning_rate": 1.7011064586648828e-05, + "loss": 0.2549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36555686593055725, + "step": 4700 + }, + { + "epoch": 4.159010600706714, + "grad_norm": 0.77866530418396, + "learning_rate": 1.6967511196668277e-05, + "loss": 0.2433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2713695466518402, + "step": 4705 + }, + { + "epoch": 4.163427561837456, + "grad_norm": 0.6055473685264587, + "learning_rate": 1.6923972520930307e-05, + "loss": 0.2595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2517254948616028, + "step": 4710 + }, + { + "epoch": 4.167844522968198, + "grad_norm": 0.6935552358627319, + "learning_rate": 1.688044877069328e-05, + "loss": 0.2606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2867121696472168, + "step": 4715 + }, + { + "epoch": 4.17226148409894, + "grad_norm": 0.5792904496192932, + "learning_rate": 1.6836940157143152e-05, + "loss": 0.2477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26725780963897705, + "step": 4720 + }, + { + "epoch": 4.176678445229682, + "grad_norm": 0.6815539002418518, + "learning_rate": 1.6793446891392422e-05, + "loss": 0.2758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31988525390625, + "step": 4725 + }, + { + "epoch": 4.181095406360424, + "grad_norm": 0.6169085502624512, + "learning_rate": 1.6749969184479116e-05, + "loss": 0.2304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24607014656066895, + "step": 4730 + }, + { + "epoch": 4.1855123674911665, + "grad_norm": 0.6831942200660706, + "learning_rate": 1.670650724736577e-05, + "loss": 0.2323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23659402132034302, + "step": 4735 + }, + { + "epoch": 4.189929328621908, + "grad_norm": 0.6516426205635071, + "learning_rate": 1.66630612909384e-05, + "loss": 0.2365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2049470990896225, + "step": 4740 + }, + { + "epoch": 4.19434628975265, + "grad_norm": 0.7550818920135498, + "learning_rate": 1.661963152600549e-05, + "loss": 0.2397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22731956839561462, + "step": 4745 + }, + { + "epoch": 4.198763250883392, + "grad_norm": 0.641147255897522, + "learning_rate": 1.657621816329694e-05, + "loss": 0.2502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20782433450222015, + "step": 4750 + }, + { + "epoch": 4.203180212014134, + "grad_norm": 0.6107928156852722, + "learning_rate": 1.6532821413463083e-05, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23882174491882324, + "step": 4755 + }, + { + "epoch": 4.207597173144876, + "grad_norm": 0.621147096157074, + "learning_rate": 1.648944148707363e-05, + "loss": 0.2452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2622299790382385, + "step": 4760 + }, + { + "epoch": 4.212014134275618, + "grad_norm": 0.7022213339805603, + "learning_rate": 1.6446078594616666e-05, + "loss": 0.2463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18493574857711792, + "step": 4765 + }, + { + "epoch": 4.21643109540636, + "grad_norm": 0.6667992472648621, + "learning_rate": 1.640273294649762e-05, + "loss": 0.2593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3022603392601013, + "step": 4770 + }, + { + "epoch": 4.220848056537102, + "grad_norm": 0.6353417038917542, + "learning_rate": 1.635940475303826e-05, + "loss": 0.2516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28547731041908264, + "step": 4775 + }, + { + "epoch": 4.225265017667844, + "grad_norm": 0.703382670879364, + "learning_rate": 1.631609422447565e-05, + "loss": 0.2362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18541069328784943, + "step": 4780 + }, + { + "epoch": 4.229681978798586, + "grad_norm": 0.5781343579292297, + "learning_rate": 1.6272801570961136e-05, + "loss": 0.2179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21327394247055054, + "step": 4785 + }, + { + "epoch": 4.2340989399293285, + "grad_norm": 0.628350019454956, + "learning_rate": 1.6229527002559346e-05, + "loss": 0.2669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22581510245800018, + "step": 4790 + }, + { + "epoch": 4.238515901060071, + "grad_norm": 0.6018873453140259, + "learning_rate": 1.6186270729247137e-05, + "loss": 0.2395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2645873725414276, + "step": 4795 + }, + { + "epoch": 4.242932862190813, + "grad_norm": 0.6109693646430969, + "learning_rate": 1.614303296091262e-05, + "loss": 0.2536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23807910084724426, + "step": 4800 + }, + { + "epoch": 4.247349823321555, + "grad_norm": 0.6540105938911438, + "learning_rate": 1.6099813907354077e-05, + "loss": 0.2925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3849636912345886, + "step": 4805 + }, + { + "epoch": 4.251766784452297, + "grad_norm": 0.603500485420227, + "learning_rate": 1.6056613778279026e-05, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2598439157009125, + "step": 4810 + }, + { + "epoch": 4.256183745583039, + "grad_norm": 0.6518641710281372, + "learning_rate": 1.6013432783303133e-05, + "loss": 0.2903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30824732780456543, + "step": 4815 + }, + { + "epoch": 4.260600706713781, + "grad_norm": 0.6308322548866272, + "learning_rate": 1.5970271131949213e-05, + "loss": 0.2959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22987233102321625, + "step": 4820 + }, + { + "epoch": 4.265017667844523, + "grad_norm": 0.5965448617935181, + "learning_rate": 1.5927129033646264e-05, + "loss": 0.2509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3175223171710968, + "step": 4825 + }, + { + "epoch": 4.269434628975265, + "grad_norm": 0.6295101642608643, + "learning_rate": 1.588400669772836e-05, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2827831506729126, + "step": 4830 + }, + { + "epoch": 4.273851590106007, + "grad_norm": 0.592918872833252, + "learning_rate": 1.5840904333433717e-05, + "loss": 0.2439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25347912311553955, + "step": 4835 + }, + { + "epoch": 4.278268551236749, + "grad_norm": 0.6142850518226624, + "learning_rate": 1.5797822149903625e-05, + "loss": 0.2262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2786719501018524, + "step": 4840 + }, + { + "epoch": 4.282685512367491, + "grad_norm": 0.6769436597824097, + "learning_rate": 1.575476035618147e-05, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27305734157562256, + "step": 4845 + }, + { + "epoch": 4.2871024734982335, + "grad_norm": 0.6391955018043518, + "learning_rate": 1.5711719161211674e-05, + "loss": 0.2378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2296718955039978, + "step": 4850 + }, + { + "epoch": 4.291519434628976, + "grad_norm": 0.6884132623672485, + "learning_rate": 1.5668698773838746e-05, + "loss": 0.2877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24689459800720215, + "step": 4855 + }, + { + "epoch": 4.295936395759718, + "grad_norm": 0.6045219898223877, + "learning_rate": 1.562569940280622e-05, + "loss": 0.229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24978384375572205, + "step": 4860 + }, + { + "epoch": 4.30035335689046, + "grad_norm": 0.8001757860183716, + "learning_rate": 1.5582721256755632e-05, + "loss": 0.243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16442768275737762, + "step": 4865 + }, + { + "epoch": 4.304770318021202, + "grad_norm": 0.6849569082260132, + "learning_rate": 1.5539764544225565e-05, + "loss": 0.2499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1961660087108612, + "step": 4870 + }, + { + "epoch": 4.309187279151944, + "grad_norm": 0.6818974614143372, + "learning_rate": 1.5496829473650568e-05, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24043825268745422, + "step": 4875 + }, + { + "epoch": 4.313604240282686, + "grad_norm": 0.5977321863174438, + "learning_rate": 1.5453916253360218e-05, + "loss": 0.2614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2922767102718353, + "step": 4880 + }, + { + "epoch": 4.318021201413427, + "grad_norm": 0.6175761818885803, + "learning_rate": 1.5411025091578025e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21561937034130096, + "step": 4885 + }, + { + "epoch": 4.322438162544169, + "grad_norm": 0.6610462069511414, + "learning_rate": 1.5368156196420506e-05, + "loss": 0.3025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3540098965167999, + "step": 4890 + }, + { + "epoch": 4.326855123674911, + "grad_norm": 0.5722936987876892, + "learning_rate": 1.5325309775896117e-05, + "loss": 0.2698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2856179475784302, + "step": 4895 + }, + { + "epoch": 4.331272084805653, + "grad_norm": 0.5998113751411438, + "learning_rate": 1.5282486037904253e-05, + "loss": 0.2487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23048731684684753, + "step": 4900 + }, + { + "epoch": 4.3356890459363955, + "grad_norm": 0.5948532819747925, + "learning_rate": 1.5239685190234287e-05, + "loss": 0.2564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24808341264724731, + "step": 4905 + }, + { + "epoch": 4.340106007067138, + "grad_norm": 0.6422690749168396, + "learning_rate": 1.519690744056447e-05, + "loss": 0.2617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20361298322677612, + "step": 4910 + }, + { + "epoch": 4.34452296819788, + "grad_norm": 0.6684097051620483, + "learning_rate": 1.5154152996461026e-05, + "loss": 0.2334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17725619673728943, + "step": 4915 + }, + { + "epoch": 4.348939929328622, + "grad_norm": 0.6210095286369324, + "learning_rate": 1.5111422065377062e-05, + "loss": 0.2756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34692680835723877, + "step": 4920 + }, + { + "epoch": 4.353356890459364, + "grad_norm": 0.617612898349762, + "learning_rate": 1.5068714854651614e-05, + "loss": 0.2544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3084735870361328, + "step": 4925 + }, + { + "epoch": 4.357773851590106, + "grad_norm": 0.6277985572814941, + "learning_rate": 1.5026031571508606e-05, + "loss": 0.2548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2271774411201477, + "step": 4930 + }, + { + "epoch": 4.362190812720848, + "grad_norm": 0.6329308152198792, + "learning_rate": 1.498337242305588e-05, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20148731768131256, + "step": 4935 + }, + { + "epoch": 4.36660777385159, + "grad_norm": 0.7278103232383728, + "learning_rate": 1.4940737616284163e-05, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24024207890033722, + "step": 4940 + }, + { + "epoch": 4.371024734982332, + "grad_norm": 0.7309764623641968, + "learning_rate": 1.4898127358066061e-05, + "loss": 0.2612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26143181324005127, + "step": 4945 + }, + { + "epoch": 4.375441696113074, + "grad_norm": 0.6814038157463074, + "learning_rate": 1.4855541855155086e-05, + "loss": 0.2499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26438629627227783, + "step": 4950 + }, + { + "epoch": 4.379858657243816, + "grad_norm": 0.6568551063537598, + "learning_rate": 1.4812981314184607e-05, + "loss": 0.2344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26820051670074463, + "step": 4955 + }, + { + "epoch": 4.384275618374558, + "grad_norm": 0.6261412501335144, + "learning_rate": 1.4770445941666905e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.282004714012146, + "step": 4960 + }, + { + "epoch": 4.3886925795053005, + "grad_norm": 0.8228702545166016, + "learning_rate": 1.4727935943992098e-05, + "loss": 0.244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24492314457893372, + "step": 4965 + }, + { + "epoch": 4.3931095406360425, + "grad_norm": 0.6519868969917297, + "learning_rate": 1.4685451527427224e-05, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2917710840702057, + "step": 4970 + }, + { + "epoch": 4.397526501766785, + "grad_norm": 0.5828742980957031, + "learning_rate": 1.4642992898115158e-05, + "loss": 0.246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21987062692642212, + "step": 4975 + }, + { + "epoch": 4.401943462897527, + "grad_norm": 0.5715969204902649, + "learning_rate": 1.460056026207367e-05, + "loss": 0.2758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3032934367656708, + "step": 4980 + }, + { + "epoch": 4.406360424028269, + "grad_norm": 0.7088951468467712, + "learning_rate": 1.4558153825194419e-05, + "loss": 0.2615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2616375982761383, + "step": 4985 + }, + { + "epoch": 4.410777385159011, + "grad_norm": 0.5688840746879578, + "learning_rate": 1.4515773793241898e-05, + "loss": 0.2407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28225451707839966, + "step": 4990 + }, + { + "epoch": 4.415194346289753, + "grad_norm": 0.6520532369613647, + "learning_rate": 1.4473420371852526e-05, + "loss": 0.2543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.302867591381073, + "step": 4995 + }, + { + "epoch": 4.419611307420495, + "grad_norm": 0.6046358942985535, + "learning_rate": 1.4431093766533567e-05, + "loss": 0.2606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26887935400009155, + "step": 5000 + }, + { + "epoch": 4.424028268551237, + "grad_norm": 0.6057432889938354, + "learning_rate": 1.4388794182662186e-05, + "loss": 0.2842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3114582598209381, + "step": 5005 + }, + { + "epoch": 4.428445229681979, + "grad_norm": 0.616709291934967, + "learning_rate": 1.4346521825484424e-05, + "loss": 0.2327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2023238241672516, + "step": 5010 + }, + { + "epoch": 4.432862190812721, + "grad_norm": 0.5935223698616028, + "learning_rate": 1.4304276900114222e-05, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3148966431617737, + "step": 5015 + }, + { + "epoch": 4.4372791519434625, + "grad_norm": 0.5329979658126831, + "learning_rate": 1.4262059611532419e-05, + "loss": 0.2224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20250526070594788, + "step": 5020 + }, + { + "epoch": 4.4416961130742045, + "grad_norm": 0.6398733854293823, + "learning_rate": 1.4219870164585739e-05, + "loss": 0.2735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3195938467979431, + "step": 5025 + }, + { + "epoch": 4.446113074204947, + "grad_norm": 1.2401176691055298, + "learning_rate": 1.417770876398583e-05, + "loss": 0.2832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27838078141212463, + "step": 5030 + }, + { + "epoch": 4.450530035335689, + "grad_norm": 0.8684335947036743, + "learning_rate": 1.4135575614308232e-05, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29576045274734497, + "step": 5035 + }, + { + "epoch": 4.454946996466431, + "grad_norm": 0.709621012210846, + "learning_rate": 1.4093470919991442e-05, + "loss": 0.2892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29040542244911194, + "step": 5040 + }, + { + "epoch": 4.459363957597173, + "grad_norm": 0.5984362363815308, + "learning_rate": 1.4051394885335836e-05, + "loss": 0.2772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21487905085086823, + "step": 5045 + }, + { + "epoch": 4.463780918727915, + "grad_norm": 0.5817950963973999, + "learning_rate": 1.4009347714502778e-05, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2756061553955078, + "step": 5050 + }, + { + "epoch": 4.468197879858657, + "grad_norm": 0.6032534837722778, + "learning_rate": 1.3967329611513543e-05, + "loss": 0.243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34259840846061707, + "step": 5055 + }, + { + "epoch": 4.472614840989399, + "grad_norm": 0.5521541833877563, + "learning_rate": 1.3925340780248373e-05, + "loss": 0.2629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2480975240468979, + "step": 5060 + }, + { + "epoch": 4.477031802120141, + "grad_norm": 0.5998513102531433, + "learning_rate": 1.3883381424445506e-05, + "loss": 0.2168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17923498153686523, + "step": 5065 + }, + { + "epoch": 4.481448763250883, + "grad_norm": 0.6650380492210388, + "learning_rate": 1.3841451747700098e-05, + "loss": 0.2603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2855014503002167, + "step": 5070 + }, + { + "epoch": 4.485865724381625, + "grad_norm": 0.5877347588539124, + "learning_rate": 1.3799551953463362e-05, + "loss": 0.2619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21608762443065643, + "step": 5075 + }, + { + "epoch": 4.490282685512367, + "grad_norm": 0.5865316390991211, + "learning_rate": 1.3757682245041466e-05, + "loss": 0.2375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22935955226421356, + "step": 5080 + }, + { + "epoch": 4.4946996466431095, + "grad_norm": 0.5989511013031006, + "learning_rate": 1.3715842825594628e-05, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2101748287677765, + "step": 5085 + }, + { + "epoch": 4.499116607773852, + "grad_norm": 0.6382969617843628, + "learning_rate": 1.3674033898136071e-05, + "loss": 0.278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2421863079071045, + "step": 5090 + }, + { + "epoch": 4.503533568904594, + "grad_norm": 0.638594388961792, + "learning_rate": 1.3632255665531088e-05, + "loss": 0.2619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22802847623825073, + "step": 5095 + }, + { + "epoch": 4.507950530035336, + "grad_norm": 0.6239488124847412, + "learning_rate": 1.3590508330496027e-05, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2688833773136139, + "step": 5100 + }, + { + "epoch": 4.512367491166078, + "grad_norm": 0.6269590258598328, + "learning_rate": 1.3548792095597305e-05, + "loss": 0.2235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23217537999153137, + "step": 5105 + }, + { + "epoch": 4.51678445229682, + "grad_norm": 0.6065074801445007, + "learning_rate": 1.3507107163250453e-05, + "loss": 0.2419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2584981918334961, + "step": 5110 + }, + { + "epoch": 4.521201413427562, + "grad_norm": 0.6206178665161133, + "learning_rate": 1.3465453735719087e-05, + "loss": 0.2439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2482869029045105, + "step": 5115 + }, + { + "epoch": 4.525618374558304, + "grad_norm": 0.6238287091255188, + "learning_rate": 1.3423832015114e-05, + "loss": 0.2832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31270158290863037, + "step": 5120 + }, + { + "epoch": 4.530035335689046, + "grad_norm": 0.5761831998825073, + "learning_rate": 1.3382242203392083e-05, + "loss": 0.2112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1838235706090927, + "step": 5125 + }, + { + "epoch": 4.534452296819788, + "grad_norm": 0.636617124080658, + "learning_rate": 1.3340684502355443e-05, + "loss": 0.2808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3398388922214508, + "step": 5130 + }, + { + "epoch": 4.53886925795053, + "grad_norm": 0.6107141971588135, + "learning_rate": 1.3299159113650357e-05, + "loss": 0.2751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26246610283851624, + "step": 5135 + }, + { + "epoch": 4.543286219081272, + "grad_norm": 0.6003983020782471, + "learning_rate": 1.325766623876632e-05, + "loss": 0.2733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27694079279899597, + "step": 5140 + }, + { + "epoch": 4.5477031802120145, + "grad_norm": 0.6244370937347412, + "learning_rate": 1.321620607903508e-05, + "loss": 0.261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26806601881980896, + "step": 5145 + }, + { + "epoch": 4.5521201413427566, + "grad_norm": 0.6757631897926331, + "learning_rate": 1.3174778835629605e-05, + "loss": 0.2636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27561718225479126, + "step": 5150 + }, + { + "epoch": 4.556537102473499, + "grad_norm": 0.6886018514633179, + "learning_rate": 1.3133384709563188e-05, + "loss": 0.2509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24703247845172882, + "step": 5155 + }, + { + "epoch": 4.560954063604241, + "grad_norm": 0.6107934713363647, + "learning_rate": 1.309202390168841e-05, + "loss": 0.2404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28265172243118286, + "step": 5160 + }, + { + "epoch": 4.565371024734983, + "grad_norm": 0.7151978015899658, + "learning_rate": 1.3050696612696188e-05, + "loss": 0.2432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25835981965065, + "step": 5165 + }, + { + "epoch": 4.569787985865725, + "grad_norm": 0.5881211161613464, + "learning_rate": 1.3009403043114796e-05, + "loss": 0.26, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2113930583000183, + "step": 5170 + }, + { + "epoch": 4.574204946996466, + "grad_norm": 0.7579076886177063, + "learning_rate": 1.2968143393308897e-05, + "loss": 0.2324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3218274712562561, + "step": 5175 + }, + { + "epoch": 4.578621908127208, + "grad_norm": 0.5710316896438599, + "learning_rate": 1.2926917863478581e-05, + "loss": 0.2566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2202773243188858, + "step": 5180 + }, + { + "epoch": 4.58303886925795, + "grad_norm": 0.6459076404571533, + "learning_rate": 1.2885726653658355e-05, + "loss": 0.2738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2878916263580322, + "step": 5185 + }, + { + "epoch": 4.587455830388692, + "grad_norm": 0.5926949381828308, + "learning_rate": 1.2844569963716222e-05, + "loss": 0.2545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23166950047016144, + "step": 5190 + }, + { + "epoch": 4.591872791519434, + "grad_norm": 0.6707189083099365, + "learning_rate": 1.280344799335267e-05, + "loss": 0.2543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25186440348625183, + "step": 5195 + }, + { + "epoch": 4.5962897526501765, + "grad_norm": 0.573384165763855, + "learning_rate": 1.2762360942099745e-05, + "loss": 0.2415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24173548817634583, + "step": 5200 + }, + { + "epoch": 4.6007067137809186, + "grad_norm": 0.6182858347892761, + "learning_rate": 1.2721309009320021e-05, + "loss": 0.2502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23106129467487335, + "step": 5205 + }, + { + "epoch": 4.605123674911661, + "grad_norm": 0.6368361115455627, + "learning_rate": 1.268029239420571e-05, + "loss": 0.295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2934111952781677, + "step": 5210 + }, + { + "epoch": 4.609540636042403, + "grad_norm": 0.6030951738357544, + "learning_rate": 1.2639311295777632e-05, + "loss": 0.2495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2604959309101105, + "step": 5215 + }, + { + "epoch": 4.613957597173145, + "grad_norm": 0.6463732123374939, + "learning_rate": 1.2598365912884267e-05, + "loss": 0.2556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25189077854156494, + "step": 5220 + }, + { + "epoch": 4.618374558303887, + "grad_norm": 0.5990574359893799, + "learning_rate": 1.2557456444200831e-05, + "loss": 0.296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2886963486671448, + "step": 5225 + }, + { + "epoch": 4.622791519434629, + "grad_norm": 0.6595236659049988, + "learning_rate": 1.2516583088228224e-05, + "loss": 0.2777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17959409952163696, + "step": 5230 + }, + { + "epoch": 4.627208480565371, + "grad_norm": 0.5914446711540222, + "learning_rate": 1.2475746043292176e-05, + "loss": 0.2595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2113860547542572, + "step": 5235 + }, + { + "epoch": 4.631625441696113, + "grad_norm": 0.6367279887199402, + "learning_rate": 1.243494550754219e-05, + "loss": 0.2763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2789275348186493, + "step": 5240 + }, + { + "epoch": 4.636042402826855, + "grad_norm": 0.6216886043548584, + "learning_rate": 1.239418167895063e-05, + "loss": 0.2777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2759518623352051, + "step": 5245 + }, + { + "epoch": 4.640459363957597, + "grad_norm": 0.689033031463623, + "learning_rate": 1.2353454755311751e-05, + "loss": 0.2444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25160372257232666, + "step": 5250 + }, + { + "epoch": 4.644876325088339, + "grad_norm": 0.5909837484359741, + "learning_rate": 1.2312764934240735e-05, + "loss": 0.2706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26029708981513977, + "step": 5255 + }, + { + "epoch": 4.6492932862190814, + "grad_norm": 0.6285930275917053, + "learning_rate": 1.227211241317275e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27858561277389526, + "step": 5260 + }, + { + "epoch": 4.6537102473498235, + "grad_norm": 0.6383758783340454, + "learning_rate": 1.223149738936195e-05, + "loss": 0.2539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2381717562675476, + "step": 5265 + }, + { + "epoch": 4.658127208480566, + "grad_norm": 0.6008221507072449, + "learning_rate": 1.219092005988057e-05, + "loss": 0.2527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2692645490169525, + "step": 5270 + }, + { + "epoch": 4.662544169611308, + "grad_norm": 0.6864904761314392, + "learning_rate": 1.215038062161792e-05, + "loss": 0.2383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24149903655052185, + "step": 5275 + }, + { + "epoch": 4.66696113074205, + "grad_norm": 0.6051673889160156, + "learning_rate": 1.2109879271279486e-05, + "loss": 0.2567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2649880647659302, + "step": 5280 + }, + { + "epoch": 4.671378091872792, + "grad_norm": 0.6154433488845825, + "learning_rate": 1.2069416205385902e-05, + "loss": 0.2271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19507455825805664, + "step": 5285 + }, + { + "epoch": 4.675795053003534, + "grad_norm": 0.6651699542999268, + "learning_rate": 1.2028991620272081e-05, + "loss": 0.2139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19150885939598083, + "step": 5290 + }, + { + "epoch": 4.680212014134275, + "grad_norm": 0.6139252185821533, + "learning_rate": 1.1988605712086199e-05, + "loss": 0.2503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.201002836227417, + "step": 5295 + }, + { + "epoch": 4.684628975265017, + "grad_norm": 0.6537069082260132, + "learning_rate": 1.1948258676788751e-05, + "loss": 0.269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33721426129341125, + "step": 5300 + }, + { + "epoch": 4.689045936395759, + "grad_norm": 0.6756031513214111, + "learning_rate": 1.190795071015165e-05, + "loss": 0.2719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34285324811935425, + "step": 5305 + }, + { + "epoch": 4.693462897526501, + "grad_norm": 0.5824704170227051, + "learning_rate": 1.1867682007757191e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2109375149011612, + "step": 5310 + }, + { + "epoch": 4.6978798586572434, + "grad_norm": 0.6546387076377869, + "learning_rate": 1.1827452764997198e-05, + "loss": 0.2419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17242984473705292, + "step": 5315 + }, + { + "epoch": 4.7022968197879855, + "grad_norm": 0.698542058467865, + "learning_rate": 1.1787263177071997e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18797524273395538, + "step": 5320 + }, + { + "epoch": 4.706713780918728, + "grad_norm": 0.7340909242630005, + "learning_rate": 1.174711343898952e-05, + "loss": 0.234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19447045028209686, + "step": 5325 + }, + { + "epoch": 4.71113074204947, + "grad_norm": 0.5774402022361755, + "learning_rate": 1.1707003745564319e-05, + "loss": 0.232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24991655349731445, + "step": 5330 + }, + { + "epoch": 4.715547703180212, + "grad_norm": 0.6813011169433594, + "learning_rate": 1.1666934291416666e-05, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2309049814939499, + "step": 5335 + }, + { + "epoch": 4.719964664310954, + "grad_norm": 0.6249779462814331, + "learning_rate": 1.1626905270971563e-05, + "loss": 0.2342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20291580259799957, + "step": 5340 + }, + { + "epoch": 4.724381625441696, + "grad_norm": 0.6181374788284302, + "learning_rate": 1.1586916878457837e-05, + "loss": 0.235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34734612703323364, + "step": 5345 + }, + { + "epoch": 4.728798586572438, + "grad_norm": 0.5957716107368469, + "learning_rate": 1.1546969307907162e-05, + "loss": 0.2824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21913906931877136, + "step": 5350 + }, + { + "epoch": 4.73321554770318, + "grad_norm": 0.6221516132354736, + "learning_rate": 1.1507062753153155e-05, + "loss": 0.2466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20802852511405945, + "step": 5355 + }, + { + "epoch": 4.737632508833922, + "grad_norm": 0.6126749515533447, + "learning_rate": 1.1467197407830409e-05, + "loss": 0.2835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3353455662727356, + "step": 5360 + }, + { + "epoch": 4.742049469964664, + "grad_norm": 0.6599173545837402, + "learning_rate": 1.1427373465373541e-05, + "loss": 0.2764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.289046049118042, + "step": 5365 + }, + { + "epoch": 4.746466431095406, + "grad_norm": 0.5880971550941467, + "learning_rate": 1.1387591119016292e-05, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2312006950378418, + "step": 5370 + }, + { + "epoch": 4.750883392226148, + "grad_norm": 0.6444661617279053, + "learning_rate": 1.1347850561790594e-05, + "loss": 0.2895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2494499385356903, + "step": 5375 + }, + { + "epoch": 4.7553003533568905, + "grad_norm": 0.5412120819091797, + "learning_rate": 1.1308151986525557e-05, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20912671089172363, + "step": 5380 + }, + { + "epoch": 4.759717314487633, + "grad_norm": 0.5995916724205017, + "learning_rate": 1.1268495585846621e-05, + "loss": 0.2509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21118956804275513, + "step": 5385 + }, + { + "epoch": 4.764134275618375, + "grad_norm": 0.6803336143493652, + "learning_rate": 1.1228881552174585e-05, + "loss": 0.2416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2551300525665283, + "step": 5390 + }, + { + "epoch": 4.768551236749117, + "grad_norm": 0.6087960600852966, + "learning_rate": 1.1189310077724667e-05, + "loss": 0.2682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23086312413215637, + "step": 5395 + }, + { + "epoch": 4.772968197879859, + "grad_norm": 0.6534774303436279, + "learning_rate": 1.1149781354505565e-05, + "loss": 0.2789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2714969515800476, + "step": 5400 + }, + { + "epoch": 4.777385159010601, + "grad_norm": 0.5906716585159302, + "learning_rate": 1.111029557431858e-05, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25104430317878723, + "step": 5405 + }, + { + "epoch": 4.781802120141343, + "grad_norm": 0.6094644665718079, + "learning_rate": 1.1070852928756598e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34287014603614807, + "step": 5410 + }, + { + "epoch": 4.786219081272085, + "grad_norm": 0.6480569243431091, + "learning_rate": 1.1031453609203244e-05, + "loss": 0.2544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24471309781074524, + "step": 5415 + }, + { + "epoch": 4.790636042402827, + "grad_norm": 0.6177884936332703, + "learning_rate": 1.0992097806831894e-05, + "loss": 0.2405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2734081745147705, + "step": 5420 + }, + { + "epoch": 4.795053003533569, + "grad_norm": 0.666823148727417, + "learning_rate": 1.0952785712604777e-05, + "loss": 0.2846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23860061168670654, + "step": 5425 + }, + { + "epoch": 4.799469964664311, + "grad_norm": 0.6771073937416077, + "learning_rate": 1.0913517517272057e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2811965048313141, + "step": 5430 + }, + { + "epoch": 4.803886925795053, + "grad_norm": 0.6511576771736145, + "learning_rate": 1.0874293411370847e-05, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2048361599445343, + "step": 5435 + }, + { + "epoch": 4.8083038869257955, + "grad_norm": 0.622969925403595, + "learning_rate": 1.083511358522439e-05, + "loss": 0.2611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25874412059783936, + "step": 5440 + }, + { + "epoch": 4.8127208480565375, + "grad_norm": 0.6246639490127563, + "learning_rate": 1.0795978228941025e-05, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28046876192092896, + "step": 5445 + }, + { + "epoch": 4.81713780918728, + "grad_norm": 0.5713747143745422, + "learning_rate": 1.0756887532413328e-05, + "loss": 0.2686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20135197043418884, + "step": 5450 + }, + { + "epoch": 4.821554770318021, + "grad_norm": 0.6463353037834167, + "learning_rate": 1.0717841685317207e-05, + "loss": 0.258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24732375144958496, + "step": 5455 + }, + { + "epoch": 4.825971731448763, + "grad_norm": 0.6025271415710449, + "learning_rate": 1.0678840877110906e-05, + "loss": 0.2883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2834791839122772, + "step": 5460 + }, + { + "epoch": 4.830388692579505, + "grad_norm": 0.5726755261421204, + "learning_rate": 1.0639885297034157e-05, + "loss": 0.2302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23436738550662994, + "step": 5465 + }, + { + "epoch": 4.834805653710247, + "grad_norm": 0.6157001852989197, + "learning_rate": 1.060097513410723e-05, + "loss": 0.2783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25192394852638245, + "step": 5470 + }, + { + "epoch": 4.839222614840989, + "grad_norm": 0.6347540020942688, + "learning_rate": 1.0562110577130031e-05, + "loss": 0.2754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.324943482875824, + "step": 5475 + }, + { + "epoch": 4.843639575971731, + "grad_norm": 0.5740549564361572, + "learning_rate": 1.0523291814681149e-05, + "loss": 0.2622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25427937507629395, + "step": 5480 + }, + { + "epoch": 4.848056537102473, + "grad_norm": 0.5769616961479187, + "learning_rate": 1.0484519035117015e-05, + "loss": 0.2484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2660577595233917, + "step": 5485 + }, + { + "epoch": 4.852473498233215, + "grad_norm": 0.7008361220359802, + "learning_rate": 1.0445792426570894e-05, + "loss": 0.2689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2324357032775879, + "step": 5490 + }, + { + "epoch": 4.8568904593639575, + "grad_norm": 0.6129425764083862, + "learning_rate": 1.040711217695205e-05, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.245370551943779, + "step": 5495 + }, + { + "epoch": 4.8613074204946995, + "grad_norm": 0.6054858565330505, + "learning_rate": 1.0368478473944792e-05, + "loss": 0.2404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23680360615253448, + "step": 5500 + }, + { + "epoch": 4.865724381625442, + "grad_norm": 0.5556029081344604, + "learning_rate": 1.0329891505007582e-05, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21621742844581604, + "step": 5505 + }, + { + "epoch": 4.870141342756184, + "grad_norm": 0.5951539874076843, + "learning_rate": 1.029135145737212e-05, + "loss": 0.2418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24147434532642365, + "step": 5510 + }, + { + "epoch": 4.874558303886926, + "grad_norm": 0.6369734406471252, + "learning_rate": 1.0252858518042413e-05, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2957211136817932, + "step": 5515 + }, + { + "epoch": 4.878975265017668, + "grad_norm": 0.6428771018981934, + "learning_rate": 1.0214412873793931e-05, + "loss": 0.2393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20931799709796906, + "step": 5520 + }, + { + "epoch": 4.88339222614841, + "grad_norm": 0.591044008731842, + "learning_rate": 1.0176014711172615e-05, + "loss": 0.2694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25282272696495056, + "step": 5525 + }, + { + "epoch": 4.887809187279152, + "grad_norm": 0.5807618498802185, + "learning_rate": 1.0137664216494035e-05, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26002442836761475, + "step": 5530 + }, + { + "epoch": 4.892226148409894, + "grad_norm": 0.5614008903503418, + "learning_rate": 1.0099361575842486e-05, + "loss": 0.2173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2629309296607971, + "step": 5535 + }, + { + "epoch": 4.896643109540636, + "grad_norm": 0.5964021682739258, + "learning_rate": 1.0061106975070025e-05, + "loss": 0.2604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3080860376358032, + "step": 5540 + }, + { + "epoch": 4.901060070671378, + "grad_norm": 0.6463096141815186, + "learning_rate": 1.0022900599795641e-05, + "loss": 0.3287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2719125747680664, + "step": 5545 + }, + { + "epoch": 4.90547703180212, + "grad_norm": 0.6948069334030151, + "learning_rate": 9.984742635404313e-06, + "loss": 0.2694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2654094398021698, + "step": 5550 + }, + { + "epoch": 4.909893992932862, + "grad_norm": 0.9675594568252563, + "learning_rate": 9.946633267046125e-06, + "loss": 0.2607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2373758852481842, + "step": 5555 + }, + { + "epoch": 4.9143109540636045, + "grad_norm": 0.6239486336708069, + "learning_rate": 9.908572679635337e-06, + "loss": 0.2684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19304318726062775, + "step": 5560 + }, + { + "epoch": 4.918727915194347, + "grad_norm": 0.6223797798156738, + "learning_rate": 9.87056105784957e-06, + "loss": 0.2712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2559204697608948, + "step": 5565 + }, + { + "epoch": 4.923144876325089, + "grad_norm": 0.5596070885658264, + "learning_rate": 9.832598586128796e-06, + "loss": 0.3051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24063706398010254, + "step": 5570 + }, + { + "epoch": 4.927561837455831, + "grad_norm": 0.7187504768371582, + "learning_rate": 9.794685448674533e-06, + "loss": 0.2447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2679745554924011, + "step": 5575 + }, + { + "epoch": 4.931978798586572, + "grad_norm": 0.6528657078742981, + "learning_rate": 9.756821829448911e-06, + "loss": 0.2278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2918229103088379, + "step": 5580 + }, + { + "epoch": 4.936395759717314, + "grad_norm": 0.5671543478965759, + "learning_rate": 9.719007912173786e-06, + "loss": 0.2551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24266092479228973, + "step": 5585 + }, + { + "epoch": 4.940812720848056, + "grad_norm": 0.5972075462341309, + "learning_rate": 9.681243880329864e-06, + "loss": 0.2973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26102444529533386, + "step": 5590 + }, + { + "epoch": 4.945229681978798, + "grad_norm": 0.6930065751075745, + "learning_rate": 9.643529917155765e-06, + "loss": 0.2431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15023840963840485, + "step": 5595 + }, + { + "epoch": 4.94964664310954, + "grad_norm": 0.5619693994522095, + "learning_rate": 9.60586620564721e-06, + "loss": 0.2326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17770014703273773, + "step": 5600 + }, + { + "epoch": 4.954063604240282, + "grad_norm": 0.683238685131073, + "learning_rate": 9.568252928556045e-06, + "loss": 0.2577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27513328194618225, + "step": 5605 + }, + { + "epoch": 4.958480565371024, + "grad_norm": 0.6224611401557922, + "learning_rate": 9.530690268389419e-06, + "loss": 0.2536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2843214273452759, + "step": 5610 + }, + { + "epoch": 4.9628975265017665, + "grad_norm": 0.6748574376106262, + "learning_rate": 9.493178407408898e-06, + "loss": 0.2502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3191365599632263, + "step": 5615 + }, + { + "epoch": 4.967314487632509, + "grad_norm": 0.6514118313789368, + "learning_rate": 9.45571752762952e-06, + "loss": 0.2612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27559196949005127, + "step": 5620 + }, + { + "epoch": 4.971731448763251, + "grad_norm": 0.5978466868400574, + "learning_rate": 9.418307810818974e-06, + "loss": 0.2224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2235470712184906, + "step": 5625 + }, + { + "epoch": 4.976148409893993, + "grad_norm": 0.8135108351707458, + "learning_rate": 9.380949438496694e-06, + "loss": 0.2443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21574482321739197, + "step": 5630 + }, + { + "epoch": 4.980565371024735, + "grad_norm": 0.5374975204467773, + "learning_rate": 9.343642591932986e-06, + "loss": 0.2481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24713656306266785, + "step": 5635 + }, + { + "epoch": 4.984982332155477, + "grad_norm": 0.6835610866546631, + "learning_rate": 9.306387452148117e-06, + "loss": 0.2555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2892993092536926, + "step": 5640 + }, + { + "epoch": 4.989399293286219, + "grad_norm": 0.6098293662071228, + "learning_rate": 9.269184199911507e-06, + "loss": 0.2758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2980913519859314, + "step": 5645 + }, + { + "epoch": 4.993816254416961, + "grad_norm": 0.5952140092849731, + "learning_rate": 9.232033015740765e-06, + "loss": 0.283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3264719247817993, + "step": 5650 + }, + { + "epoch": 4.998233215547703, + "grad_norm": 0.6798433661460876, + "learning_rate": 9.19493407990087e-06, + "loss": 0.2749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2580171823501587, + "step": 5655 + }, + { + "epoch": 5.003533568904594, + "grad_norm": 0.5702685117721558, + "learning_rate": 9.157887572403292e-06, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20382651686668396, + "step": 5660 + }, + { + "epoch": 5.007950530035336, + "grad_norm": 0.692338764667511, + "learning_rate": 9.120893673005095e-06, + "loss": 0.2174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19380773603916168, + "step": 5665 + }, + { + "epoch": 5.012367491166078, + "grad_norm": 0.5917826890945435, + "learning_rate": 9.083952561208093e-06, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21118654310703278, + "step": 5670 + }, + { + "epoch": 5.01678445229682, + "grad_norm": 0.6775484681129456, + "learning_rate": 9.04706441625793e-06, + "loss": 0.2458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3707857131958008, + "step": 5675 + }, + { + "epoch": 5.021201413427562, + "grad_norm": 0.6675564646720886, + "learning_rate": 9.010229417143298e-06, + "loss": 0.2348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18382994830608368, + "step": 5680 + }, + { + "epoch": 5.025618374558304, + "grad_norm": 0.6618363261222839, + "learning_rate": 8.973447742594959e-06, + "loss": 0.2264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22905008494853973, + "step": 5685 + }, + { + "epoch": 5.030035335689046, + "grad_norm": 0.6834167242050171, + "learning_rate": 8.936719571084964e-06, + "loss": 0.2468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16894429922103882, + "step": 5690 + }, + { + "epoch": 5.034452296819788, + "grad_norm": 1.0125937461853027, + "learning_rate": 8.900045080825772e-06, + "loss": 0.1942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1643485128879547, + "step": 5695 + }, + { + "epoch": 5.03886925795053, + "grad_norm": 0.66016685962677, + "learning_rate": 8.863424449769326e-06, + "loss": 0.2063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19492757320404053, + "step": 5700 + }, + { + "epoch": 5.043286219081272, + "grad_norm": 0.6835595369338989, + "learning_rate": 8.826857855606268e-06, + "loss": 0.2236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2136615514755249, + "step": 5705 + }, + { + "epoch": 5.0477031802120145, + "grad_norm": 0.6571291089057922, + "learning_rate": 8.790345475765028e-06, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31727343797683716, + "step": 5710 + }, + { + "epoch": 5.0521201413427566, + "grad_norm": 0.6805379390716553, + "learning_rate": 8.753887487410988e-06, + "loss": 0.244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2703685164451599, + "step": 5715 + }, + { + "epoch": 5.056537102473499, + "grad_norm": 0.6848156452178955, + "learning_rate": 8.71748406744559e-06, + "loss": 0.2635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2362714409828186, + "step": 5720 + }, + { + "epoch": 5.060954063604241, + "grad_norm": 0.6533686518669128, + "learning_rate": 8.681135392505521e-06, + "loss": 0.2934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33763277530670166, + "step": 5725 + }, + { + "epoch": 5.065371024734982, + "grad_norm": 0.7272844910621643, + "learning_rate": 8.644841638961827e-06, + "loss": 0.2103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16482684016227722, + "step": 5730 + }, + { + "epoch": 5.069787985865724, + "grad_norm": 0.6510602831840515, + "learning_rate": 8.608602982919061e-06, + "loss": 0.2306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1856457144021988, + "step": 5735 + }, + { + "epoch": 5.074204946996466, + "grad_norm": 0.7189889550209045, + "learning_rate": 8.57241960021444e-06, + "loss": 0.2622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23383861780166626, + "step": 5740 + }, + { + "epoch": 5.078621908127208, + "grad_norm": 0.6912586092948914, + "learning_rate": 8.536291666416971e-06, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1808895766735077, + "step": 5745 + }, + { + "epoch": 5.08303886925795, + "grad_norm": 0.6760678291320801, + "learning_rate": 8.500219356826633e-06, + "loss": 0.2813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3097551465034485, + "step": 5750 + }, + { + "epoch": 5.087455830388692, + "grad_norm": 0.6680203080177307, + "learning_rate": 8.464202846473467e-06, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20965927839279175, + "step": 5755 + }, + { + "epoch": 5.091872791519434, + "grad_norm": 0.6519942879676819, + "learning_rate": 8.428242310116817e-06, + "loss": 0.2579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28410807251930237, + "step": 5760 + }, + { + "epoch": 5.0962897526501765, + "grad_norm": 0.7109984755516052, + "learning_rate": 8.392337922244383e-06, + "loss": 0.2401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19458754360675812, + "step": 5765 + }, + { + "epoch": 5.1007067137809186, + "grad_norm": 4.666537284851074, + "learning_rate": 8.35648985707144e-06, + "loss": 0.2129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21463000774383545, + "step": 5770 + }, + { + "epoch": 5.105123674911661, + "grad_norm": 0.7026737928390503, + "learning_rate": 8.320698288539997e-06, + "loss": 0.2755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1861979365348816, + "step": 5775 + }, + { + "epoch": 5.109540636042403, + "grad_norm": 0.7229135632514954, + "learning_rate": 8.284963390317885e-06, + "loss": 0.1896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17001771926879883, + "step": 5780 + }, + { + "epoch": 5.113957597173145, + "grad_norm": 0.7189328670501709, + "learning_rate": 8.24928533579799e-06, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2957168519496918, + "step": 5785 + }, + { + "epoch": 5.118374558303887, + "grad_norm": 0.6980794668197632, + "learning_rate": 8.21366429809737e-06, + "loss": 0.2294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27361002564430237, + "step": 5790 + }, + { + "epoch": 5.122791519434629, + "grad_norm": 0.6661799550056458, + "learning_rate": 8.17810045005644e-06, + "loss": 0.2844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26608705520629883, + "step": 5795 + }, + { + "epoch": 5.127208480565371, + "grad_norm": 0.6025856733322144, + "learning_rate": 8.142593964238092e-06, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23461788892745972, + "step": 5800 + }, + { + "epoch": 5.131625441696113, + "grad_norm": 0.6775685548782349, + "learning_rate": 8.107145012926909e-06, + "loss": 0.2261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20733845233917236, + "step": 5805 + }, + { + "epoch": 5.136042402826855, + "grad_norm": 0.6828117966651917, + "learning_rate": 8.071753768128299e-06, + "loss": 0.2198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19489261507987976, + "step": 5810 + }, + { + "epoch": 5.140459363957597, + "grad_norm": 0.6730659604072571, + "learning_rate": 8.036420401567662e-06, + "loss": 0.236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2441500872373581, + "step": 5815 + }, + { + "epoch": 5.144876325088339, + "grad_norm": 0.6313605904579163, + "learning_rate": 8.001145084689563e-06, + "loss": 0.2842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26540714502334595, + "step": 5820 + }, + { + "epoch": 5.1492932862190814, + "grad_norm": 0.6368901133537292, + "learning_rate": 7.965927988656903e-06, + "loss": 0.2422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2015400528907776, + "step": 5825 + }, + { + "epoch": 5.1537102473498235, + "grad_norm": 0.7787892818450928, + "learning_rate": 7.930769284350084e-06, + "loss": 0.2294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21253234148025513, + "step": 5830 + }, + { + "epoch": 5.158127208480566, + "grad_norm": 0.7237471342086792, + "learning_rate": 7.895669142366159e-06, + "loss": 0.2181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29186874628067017, + "step": 5835 + }, + { + "epoch": 5.162544169611308, + "grad_norm": 0.677948534488678, + "learning_rate": 7.860627733018065e-06, + "loss": 0.2292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22216159105300903, + "step": 5840 + }, + { + "epoch": 5.16696113074205, + "grad_norm": 0.6752915382385254, + "learning_rate": 7.825645226333714e-06, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18272480368614197, + "step": 5845 + }, + { + "epoch": 5.171378091872792, + "grad_norm": 0.6883508563041687, + "learning_rate": 7.79072179205523e-06, + "loss": 0.2321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3108995854854584, + "step": 5850 + }, + { + "epoch": 5.175795053003534, + "grad_norm": 0.6497143507003784, + "learning_rate": 7.755857599638124e-06, + "loss": 0.2032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24391454458236694, + "step": 5855 + }, + { + "epoch": 5.180212014134276, + "grad_norm": 0.6400555372238159, + "learning_rate": 7.721052818250419e-06, + "loss": 0.2759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.252552330493927, + "step": 5860 + }, + { + "epoch": 5.184628975265018, + "grad_norm": 0.624398946762085, + "learning_rate": 7.686307616771883e-06, + "loss": 0.2458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2461286336183548, + "step": 5865 + }, + { + "epoch": 5.189045936395759, + "grad_norm": 0.6464707255363464, + "learning_rate": 7.651622163793189e-06, + "loss": 0.247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23976178467273712, + "step": 5870 + }, + { + "epoch": 5.193462897526501, + "grad_norm": 0.6472598314285278, + "learning_rate": 7.616996627615103e-06, + "loss": 0.2295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2011461853981018, + "step": 5875 + }, + { + "epoch": 5.1978798586572434, + "grad_norm": 0.7136189341545105, + "learning_rate": 7.582431176247642e-06, + "loss": 0.2714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19472795724868774, + "step": 5880 + }, + { + "epoch": 5.2022968197879855, + "grad_norm": 0.6750161051750183, + "learning_rate": 7.547925977409301e-06, + "loss": 0.2119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20839814841747284, + "step": 5885 + }, + { + "epoch": 5.206713780918728, + "grad_norm": 0.6782904267311096, + "learning_rate": 7.5134811985262115e-06, + "loss": 0.2842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19967156648635864, + "step": 5890 + }, + { + "epoch": 5.21113074204947, + "grad_norm": 0.5999016761779785, + "learning_rate": 7.479097006731333e-06, + "loss": 0.2569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2858957052230835, + "step": 5895 + }, + { + "epoch": 5.215547703180212, + "grad_norm": 0.7441349625587463, + "learning_rate": 7.444773568863646e-06, + "loss": 0.2368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2590683400630951, + "step": 5900 + }, + { + "epoch": 5.219964664310954, + "grad_norm": 0.7110154032707214, + "learning_rate": 7.410511051467339e-06, + "loss": 0.247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17946434020996094, + "step": 5905 + }, + { + "epoch": 5.224381625441696, + "grad_norm": 0.6729068756103516, + "learning_rate": 7.376309620791016e-06, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24539992213249207, + "step": 5910 + }, + { + "epoch": 5.228798586572438, + "grad_norm": 0.7076547145843506, + "learning_rate": 7.342169442786835e-06, + "loss": 0.2352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22367143630981445, + "step": 5915 + }, + { + "epoch": 5.23321554770318, + "grad_norm": 0.6758275032043457, + "learning_rate": 7.308090683109803e-06, + "loss": 0.2557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.222344771027565, + "step": 5920 + }, + { + "epoch": 5.237632508833922, + "grad_norm": 0.5967568755149841, + "learning_rate": 7.274073507116865e-06, + "loss": 0.2537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29815196990966797, + "step": 5925 + }, + { + "epoch": 5.242049469964664, + "grad_norm": 0.5832816958427429, + "learning_rate": 7.240118079866163e-06, + "loss": 0.2208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2399141788482666, + "step": 5930 + }, + { + "epoch": 5.246466431095406, + "grad_norm": 0.6074890494346619, + "learning_rate": 7.206224566116247e-06, + "loss": 0.2618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19510237872600555, + "step": 5935 + }, + { + "epoch": 5.250883392226148, + "grad_norm": 0.6020046472549438, + "learning_rate": 7.172393130325208e-06, + "loss": 0.2298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22199654579162598, + "step": 5940 + }, + { + "epoch": 5.2553003533568905, + "grad_norm": 0.6055787801742554, + "learning_rate": 7.138623936649951e-06, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2283298373222351, + "step": 5945 + }, + { + "epoch": 5.259717314487633, + "grad_norm": 0.6854278445243835, + "learning_rate": 7.104917148945363e-06, + "loss": 0.2597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23151074349880219, + "step": 5950 + }, + { + "epoch": 5.264134275618375, + "grad_norm": 0.6667275428771973, + "learning_rate": 7.0712729307635284e-06, + "loss": 0.2464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24305643141269684, + "step": 5955 + }, + { + "epoch": 5.268551236749117, + "grad_norm": 0.6479029655456543, + "learning_rate": 7.037691445352917e-06, + "loss": 0.2446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2675766944885254, + "step": 5960 + }, + { + "epoch": 5.272968197879859, + "grad_norm": 0.6230666041374207, + "learning_rate": 7.00417285565762e-06, + "loss": 0.2521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20548516511917114, + "step": 5965 + }, + { + "epoch": 5.277385159010601, + "grad_norm": 0.6456372737884521, + "learning_rate": 6.970717324316545e-06, + "loss": 0.2666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2712244689464569, + "step": 5970 + }, + { + "epoch": 5.281802120141343, + "grad_norm": 0.6794562339782715, + "learning_rate": 6.937325013662623e-06, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29143932461738586, + "step": 5975 + }, + { + "epoch": 5.286219081272085, + "grad_norm": 0.6608504056930542, + "learning_rate": 6.903996085722033e-06, + "loss": 0.233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20360919833183289, + "step": 5980 + }, + { + "epoch": 5.290636042402827, + "grad_norm": 0.6932647824287415, + "learning_rate": 6.8707307022134e-06, + "loss": 0.2401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2070590704679489, + "step": 5985 + }, + { + "epoch": 5.295053003533569, + "grad_norm": 0.631776750087738, + "learning_rate": 6.8375290245470296e-06, + "loss": 0.2582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2469078004360199, + "step": 5990 + }, + { + "epoch": 5.299469964664311, + "grad_norm": 0.6876675486564636, + "learning_rate": 6.804391213824087e-06, + "loss": 0.2549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2884756028652191, + "step": 5995 + }, + { + "epoch": 5.303886925795053, + "grad_norm": 0.6492973566055298, + "learning_rate": 6.771317430835888e-06, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1926867663860321, + "step": 6000 + }, + { + "epoch": 5.3083038869257955, + "grad_norm": 0.6198075413703918, + "learning_rate": 6.73830783606303e-06, + "loss": 0.2568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3009505867958069, + "step": 6005 + }, + { + "epoch": 5.3127208480565375, + "grad_norm": 0.6509351134300232, + "learning_rate": 6.705362589674667e-06, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22769448161125183, + "step": 6010 + }, + { + "epoch": 5.317137809187279, + "grad_norm": 0.6019710302352905, + "learning_rate": 6.6724818515277544e-06, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2702226936817169, + "step": 6015 + }, + { + "epoch": 5.321554770318021, + "grad_norm": 0.6558071970939636, + "learning_rate": 6.639665781166189e-06, + "loss": 0.2595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2750241160392761, + "step": 6020 + }, + { + "epoch": 5.325971731448763, + "grad_norm": 0.5591468811035156, + "learning_rate": 6.606914537820122e-06, + "loss": 0.2572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2233850359916687, + "step": 6025 + }, + { + "epoch": 5.330388692579505, + "grad_norm": 0.6942284107208252, + "learning_rate": 6.574228280405139e-06, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19524750113487244, + "step": 6030 + }, + { + "epoch": 5.334805653710247, + "grad_norm": 0.6840284466743469, + "learning_rate": 6.5416071675215136e-06, + "loss": 0.233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2810319662094116, + "step": 6035 + }, + { + "epoch": 5.339222614840989, + "grad_norm": 0.6279734373092651, + "learning_rate": 6.509051357453393e-06, + "loss": 0.2251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24808743596076965, + "step": 6040 + }, + { + "epoch": 5.343639575971731, + "grad_norm": 0.6400632262229919, + "learning_rate": 6.476561008168096e-06, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21455033123493195, + "step": 6045 + }, + { + "epoch": 5.348056537102473, + "grad_norm": 0.6099967360496521, + "learning_rate": 6.444136277315296e-06, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21934688091278076, + "step": 6050 + }, + { + "epoch": 5.352473498233215, + "grad_norm": 0.9150162935256958, + "learning_rate": 6.4117773222262805e-06, + "loss": 0.2358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17225447297096252, + "step": 6055 + }, + { + "epoch": 5.3568904593639575, + "grad_norm": 0.631752073764801, + "learning_rate": 6.379484299913172e-06, + "loss": 0.2533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23191042244434357, + "step": 6060 + }, + { + "epoch": 5.3613074204946995, + "grad_norm": 0.6716257929801941, + "learning_rate": 6.3472573670681805e-06, + "loss": 0.2593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3306858539581299, + "step": 6065 + }, + { + "epoch": 5.365724381625442, + "grad_norm": 0.6031466126441956, + "learning_rate": 6.315096680062838e-06, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2807907164096832, + "step": 6070 + }, + { + "epoch": 5.370141342756184, + "grad_norm": 0.6290670037269592, + "learning_rate": 6.283002394947216e-06, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22744987905025482, + "step": 6075 + }, + { + "epoch": 5.374558303886926, + "grad_norm": 0.6004354357719421, + "learning_rate": 6.2509746674492346e-06, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23005065321922302, + "step": 6080 + }, + { + "epoch": 5.378975265017668, + "grad_norm": 0.6632710099220276, + "learning_rate": 6.21901365297382e-06, + "loss": 0.2316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17470410466194153, + "step": 6085 + }, + { + "epoch": 5.38339222614841, + "grad_norm": 0.6784233450889587, + "learning_rate": 6.187119506602215e-06, + "loss": 0.2751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2649574875831604, + "step": 6090 + }, + { + "epoch": 5.387809187279152, + "grad_norm": 0.6669394969940186, + "learning_rate": 6.1552923830912e-06, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3093627393245697, + "step": 6095 + }, + { + "epoch": 5.392226148409894, + "grad_norm": 0.6822030544281006, + "learning_rate": 6.123532436872353e-06, + "loss": 0.2475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2727193236351013, + "step": 6100 + }, + { + "epoch": 5.396643109540636, + "grad_norm": 0.6640558838844299, + "learning_rate": 6.091839822051284e-06, + "loss": 0.2906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3642868995666504, + "step": 6105 + }, + { + "epoch": 5.401060070671378, + "grad_norm": 0.6346195936203003, + "learning_rate": 6.060214692406905e-06, + "loss": 0.2527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32739025354385376, + "step": 6110 + }, + { + "epoch": 5.40547703180212, + "grad_norm": 0.662929356098175, + "learning_rate": 6.028657201390682e-06, + "loss": 0.224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2002319097518921, + "step": 6115 + }, + { + "epoch": 5.409893992932862, + "grad_norm": 0.6548178195953369, + "learning_rate": 5.99716750212586e-06, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2806054651737213, + "step": 6120 + }, + { + "epoch": 5.4143109540636045, + "grad_norm": 0.6379974484443665, + "learning_rate": 5.965745747406775e-06, + "loss": 0.2818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3060104548931122, + "step": 6125 + }, + { + "epoch": 5.418727915194347, + "grad_norm": 0.6715204119682312, + "learning_rate": 5.934392089698064e-06, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16398535668849945, + "step": 6130 + }, + { + "epoch": 5.423144876325089, + "grad_norm": 0.6135415434837341, + "learning_rate": 5.903106681133952e-06, + "loss": 0.2021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19793318212032318, + "step": 6135 + }, + { + "epoch": 5.427561837455831, + "grad_norm": 0.7642617225646973, + "learning_rate": 5.871889673517501e-06, + "loss": 0.2439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24503038823604584, + "step": 6140 + }, + { + "epoch": 5.431978798586573, + "grad_norm": 0.6476492881774902, + "learning_rate": 5.840741218319881e-06, + "loss": 0.2563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23620697855949402, + "step": 6145 + }, + { + "epoch": 5.436395759717314, + "grad_norm": 0.6400974988937378, + "learning_rate": 5.809661466679635e-06, + "loss": 0.246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21778175234794617, + "step": 6150 + }, + { + "epoch": 5.440812720848056, + "grad_norm": 0.637679398059845, + "learning_rate": 5.778650569401922e-06, + "loss": 0.2374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2865186929702759, + "step": 6155 + }, + { + "epoch": 5.445229681978798, + "grad_norm": 0.6278441548347473, + "learning_rate": 5.747708676957844e-06, + "loss": 0.2198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22370074689388275, + "step": 6160 + }, + { + "epoch": 5.44964664310954, + "grad_norm": 0.6846042275428772, + "learning_rate": 5.716835939483641e-06, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18103906512260437, + "step": 6165 + }, + { + "epoch": 5.454063604240282, + "grad_norm": 0.765556812286377, + "learning_rate": 5.686032506780015e-06, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20441153645515442, + "step": 6170 + }, + { + "epoch": 5.458480565371024, + "grad_norm": 0.7388072609901428, + "learning_rate": 5.655298528311388e-06, + "loss": 0.243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.210384339094162, + "step": 6175 + }, + { + "epoch": 5.4628975265017665, + "grad_norm": 0.6503342390060425, + "learning_rate": 5.624634153205178e-06, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23481428623199463, + "step": 6180 + }, + { + "epoch": 5.467314487632509, + "grad_norm": 0.6476783752441406, + "learning_rate": 5.594039530251065e-06, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2961850166320801, + "step": 6185 + }, + { + "epoch": 5.471731448763251, + "grad_norm": 0.7560742497444153, + "learning_rate": 5.563514807900285e-06, + "loss": 0.2508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2309841811656952, + "step": 6190 + }, + { + "epoch": 5.476148409893993, + "grad_norm": 0.5617563128471375, + "learning_rate": 5.533060134264907e-06, + "loss": 0.2262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1815638542175293, + "step": 6195 + }, + { + "epoch": 5.480565371024735, + "grad_norm": 0.7148119211196899, + "learning_rate": 5.5026756571170896e-06, + "loss": 0.2373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2046487033367157, + "step": 6200 + }, + { + "epoch": 5.484982332155477, + "grad_norm": 1.1636927127838135, + "learning_rate": 5.472361523888401e-06, + "loss": 0.2203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16061300039291382, + "step": 6205 + }, + { + "epoch": 5.489399293286219, + "grad_norm": 0.682188093662262, + "learning_rate": 5.442117881669085e-06, + "loss": 0.2265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19478756189346313, + "step": 6210 + }, + { + "epoch": 5.493816254416961, + "grad_norm": 0.6677638292312622, + "learning_rate": 5.411944877207347e-06, + "loss": 0.2506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2209673374891281, + "step": 6215 + }, + { + "epoch": 5.498233215547703, + "grad_norm": 0.6800921559333801, + "learning_rate": 5.38184265690864e-06, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1875065267086029, + "step": 6220 + }, + { + "epoch": 5.502650176678445, + "grad_norm": 0.7293074131011963, + "learning_rate": 5.3518113668349645e-06, + "loss": 0.2397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31664156913757324, + "step": 6225 + }, + { + "epoch": 5.507067137809187, + "grad_norm": 0.6156737208366394, + "learning_rate": 5.321851152704154e-06, + "loss": 0.2621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17080801725387573, + "step": 6230 + }, + { + "epoch": 5.511484098939929, + "grad_norm": 0.6778889298439026, + "learning_rate": 5.291962159889148e-06, + "loss": 0.2454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24503864347934723, + "step": 6235 + }, + { + "epoch": 5.5159010600706715, + "grad_norm": 0.7297868728637695, + "learning_rate": 5.262144533417344e-06, + "loss": 0.2354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2277633249759674, + "step": 6240 + }, + { + "epoch": 5.520318021201414, + "grad_norm": 0.6664875745773315, + "learning_rate": 5.232398417969815e-06, + "loss": 0.2233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21163435280323029, + "step": 6245 + }, + { + "epoch": 5.524734982332156, + "grad_norm": 0.6079632639884949, + "learning_rate": 5.2027239578806734e-06, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2564762830734253, + "step": 6250 + }, + { + "epoch": 5.529151943462898, + "grad_norm": 0.6662228107452393, + "learning_rate": 5.173121297136337e-06, + "loss": 0.201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21402853727340698, + "step": 6255 + }, + { + "epoch": 5.53356890459364, + "grad_norm": 0.6768613457679749, + "learning_rate": 5.14359057937484e-06, + "loss": 0.2965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2002478837966919, + "step": 6260 + }, + { + "epoch": 5.537985865724382, + "grad_norm": 0.6485766172409058, + "learning_rate": 5.114131947885137e-06, + "loss": 0.2873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3273354470729828, + "step": 6265 + }, + { + "epoch": 5.542402826855124, + "grad_norm": 0.6236052513122559, + "learning_rate": 5.084745545606402e-06, + "loss": 0.2543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17392995953559875, + "step": 6270 + }, + { + "epoch": 5.546819787985866, + "grad_norm": 0.613959789276123, + "learning_rate": 5.055431515127349e-06, + "loss": 0.2463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2359083592891693, + "step": 6275 + }, + { + "epoch": 5.551236749116608, + "grad_norm": 0.6324638724327087, + "learning_rate": 5.026189998685504e-06, + "loss": 0.2449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24177028238773346, + "step": 6280 + }, + { + "epoch": 5.55565371024735, + "grad_norm": 0.6312211155891418, + "learning_rate": 4.9970211381665665e-06, + "loss": 0.275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22367584705352783, + "step": 6285 + }, + { + "epoch": 5.560070671378092, + "grad_norm": 0.9709349274635315, + "learning_rate": 4.967925075103685e-06, + "loss": 0.2554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32471510767936707, + "step": 6290 + }, + { + "epoch": 5.564487632508834, + "grad_norm": 0.6403810977935791, + "learning_rate": 4.93890195067678e-06, + "loss": 0.2563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2835839092731476, + "step": 6295 + }, + { + "epoch": 5.5689045936395765, + "grad_norm": 0.6615350246429443, + "learning_rate": 4.909951905711858e-06, + "loss": 0.2297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1782667636871338, + "step": 6300 + }, + { + "epoch": 5.573321554770318, + "grad_norm": 0.7057219743728638, + "learning_rate": 4.881075080680335e-06, + "loss": 0.2317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28443485498428345, + "step": 6305 + }, + { + "epoch": 5.57773851590106, + "grad_norm": 0.6871824264526367, + "learning_rate": 4.852271615698349e-06, + "loss": 0.2183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28328755497932434, + "step": 6310 + }, + { + "epoch": 5.582155477031802, + "grad_norm": 0.6732246279716492, + "learning_rate": 4.823541650526058e-06, + "loss": 0.2652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24324092268943787, + "step": 6315 + }, + { + "epoch": 5.586572438162544, + "grad_norm": 0.6424944400787354, + "learning_rate": 4.7948853245670294e-06, + "loss": 0.2128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17290586233139038, + "step": 6320 + }, + { + "epoch": 5.590989399293286, + "grad_norm": 0.659040093421936, + "learning_rate": 4.7663027768674705e-06, + "loss": 0.2051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15980809926986694, + "step": 6325 + }, + { + "epoch": 5.595406360424028, + "grad_norm": 0.6853231191635132, + "learning_rate": 4.737794146115633e-06, + "loss": 0.2225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19948209822177887, + "step": 6330 + }, + { + "epoch": 5.59982332155477, + "grad_norm": 0.651915431022644, + "learning_rate": 4.7093595706410945e-06, + "loss": 0.2627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36641934514045715, + "step": 6335 + }, + { + "epoch": 5.604240282685512, + "grad_norm": 0.6209613084793091, + "learning_rate": 4.680999188414108e-06, + "loss": 0.24, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20791277289390564, + "step": 6340 + }, + { + "epoch": 5.608657243816254, + "grad_norm": 0.7123491764068604, + "learning_rate": 4.652713137044927e-06, + "loss": 0.2189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25710418820381165, + "step": 6345 + }, + { + "epoch": 5.613074204946996, + "grad_norm": 0.7246097922325134, + "learning_rate": 4.624501553783127e-06, + "loss": 0.2295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23944400250911713, + "step": 6350 + }, + { + "epoch": 5.6174911660777385, + "grad_norm": 0.7621732354164124, + "learning_rate": 4.596364575516969e-06, + "loss": 0.2256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26072970032691956, + "step": 6355 + }, + { + "epoch": 5.6219081272084805, + "grad_norm": 0.6371416449546814, + "learning_rate": 4.568302338772688e-06, + "loss": 0.2364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21054017543792725, + "step": 6360 + }, + { + "epoch": 5.626325088339223, + "grad_norm": 0.6732529401779175, + "learning_rate": 4.540314979713876e-06, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19560235738754272, + "step": 6365 + }, + { + "epoch": 5.630742049469965, + "grad_norm": 0.6369305849075317, + "learning_rate": 4.512402634140804e-06, + "loss": 0.2584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2767673134803772, + "step": 6370 + }, + { + "epoch": 5.635159010600707, + "grad_norm": 0.6594879627227783, + "learning_rate": 4.484565437489759e-06, + "loss": 0.2553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27891114354133606, + "step": 6375 + }, + { + "epoch": 5.639575971731449, + "grad_norm": 0.6730323433876038, + "learning_rate": 4.456803524832389e-06, + "loss": 0.2364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2509545385837555, + "step": 6380 + }, + { + "epoch": 5.643992932862191, + "grad_norm": 0.6865994334220886, + "learning_rate": 4.429117030875052e-06, + "loss": 0.2052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1903909146785736, + "step": 6385 + }, + { + "epoch": 5.648409893992933, + "grad_norm": 0.6434392333030701, + "learning_rate": 4.401506089958161e-06, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29577142000198364, + "step": 6390 + }, + { + "epoch": 5.652826855123675, + "grad_norm": 0.6849035620689392, + "learning_rate": 4.37397083605551e-06, + "loss": 0.2162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2336483895778656, + "step": 6395 + }, + { + "epoch": 5.657243816254417, + "grad_norm": 0.6565053462982178, + "learning_rate": 4.346511402773688e-06, + "loss": 0.2306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23053717613220215, + "step": 6400 + }, + { + "epoch": 5.661660777385159, + "grad_norm": 0.7413392066955566, + "learning_rate": 4.319127923351339e-06, + "loss": 0.2713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27822649478912354, + "step": 6405 + }, + { + "epoch": 5.666077738515901, + "grad_norm": 0.613976776599884, + "learning_rate": 4.291820530658595e-06, + "loss": 0.2549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2500184178352356, + "step": 6410 + }, + { + "epoch": 5.670494699646643, + "grad_norm": 0.6709519028663635, + "learning_rate": 4.264589357196389e-06, + "loss": 0.2342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2182079702615738, + "step": 6415 + }, + { + "epoch": 5.6749116607773855, + "grad_norm": 0.618645966053009, + "learning_rate": 4.2374345350958256e-06, + "loss": 0.2329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16464778780937195, + "step": 6420 + }, + { + "epoch": 5.679328621908128, + "grad_norm": 0.6145651936531067, + "learning_rate": 4.2103561961175354e-06, + "loss": 0.2168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25252848863601685, + "step": 6425 + }, + { + "epoch": 5.683745583038869, + "grad_norm": 0.7100237607955933, + "learning_rate": 4.183354471651037e-06, + "loss": 0.2357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21529585123062134, + "step": 6430 + }, + { + "epoch": 5.688162544169611, + "grad_norm": 0.6366299986839294, + "learning_rate": 4.156429492714109e-06, + "loss": 0.2213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22616274654865265, + "step": 6435 + }, + { + "epoch": 5.692579505300353, + "grad_norm": 0.6993550658226013, + "learning_rate": 4.129581389952129e-06, + "loss": 0.2259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16478785872459412, + "step": 6440 + }, + { + "epoch": 5.696996466431095, + "grad_norm": 0.6645349860191345, + "learning_rate": 4.102810293637465e-06, + "loss": 0.2262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22474364936351776, + "step": 6445 + }, + { + "epoch": 5.701413427561837, + "grad_norm": 0.6140891313552856, + "learning_rate": 4.076116333668838e-06, + "loss": 0.2337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23928777873516083, + "step": 6450 + }, + { + "epoch": 5.705830388692579, + "grad_norm": 0.6906691193580627, + "learning_rate": 4.049499639570682e-06, + "loss": 0.2503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.206419438123703, + "step": 6455 + }, + { + "epoch": 5.710247349823321, + "grad_norm": 0.8086975812911987, + "learning_rate": 4.022960340492525e-06, + "loss": 0.2277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2215789258480072, + "step": 6460 + }, + { + "epoch": 5.714664310954063, + "grad_norm": 0.6342015266418457, + "learning_rate": 3.996498565208358e-06, + "loss": 0.2277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20739787817001343, + "step": 6465 + }, + { + "epoch": 5.719081272084805, + "grad_norm": 0.6628281474113464, + "learning_rate": 3.970114442116013e-06, + "loss": 0.1905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18304401636123657, + "step": 6470 + }, + { + "epoch": 5.7234982332155475, + "grad_norm": 0.6513307690620422, + "learning_rate": 3.943808099236524e-06, + "loss": 0.2257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21980533003807068, + "step": 6475 + }, + { + "epoch": 5.72791519434629, + "grad_norm": 0.6264829635620117, + "learning_rate": 3.917579664213549e-06, + "loss": 0.2471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3289884328842163, + "step": 6480 + }, + { + "epoch": 5.732332155477032, + "grad_norm": 0.6363914012908936, + "learning_rate": 3.8914292643126915e-06, + "loss": 0.225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2795798182487488, + "step": 6485 + }, + { + "epoch": 5.736749116607774, + "grad_norm": 0.693565309047699, + "learning_rate": 3.865357026420926e-06, + "loss": 0.2457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2867644131183624, + "step": 6490 + }, + { + "epoch": 5.741166077738516, + "grad_norm": 0.6560927629470825, + "learning_rate": 3.839363077045974e-06, + "loss": 0.229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2569573223590851, + "step": 6495 + }, + { + "epoch": 5.745583038869258, + "grad_norm": 0.6350461840629578, + "learning_rate": 3.8134475423156757e-06, + "loss": 0.2428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2617478370666504, + "step": 6500 + }, + { + "epoch": 5.75, + "grad_norm": 0.7265409827232361, + "learning_rate": 3.787610547977396e-06, + "loss": 0.2413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18774326145648956, + "step": 6505 + }, + { + "epoch": 5.754416961130742, + "grad_norm": 0.6530236005783081, + "learning_rate": 3.7618522193973994e-06, + "loss": 0.2527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.277637243270874, + "step": 6510 + }, + { + "epoch": 5.758833922261484, + "grad_norm": 0.684080958366394, + "learning_rate": 3.7361726815602596e-06, + "loss": 0.2515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2156839370727539, + "step": 6515 + }, + { + "epoch": 5.763250883392226, + "grad_norm": 0.6697428822517395, + "learning_rate": 3.710572059068218e-06, + "loss": 0.2364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26217517256736755, + "step": 6520 + }, + { + "epoch": 5.767667844522968, + "grad_norm": 0.6842355132102966, + "learning_rate": 3.6850504761406282e-06, + "loss": 0.291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34840089082717896, + "step": 6525 + }, + { + "epoch": 5.77208480565371, + "grad_norm": 0.7177510261535645, + "learning_rate": 3.6596080566133176e-06, + "loss": 0.2566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25881335139274597, + "step": 6530 + }, + { + "epoch": 5.7765017667844525, + "grad_norm": 0.6908156275749207, + "learning_rate": 3.6342449239379974e-06, + "loss": 0.2514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25273647904396057, + "step": 6535 + }, + { + "epoch": 5.780918727915195, + "grad_norm": 0.6992905139923096, + "learning_rate": 3.608961201181662e-06, + "loss": 0.2722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27057141065597534, + "step": 6540 + }, + { + "epoch": 5.785335689045937, + "grad_norm": 0.708998441696167, + "learning_rate": 3.5837570110259945e-06, + "loss": 0.2274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2736978530883789, + "step": 6545 + }, + { + "epoch": 5.789752650176679, + "grad_norm": 0.5654464364051819, + "learning_rate": 3.558632475766777e-06, + "loss": 0.237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23132240772247314, + "step": 6550 + }, + { + "epoch": 5.794169611307421, + "grad_norm": 0.7154576182365417, + "learning_rate": 3.5335877173132672e-06, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29507291316986084, + "step": 6555 + }, + { + "epoch": 5.798586572438163, + "grad_norm": 0.6882935762405396, + "learning_rate": 3.5086228571876622e-06, + "loss": 0.2782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2825637757778168, + "step": 6560 + }, + { + "epoch": 5.803003533568905, + "grad_norm": 0.6030979156494141, + "learning_rate": 3.4837380165244494e-06, + "loss": 0.2596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29193153977394104, + "step": 6565 + }, + { + "epoch": 5.807420494699647, + "grad_norm": 0.6919524073600769, + "learning_rate": 3.4589333160698592e-06, + "loss": 0.2359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2584839463233948, + "step": 6570 + }, + { + "epoch": 5.811837455830389, + "grad_norm": 0.6700971722602844, + "learning_rate": 3.434208876181262e-06, + "loss": 0.244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2972269654273987, + "step": 6575 + }, + { + "epoch": 5.816254416961131, + "grad_norm": 0.6191151142120361, + "learning_rate": 3.409564816826587e-06, + "loss": 0.2667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31485503911972046, + "step": 6580 + }, + { + "epoch": 5.820671378091872, + "grad_norm": 0.627770185470581, + "learning_rate": 3.385001257583744e-06, + "loss": 0.2082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27181103825569153, + "step": 6585 + }, + { + "epoch": 5.8250883392226145, + "grad_norm": 0.6451102495193481, + "learning_rate": 3.3605183176400402e-06, + "loss": 0.2312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3111085295677185, + "step": 6590 + }, + { + "epoch": 5.829505300353357, + "grad_norm": 0.6402949094772339, + "learning_rate": 3.3361161157916012e-06, + "loss": 0.2148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17799827456474304, + "step": 6595 + }, + { + "epoch": 5.833922261484099, + "grad_norm": 0.6498696804046631, + "learning_rate": 3.3117947704427866e-06, + "loss": 0.2404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2313275933265686, + "step": 6600 + }, + { + "epoch": 5.838339222614841, + "grad_norm": 0.6034268736839294, + "learning_rate": 3.287554399605637e-06, + "loss": 0.2114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18082204461097717, + "step": 6605 + }, + { + "epoch": 5.842756183745583, + "grad_norm": 0.6362025141716003, + "learning_rate": 3.2633951208992797e-06, + "loss": 0.2358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22728809714317322, + "step": 6610 + }, + { + "epoch": 5.847173144876325, + "grad_norm": 0.6251878142356873, + "learning_rate": 3.2393170515493756e-06, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24366095662117004, + "step": 6615 + }, + { + "epoch": 5.851590106007067, + "grad_norm": 0.6515399813652039, + "learning_rate": 3.2153203083875306e-06, + "loss": 0.2544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21778994798660278, + "step": 6620 + }, + { + "epoch": 5.856007067137809, + "grad_norm": 0.8303614854812622, + "learning_rate": 3.19140500785075e-06, + "loss": 0.2382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14264947175979614, + "step": 6625 + }, + { + "epoch": 5.860424028268551, + "grad_norm": 0.6612645387649536, + "learning_rate": 3.1675712659808576e-06, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22965016961097717, + "step": 6630 + }, + { + "epoch": 5.864840989399293, + "grad_norm": 0.6375709176063538, + "learning_rate": 3.1438191984239297e-06, + "loss": 0.226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24031785130500793, + "step": 6635 + }, + { + "epoch": 5.869257950530035, + "grad_norm": 0.6527304649353027, + "learning_rate": 3.1201489204297663e-06, + "loss": 0.2465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18436655402183533, + "step": 6640 + }, + { + "epoch": 5.873674911660777, + "grad_norm": 0.6133697628974915, + "learning_rate": 3.0965605468512837e-06, + "loss": 0.2645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2529999315738678, + "step": 6645 + }, + { + "epoch": 5.8780918727915195, + "grad_norm": 0.6855648756027222, + "learning_rate": 3.0730541921439936e-06, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2065194845199585, + "step": 6650 + }, + { + "epoch": 5.8825088339222615, + "grad_norm": 0.7099367380142212, + "learning_rate": 3.049629970365433e-06, + "loss": 0.2309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20094117522239685, + "step": 6655 + }, + { + "epoch": 5.886925795053004, + "grad_norm": 0.6442746520042419, + "learning_rate": 3.026287995174615e-06, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22805841267108917, + "step": 6660 + }, + { + "epoch": 5.891342756183746, + "grad_norm": 0.6667559146881104, + "learning_rate": 3.0030283798314785e-06, + "loss": 0.2445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19497643411159515, + "step": 6665 + }, + { + "epoch": 5.895759717314488, + "grad_norm": 0.6932128071784973, + "learning_rate": 2.9798512371963207e-06, + "loss": 0.2535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2080661952495575, + "step": 6670 + }, + { + "epoch": 5.90017667844523, + "grad_norm": 0.6642143130302429, + "learning_rate": 2.9567566797292914e-06, + "loss": 0.2605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26065370440483093, + "step": 6675 + }, + { + "epoch": 5.904593639575972, + "grad_norm": 0.7001204490661621, + "learning_rate": 2.9337448194897943e-06, + "loss": 0.2583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25018325448036194, + "step": 6680 + }, + { + "epoch": 5.909010600706714, + "grad_norm": 0.6896331310272217, + "learning_rate": 2.9108157681359837e-06, + "loss": 0.2428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.317201167345047, + "step": 6685 + }, + { + "epoch": 5.913427561837456, + "grad_norm": 0.6316036581993103, + "learning_rate": 2.8879696369242062e-06, + "loss": 0.2704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2903493642807007, + "step": 6690 + }, + { + "epoch": 5.917844522968198, + "grad_norm": 0.5617479681968689, + "learning_rate": 2.8652065367084627e-06, + "loss": 0.2479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25086894631385803, + "step": 6695 + }, + { + "epoch": 5.92226148409894, + "grad_norm": 0.6145947575569153, + "learning_rate": 2.8425265779398704e-06, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2544384002685547, + "step": 6700 + }, + { + "epoch": 5.926678445229682, + "grad_norm": 0.718999981880188, + "learning_rate": 2.819929870666129e-06, + "loss": 0.2146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.155266672372818, + "step": 6705 + }, + { + "epoch": 5.9310954063604235, + "grad_norm": 0.8380730748176575, + "learning_rate": 2.7974165245309913e-06, + "loss": 0.2275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1611081063747406, + "step": 6710 + }, + { + "epoch": 5.935512367491166, + "grad_norm": 0.7612493634223938, + "learning_rate": 2.774986648773701e-06, + "loss": 0.2487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23470436036586761, + "step": 6715 + }, + { + "epoch": 5.939929328621908, + "grad_norm": 0.6034536361694336, + "learning_rate": 2.752640352228524e-06, + "loss": 0.254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2653493583202362, + "step": 6720 + }, + { + "epoch": 5.94434628975265, + "grad_norm": 0.6654173731803894, + "learning_rate": 2.7303777433241506e-06, + "loss": 0.2321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25490570068359375, + "step": 6725 + }, + { + "epoch": 5.948763250883392, + "grad_norm": 0.6058556437492371, + "learning_rate": 2.708198930083219e-06, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2901003956794739, + "step": 6730 + }, + { + "epoch": 5.953180212014134, + "grad_norm": 0.6939293146133423, + "learning_rate": 2.6861040201217692e-06, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.260385125875473, + "step": 6735 + }, + { + "epoch": 5.957597173144876, + "grad_norm": 0.6245858669281006, + "learning_rate": 2.6640931206487252e-06, + "loss": 0.2539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25670942664146423, + "step": 6740 + }, + { + "epoch": 5.962014134275618, + "grad_norm": 0.6986921429634094, + "learning_rate": 2.642166338465384e-06, + "loss": 0.2416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21145933866500854, + "step": 6745 + }, + { + "epoch": 5.96643109540636, + "grad_norm": 0.6841897964477539, + "learning_rate": 2.6203237799648663e-06, + "loss": 0.213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18843623995780945, + "step": 6750 + }, + { + "epoch": 5.970848056537102, + "grad_norm": 0.6969452500343323, + "learning_rate": 2.598565551131653e-06, + "loss": 0.2295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3311152458190918, + "step": 6755 + }, + { + "epoch": 5.975265017667844, + "grad_norm": 0.6374683380126953, + "learning_rate": 2.5768917575410134e-06, + "loss": 0.2332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2040780782699585, + "step": 6760 + }, + { + "epoch": 5.979681978798586, + "grad_norm": 0.7229990363121033, + "learning_rate": 2.555302504358537e-06, + "loss": 0.2225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21656137704849243, + "step": 6765 + }, + { + "epoch": 5.9840989399293285, + "grad_norm": 0.7268469333648682, + "learning_rate": 2.5337978963396003e-06, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1876240074634552, + "step": 6770 + }, + { + "epoch": 5.988515901060071, + "grad_norm": 0.642284631729126, + "learning_rate": 2.5123780378288642e-06, + "loss": 0.2404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24313439428806305, + "step": 6775 + }, + { + "epoch": 5.992932862190813, + "grad_norm": 0.6617910265922546, + "learning_rate": 2.49104303275977e-06, + "loss": 0.2628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25242412090301514, + "step": 6780 + }, + { + "epoch": 5.997349823321555, + "grad_norm": 0.6622885465621948, + "learning_rate": 2.4697929846540335e-06, + "loss": 0.2371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28016042709350586, + "step": 6785 + }, + { + "epoch": 6.002650176678445, + "grad_norm": 0.6871181726455688, + "learning_rate": 2.4486279966211425e-06, + "loss": 0.2211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21785372495651245, + "step": 6790 + }, + { + "epoch": 6.007067137809187, + "grad_norm": 0.634680986404419, + "learning_rate": 2.427548171357843e-06, + "loss": 0.2488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23458047211170197, + "step": 6795 + }, + { + "epoch": 6.011484098939929, + "grad_norm": 0.7124053835868835, + "learning_rate": 2.406553611147684e-06, + "loss": 0.2631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1963014453649521, + "step": 6800 + }, + { + "epoch": 6.0159010600706715, + "grad_norm": 0.6288027763366699, + "learning_rate": 2.38564441786046e-06, + "loss": 0.2463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22201281785964966, + "step": 6805 + }, + { + "epoch": 6.020318021201414, + "grad_norm": 0.8096534013748169, + "learning_rate": 2.364820692951766e-06, + "loss": 0.2418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2866779565811157, + "step": 6810 + }, + { + "epoch": 6.024734982332156, + "grad_norm": 0.7279224395751953, + "learning_rate": 2.3440825374624798e-06, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1597205102443695, + "step": 6815 + }, + { + "epoch": 6.029151943462898, + "grad_norm": 0.6755273938179016, + "learning_rate": 2.3234300520182873e-06, + "loss": 0.2535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2171267867088318, + "step": 6820 + }, + { + "epoch": 6.03356890459364, + "grad_norm": 0.6738324761390686, + "learning_rate": 2.3028633368291843e-06, + "loss": 0.2408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31061723828315735, + "step": 6825 + }, + { + "epoch": 6.037985865724382, + "grad_norm": 0.6739371418952942, + "learning_rate": 2.2823824916889724e-06, + "loss": 0.2133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19004075229167938, + "step": 6830 + }, + { + "epoch": 6.042402826855124, + "grad_norm": 0.7035517692565918, + "learning_rate": 2.261987615974832e-06, + "loss": 0.2345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29125821590423584, + "step": 6835 + }, + { + "epoch": 6.046819787985866, + "grad_norm": 0.718480110168457, + "learning_rate": 2.241678808646768e-06, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21620148420333862, + "step": 6840 + }, + { + "epoch": 6.051236749116608, + "grad_norm": 0.6759348511695862, + "learning_rate": 2.2214561682471825e-06, + "loss": 0.2263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22231999039649963, + "step": 6845 + }, + { + "epoch": 6.05565371024735, + "grad_norm": 0.6698014140129089, + "learning_rate": 2.201319792900374e-06, + "loss": 0.2305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20235413312911987, + "step": 6850 + }, + { + "epoch": 6.060070671378092, + "grad_norm": 0.6121541261672974, + "learning_rate": 2.181269780312063e-06, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20862017571926117, + "step": 6855 + }, + { + "epoch": 6.0644876325088335, + "grad_norm": 0.6756522059440613, + "learning_rate": 2.1613062277689266e-06, + "loss": 0.2205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17809954285621643, + "step": 6860 + }, + { + "epoch": 6.068904593639576, + "grad_norm": 0.7342451810836792, + "learning_rate": 2.141429232138117e-06, + "loss": 0.2207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2191966474056244, + "step": 6865 + }, + { + "epoch": 6.073321554770318, + "grad_norm": 0.7150316834449768, + "learning_rate": 2.1216388898667973e-06, + "loss": 0.2352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23332220315933228, + "step": 6870 + }, + { + "epoch": 6.07773851590106, + "grad_norm": 0.6240957379341125, + "learning_rate": 2.1019352969816585e-06, + "loss": 0.265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24282801151275635, + "step": 6875 + }, + { + "epoch": 6.082155477031802, + "grad_norm": 0.6066805720329285, + "learning_rate": 2.082318549088491e-06, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26603370904922485, + "step": 6880 + }, + { + "epoch": 6.086572438162544, + "grad_norm": 0.7509016990661621, + "learning_rate": 2.062788741371673e-06, + "loss": 0.2227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3612693250179291, + "step": 6885 + }, + { + "epoch": 6.090989399293286, + "grad_norm": 0.701538622379303, + "learning_rate": 2.0433459685937395e-06, + "loss": 0.2593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.318620890378952, + "step": 6890 + }, + { + "epoch": 6.095406360424028, + "grad_norm": 0.6263982653617859, + "learning_rate": 2.0239903250949176e-06, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23397664725780487, + "step": 6895 + }, + { + "epoch": 6.09982332155477, + "grad_norm": 0.644121527671814, + "learning_rate": 2.0047219047926614e-06, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22818706929683685, + "step": 6900 + }, + { + "epoch": 6.104240282685512, + "grad_norm": 0.6542277336120605, + "learning_rate": 1.9855408011812117e-06, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2627973258495331, + "step": 6905 + }, + { + "epoch": 6.108657243816254, + "grad_norm": 0.6295666694641113, + "learning_rate": 1.966447107331104e-06, + "loss": 0.2238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27365830540657043, + "step": 6910 + }, + { + "epoch": 6.113074204946996, + "grad_norm": 0.6320794224739075, + "learning_rate": 1.9474409158887807e-06, + "loss": 0.233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23829936981201172, + "step": 6915 + }, + { + "epoch": 6.1174911660777385, + "grad_norm": 0.7099300622940063, + "learning_rate": 1.9285223190760737e-06, + "loss": 0.2178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16763125360012054, + "step": 6920 + }, + { + "epoch": 6.1219081272084805, + "grad_norm": 0.5849335193634033, + "learning_rate": 1.9096914086898087e-06, + "loss": 0.2098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2682895064353943, + "step": 6925 + }, + { + "epoch": 6.126325088339223, + "grad_norm": 0.5837305784225464, + "learning_rate": 1.8909482761013254e-06, + "loss": 0.2236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20228040218353271, + "step": 6930 + }, + { + "epoch": 6.130742049469965, + "grad_norm": 0.7193915247917175, + "learning_rate": 1.872293012256059e-06, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24561667442321777, + "step": 6935 + }, + { + "epoch": 6.135159010600707, + "grad_norm": 0.7574843168258667, + "learning_rate": 1.853725707673082e-06, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26278358697891235, + "step": 6940 + }, + { + "epoch": 6.139575971731449, + "grad_norm": 0.7833778262138367, + "learning_rate": 1.8352464524446724e-06, + "loss": 0.2054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18190684914588928, + "step": 6945 + }, + { + "epoch": 6.143992932862191, + "grad_norm": 0.6389424204826355, + "learning_rate": 1.8168553362358787e-06, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.228424072265625, + "step": 6950 + }, + { + "epoch": 6.148409893992933, + "grad_norm": 0.7503507733345032, + "learning_rate": 1.7985524482840676e-06, + "loss": 0.2483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3514091968536377, + "step": 6955 + }, + { + "epoch": 6.152826855123675, + "grad_norm": 0.6644186973571777, + "learning_rate": 1.7803378773985214e-06, + "loss": 0.2445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23452533781528473, + "step": 6960 + }, + { + "epoch": 6.157243816254417, + "grad_norm": 0.6842535138130188, + "learning_rate": 1.7622117119599802e-06, + "loss": 0.2313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2150776982307434, + "step": 6965 + }, + { + "epoch": 6.161660777385159, + "grad_norm": 0.6151363849639893, + "learning_rate": 1.74417403992023e-06, + "loss": 0.2473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25320905447006226, + "step": 6970 + }, + { + "epoch": 6.166077738515901, + "grad_norm": 0.8280779719352722, + "learning_rate": 1.7262249488016648e-06, + "loss": 0.276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3232913315296173, + "step": 6975 + }, + { + "epoch": 6.170494699646643, + "grad_norm": 0.6878020167350769, + "learning_rate": 1.708364525696864e-06, + "loss": 0.2731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3126068711280823, + "step": 6980 + }, + { + "epoch": 6.1749116607773855, + "grad_norm": 0.7097288966178894, + "learning_rate": 1.6905928572681806e-06, + "loss": 0.2509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3269100785255432, + "step": 6985 + }, + { + "epoch": 6.179328621908128, + "grad_norm": 0.6415253281593323, + "learning_rate": 1.6729100297472967e-06, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2540552020072937, + "step": 6990 + }, + { + "epoch": 6.18374558303887, + "grad_norm": 0.6544218063354492, + "learning_rate": 1.6553161289348429e-06, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21726730465888977, + "step": 6995 + }, + { + "epoch": 6.188162544169611, + "grad_norm": 0.6272726655006409, + "learning_rate": 1.637811240199938e-06, + "loss": 0.2144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27637243270874023, + "step": 7000 + }, + { + "epoch": 6.192579505300353, + "grad_norm": 0.651709258556366, + "learning_rate": 1.620395448479808e-06, + "loss": 0.2198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15154890716075897, + "step": 7005 + }, + { + "epoch": 6.196996466431095, + "grad_norm": 0.6224712133407593, + "learning_rate": 1.603068838279358e-06, + "loss": 0.2222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2673448920249939, + "step": 7010 + }, + { + "epoch": 6.201413427561837, + "grad_norm": 0.6794725060462952, + "learning_rate": 1.5858314936707731e-06, + "loss": 0.2144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26523715257644653, + "step": 7015 + }, + { + "epoch": 6.205830388692579, + "grad_norm": 0.6605962514877319, + "learning_rate": 1.5686834982930954e-06, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24990308284759521, + "step": 7020 + }, + { + "epoch": 6.210247349823321, + "grad_norm": 0.6467421054840088, + "learning_rate": 1.551624935351832e-06, + "loss": 0.2235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21919971704483032, + "step": 7025 + }, + { + "epoch": 6.214664310954063, + "grad_norm": 0.6687191724777222, + "learning_rate": 1.5346558876185459e-06, + "loss": 0.2176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19604460895061493, + "step": 7030 + }, + { + "epoch": 6.219081272084805, + "grad_norm": 0.6332302689552307, + "learning_rate": 1.5177764374304493e-06, + "loss": 0.2437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2982211112976074, + "step": 7035 + }, + { + "epoch": 6.2234982332155475, + "grad_norm": 0.6573311686515808, + "learning_rate": 1.500986666690012e-06, + "loss": 0.241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2335302233695984, + "step": 7040 + }, + { + "epoch": 6.22791519434629, + "grad_norm": 0.6834275126457214, + "learning_rate": 1.4842866568645642e-06, + "loss": 0.2073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2586025595664978, + "step": 7045 + }, + { + "epoch": 6.232332155477032, + "grad_norm": 0.6670604944229126, + "learning_rate": 1.4676764889858964e-06, + "loss": 0.2075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15618737041950226, + "step": 7050 + }, + { + "epoch": 6.236749116607774, + "grad_norm": 0.6588166356086731, + "learning_rate": 1.4511562436498671e-06, + "loss": 0.2369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2003191113471985, + "step": 7055 + }, + { + "epoch": 6.241166077738516, + "grad_norm": 0.6686882972717285, + "learning_rate": 1.4347260010160112e-06, + "loss": 0.2507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3045847713947296, + "step": 7060 + }, + { + "epoch": 6.245583038869258, + "grad_norm": 0.8178129196166992, + "learning_rate": 1.418385840807157e-06, + "loss": 0.2332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.307248055934906, + "step": 7065 + }, + { + "epoch": 6.25, + "grad_norm": 0.7019538283348083, + "learning_rate": 1.402135842309027e-06, + "loss": 0.2203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20041900873184204, + "step": 7070 + }, + { + "epoch": 6.254416961130742, + "grad_norm": 0.6783828735351562, + "learning_rate": 1.3859760843698733e-06, + "loss": 0.2481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20858903229236603, + "step": 7075 + }, + { + "epoch": 6.258833922261484, + "grad_norm": 0.7637086510658264, + "learning_rate": 1.3699066454000698e-06, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2005981206893921, + "step": 7080 + }, + { + "epoch": 6.263250883392226, + "grad_norm": 0.7075777649879456, + "learning_rate": 1.353927603371754e-06, + "loss": 0.2606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28827959299087524, + "step": 7085 + }, + { + "epoch": 6.267667844522968, + "grad_norm": 0.6434727311134338, + "learning_rate": 1.3380390358184324e-06, + "loss": 0.2137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19321520626544952, + "step": 7090 + }, + { + "epoch": 6.27208480565371, + "grad_norm": 0.7264281511306763, + "learning_rate": 1.322241019834616e-06, + "loss": 0.2577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30105140805244446, + "step": 7095 + }, + { + "epoch": 6.2765017667844525, + "grad_norm": 0.6986357569694519, + "learning_rate": 1.3065336320754418e-06, + "loss": 0.232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2179412990808487, + "step": 7100 + }, + { + "epoch": 6.280918727915195, + "grad_norm": 0.6690407991409302, + "learning_rate": 1.2909169487562978e-06, + "loss": 0.2367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27286529541015625, + "step": 7105 + }, + { + "epoch": 6.285335689045937, + "grad_norm": 0.6591483354568481, + "learning_rate": 1.2753910456524588e-06, + "loss": 0.2241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22066080570220947, + "step": 7110 + }, + { + "epoch": 6.289752650176679, + "grad_norm": 0.825552225112915, + "learning_rate": 1.2599559980987076e-06, + "loss": 0.2708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22613100707530975, + "step": 7115 + }, + { + "epoch": 6.294169611307421, + "grad_norm": 0.6642948985099792, + "learning_rate": 1.2446118809889906e-06, + "loss": 0.2394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1839137226343155, + "step": 7120 + }, + { + "epoch": 6.298586572438163, + "grad_norm": 0.7041632533073425, + "learning_rate": 1.22935876877603e-06, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19626665115356445, + "step": 7125 + }, + { + "epoch": 6.303003533568905, + "grad_norm": 0.7177551984786987, + "learning_rate": 1.214196735470985e-06, + "loss": 0.2486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.276611328125, + "step": 7130 + }, + { + "epoch": 6.307420494699647, + "grad_norm": 0.6658399701118469, + "learning_rate": 1.1991258546430683e-06, + "loss": 0.2368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27585369348526, + "step": 7135 + }, + { + "epoch": 6.311837455830389, + "grad_norm": 0.6692659258842468, + "learning_rate": 1.184146199419216e-06, + "loss": 0.2728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2560042142868042, + "step": 7140 + }, + { + "epoch": 6.316254416961131, + "grad_norm": 0.6785102486610413, + "learning_rate": 1.1692578424837131e-06, + "loss": 0.232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3469223976135254, + "step": 7145 + }, + { + "epoch": 6.320671378091872, + "grad_norm": 0.6885204911231995, + "learning_rate": 1.1544608560778392e-06, + "loss": 0.198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1730516254901886, + "step": 7150 + }, + { + "epoch": 6.3250883392226145, + "grad_norm": 0.7053970694541931, + "learning_rate": 1.139755311999544e-06, + "loss": 0.2155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22853153944015503, + "step": 7155 + }, + { + "epoch": 6.329505300353357, + "grad_norm": 0.6633711457252502, + "learning_rate": 1.1251412816030637e-06, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18317949771881104, + "step": 7160 + }, + { + "epoch": 6.333922261484099, + "grad_norm": 0.6200980544090271, + "learning_rate": 1.1106188357986003e-06, + "loss": 0.2302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28622856736183167, + "step": 7165 + }, + { + "epoch": 6.338339222614841, + "grad_norm": 0.7030071020126343, + "learning_rate": 1.096188045051969e-06, + "loss": 0.2349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20836327970027924, + "step": 7170 + }, + { + "epoch": 6.342756183745583, + "grad_norm": 0.6545681953430176, + "learning_rate": 1.0818489793842523e-06, + "loss": 0.2205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21316681802272797, + "step": 7175 + }, + { + "epoch": 6.347173144876325, + "grad_norm": 0.626930296421051, + "learning_rate": 1.0676017083714684e-06, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33314627408981323, + "step": 7180 + }, + { + "epoch": 6.351590106007067, + "grad_norm": 0.6591570973396301, + "learning_rate": 1.0534463011442276e-06, + "loss": 0.2251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2925190329551697, + "step": 7185 + }, + { + "epoch": 6.356007067137809, + "grad_norm": 0.6768887639045715, + "learning_rate": 1.0393828263873985e-06, + "loss": 0.2692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20045700669288635, + "step": 7190 + }, + { + "epoch": 6.360424028268551, + "grad_norm": 0.6835484504699707, + "learning_rate": 1.0254113523397736e-06, + "loss": 0.2218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1818992793560028, + "step": 7195 + }, + { + "epoch": 6.364840989399293, + "grad_norm": 0.6533926725387573, + "learning_rate": 1.0115319467937402e-06, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2500065565109253, + "step": 7200 + }, + { + "epoch": 6.369257950530035, + "grad_norm": 0.6944401860237122, + "learning_rate": 9.977446770949562e-07, + "loss": 0.2294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23091307282447815, + "step": 7205 + }, + { + "epoch": 6.373674911660777, + "grad_norm": 0.6814969778060913, + "learning_rate": 9.840496101420106e-07, + "loss": 0.2407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20970004796981812, + "step": 7210 + }, + { + "epoch": 6.3780918727915195, + "grad_norm": 0.6713853478431702, + "learning_rate": 9.704468123861077e-07, + "loss": 0.2477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24036778509616852, + "step": 7215 + }, + { + "epoch": 6.3825088339222615, + "grad_norm": 0.6768038868904114, + "learning_rate": 9.569363498307482e-07, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2124941051006317, + "step": 7220 + }, + { + "epoch": 6.386925795053004, + "grad_norm": 0.7332238554954529, + "learning_rate": 9.43518288031402e-07, + "loss": 0.2365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25171586871147156, + "step": 7225 + }, + { + "epoch": 6.391342756183746, + "grad_norm": 0.7168375849723816, + "learning_rate": 9.301926920951798e-07, + "loss": 0.192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18527813255786896, + "step": 7230 + }, + { + "epoch": 6.395759717314488, + "grad_norm": 0.7241307497024536, + "learning_rate": 9.169596266805536e-07, + "loss": 0.2508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25133055448532104, + "step": 7235 + }, + { + "epoch": 6.40017667844523, + "grad_norm": 0.6914727091789246, + "learning_rate": 9.038191559969967e-07, + "loss": 0.2597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2576597332954407, + "step": 7240 + }, + { + "epoch": 6.404593639575972, + "grad_norm": 0.6564986705780029, + "learning_rate": 8.907713438047039e-07, + "loss": 0.2343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28039878606796265, + "step": 7245 + }, + { + "epoch": 6.409010600706714, + "grad_norm": 0.690380334854126, + "learning_rate": 8.77816253414272e-07, + "loss": 0.2845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19468779861927032, + "step": 7250 + }, + { + "epoch": 6.413427561837456, + "grad_norm": 0.6827090978622437, + "learning_rate": 8.649539476863933e-07, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1751917153596878, + "step": 7255 + }, + { + "epoch": 6.417844522968198, + "grad_norm": 0.7905651926994324, + "learning_rate": 8.521844890315489e-07, + "loss": 0.2369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22601011395454407, + "step": 7260 + }, + { + "epoch": 6.42226148409894, + "grad_norm": 0.7389162182807922, + "learning_rate": 8.395079394097072e-07, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18410909175872803, + "step": 7265 + }, + { + "epoch": 6.426678445229682, + "grad_norm": 0.7305995225906372, + "learning_rate": 8.269243603300259e-07, + "loss": 0.2387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22303178906440735, + "step": 7270 + }, + { + "epoch": 6.431095406360424, + "grad_norm": 0.7176210284233093, + "learning_rate": 8.144338128505458e-07, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2397802472114563, + "step": 7275 + }, + { + "epoch": 6.435512367491166, + "grad_norm": 0.6935552954673767, + "learning_rate": 8.020363575779044e-07, + "loss": 0.2516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2560634911060333, + "step": 7280 + }, + { + "epoch": 6.439929328621908, + "grad_norm": 0.6029765009880066, + "learning_rate": 7.897320546670362e-07, + "loss": 0.2133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1827746331691742, + "step": 7285 + }, + { + "epoch": 6.44434628975265, + "grad_norm": 0.6512175798416138, + "learning_rate": 7.775209638208814e-07, + "loss": 0.2498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3437669277191162, + "step": 7290 + }, + { + "epoch": 6.448763250883392, + "grad_norm": 0.8887504935264587, + "learning_rate": 7.654031442900978e-07, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12437689304351807, + "step": 7295 + }, + { + "epoch": 6.453180212014134, + "grad_norm": 0.776211678981781, + "learning_rate": 7.533786548727695e-07, + "loss": 0.2215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26376765966415405, + "step": 7300 + }, + { + "epoch": 6.457597173144876, + "grad_norm": 0.6631959080696106, + "learning_rate": 7.414475539141275e-07, + "loss": 0.2707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23870348930358887, + "step": 7305 + }, + { + "epoch": 6.462014134275618, + "grad_norm": 0.6867117285728455, + "learning_rate": 7.296098993062562e-07, + "loss": 0.1844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15025901794433594, + "step": 7310 + }, + { + "epoch": 6.46643109540636, + "grad_norm": 0.6691762208938599, + "learning_rate": 7.178657484878338e-07, + "loss": 0.257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25004538893699646, + "step": 7315 + }, + { + "epoch": 6.470848056537102, + "grad_norm": 0.5934985876083374, + "learning_rate": 7.062151584438215e-07, + "loss": 0.2236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2773967385292053, + "step": 7320 + }, + { + "epoch": 6.475265017667844, + "grad_norm": 0.6796112656593323, + "learning_rate": 6.946581857052192e-07, + "loss": 0.2281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2649118900299072, + "step": 7325 + }, + { + "epoch": 6.479681978798586, + "grad_norm": 0.6687401533126831, + "learning_rate": 6.831948863487703e-07, + "loss": 0.2464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24808388948440552, + "step": 7330 + }, + { + "epoch": 6.4840989399293285, + "grad_norm": 0.7123255133628845, + "learning_rate": 6.71825315996697e-07, + "loss": 0.225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28177204728126526, + "step": 7335 + }, + { + "epoch": 6.488515901060071, + "grad_norm": 0.6031168103218079, + "learning_rate": 6.605495298164299e-07, + "loss": 0.2491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21055057644844055, + "step": 7340 + }, + { + "epoch": 6.492932862190813, + "grad_norm": 0.5988543033599854, + "learning_rate": 6.493675825203416e-07, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18854458630084991, + "step": 7345 + }, + { + "epoch": 6.497349823321555, + "grad_norm": 0.6946279406547546, + "learning_rate": 6.382795283654796e-07, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2246820479631424, + "step": 7350 + }, + { + "epoch": 6.501766784452297, + "grad_norm": 0.7605702877044678, + "learning_rate": 6.272854211532964e-07, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16712304949760437, + "step": 7355 + }, + { + "epoch": 6.506183745583039, + "grad_norm": 0.7368548512458801, + "learning_rate": 6.163853142294041e-07, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1958334743976593, + "step": 7360 + }, + { + "epoch": 6.510600706713781, + "grad_norm": 0.6829209923744202, + "learning_rate": 6.055792604833022e-07, + "loss": 0.2615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2502579689025879, + "step": 7365 + }, + { + "epoch": 6.515017667844523, + "grad_norm": 0.6418865323066711, + "learning_rate": 5.948673123481286e-07, + "loss": 0.249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24650785326957703, + "step": 7370 + }, + { + "epoch": 6.519434628975265, + "grad_norm": 0.6992244124412537, + "learning_rate": 5.842495218003952e-07, + "loss": 0.2199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25637686252593994, + "step": 7375 + }, + { + "epoch": 6.523851590106007, + "grad_norm": 0.6708112955093384, + "learning_rate": 5.737259403597484e-07, + "loss": 0.2334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.274458646774292, + "step": 7380 + }, + { + "epoch": 6.528268551236749, + "grad_norm": 0.6802352070808411, + "learning_rate": 5.632966190887157e-07, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2165306955575943, + "step": 7385 + }, + { + "epoch": 6.532685512367491, + "grad_norm": 0.6572979688644409, + "learning_rate": 5.529616085924439e-07, + "loss": 0.233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2785220146179199, + "step": 7390 + }, + { + "epoch": 6.5371024734982335, + "grad_norm": 0.702512264251709, + "learning_rate": 5.42720959018479e-07, + "loss": 0.268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32956579327583313, + "step": 7395 + }, + { + "epoch": 6.541519434628976, + "grad_norm": 0.6240583658218384, + "learning_rate": 5.325747200564979e-07, + "loss": 0.2227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28135302662849426, + "step": 7400 + }, + { + "epoch": 6.545936395759718, + "grad_norm": 0.7530537247657776, + "learning_rate": 5.225229409380839e-07, + "loss": 0.2127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2570284605026245, + "step": 7405 + }, + { + "epoch": 6.55035335689046, + "grad_norm": 0.6967406868934631, + "learning_rate": 5.125656704364801e-07, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1789890080690384, + "step": 7410 + }, + { + "epoch": 6.554770318021202, + "grad_norm": 0.6931060552597046, + "learning_rate": 5.027029568663566e-07, + "loss": 0.2762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2260131686925888, + "step": 7415 + }, + { + "epoch": 6.559187279151944, + "grad_norm": 0.7548974752426147, + "learning_rate": 4.929348480835749e-07, + "loss": 0.212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18322241306304932, + "step": 7420 + }, + { + "epoch": 6.563604240282686, + "grad_norm": 0.6624125242233276, + "learning_rate": 4.832613914849504e-07, + "loss": 0.2257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20556357502937317, + "step": 7425 + }, + { + "epoch": 6.568021201413428, + "grad_norm": 0.7263450622558594, + "learning_rate": 4.7368263400803693e-07, + "loss": 0.2179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22019252181053162, + "step": 7430 + }, + { + "epoch": 6.572438162544169, + "grad_norm": 0.6411981582641602, + "learning_rate": 4.6419862213087365e-07, + "loss": 0.221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2418275624513626, + "step": 7435 + }, + { + "epoch": 6.576855123674911, + "grad_norm": 0.652529776096344, + "learning_rate": 4.548094018717919e-07, + "loss": 0.2338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24279262125492096, + "step": 7440 + }, + { + "epoch": 6.581272084805653, + "grad_norm": 0.7010001540184021, + "learning_rate": 4.4551501878916214e-07, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38378775119781494, + "step": 7445 + }, + { + "epoch": 6.5856890459363955, + "grad_norm": 0.6368486881256104, + "learning_rate": 4.363155179811962e-07, + "loss": 0.1945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18857476115226746, + "step": 7450 + }, + { + "epoch": 6.590106007067138, + "grad_norm": 0.7825067043304443, + "learning_rate": 4.2721094408570974e-07, + "loss": 0.2113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2131972312927246, + "step": 7455 + }, + { + "epoch": 6.59452296819788, + "grad_norm": 0.7557323575019836, + "learning_rate": 4.1820134127991794e-07, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18626517057418823, + "step": 7460 + }, + { + "epoch": 6.598939929328622, + "grad_norm": 0.6673694849014282, + "learning_rate": 4.0928675328022027e-07, + "loss": 0.2569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25416773557662964, + "step": 7465 + }, + { + "epoch": 6.603356890459364, + "grad_norm": 0.7011893391609192, + "learning_rate": 4.0046722334197375e-07, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3195936977863312, + "step": 7470 + }, + { + "epoch": 6.607773851590106, + "grad_norm": 0.8319687247276306, + "learning_rate": 3.9174279425931105e-07, + "loss": 0.2493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3487233519554138, + "step": 7475 + }, + { + "epoch": 6.612190812720848, + "grad_norm": 0.7716171145439148, + "learning_rate": 3.8311350836490514e-07, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17440339922904968, + "step": 7480 + }, + { + "epoch": 6.61660777385159, + "grad_norm": 0.7053916454315186, + "learning_rate": 3.7457940752977594e-07, + "loss": 0.2245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21849893033504486, + "step": 7485 + }, + { + "epoch": 6.621024734982332, + "grad_norm": 0.7948006987571716, + "learning_rate": 3.6614053316309074e-07, + "loss": 0.2484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26136571168899536, + "step": 7490 + }, + { + "epoch": 6.625441696113074, + "grad_norm": 0.6417169570922852, + "learning_rate": 3.577969262119574e-07, + "loss": 0.2471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26517075300216675, + "step": 7495 + }, + { + "epoch": 6.629858657243816, + "grad_norm": 0.7573156952857971, + "learning_rate": 3.4954862716122473e-07, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16177725791931152, + "step": 7500 + }, + { + "epoch": 6.634275618374558, + "grad_norm": 0.6673710942268372, + "learning_rate": 3.413956760332937e-07, + "loss": 0.2486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28934532403945923, + "step": 7505 + }, + { + "epoch": 6.6386925795053005, + "grad_norm": 0.7154965400695801, + "learning_rate": 3.3333811238791316e-07, + "loss": 0.2397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19894251227378845, + "step": 7510 + }, + { + "epoch": 6.6431095406360425, + "grad_norm": 0.8006401658058167, + "learning_rate": 3.2537597532199315e-07, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23368564248085022, + "step": 7515 + }, + { + "epoch": 6.647526501766785, + "grad_norm": 0.6231130361557007, + "learning_rate": 3.175093034694188e-07, + "loss": 0.2462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19901973009109497, + "step": 7520 + }, + { + "epoch": 6.651943462897527, + "grad_norm": 0.6645359396934509, + "learning_rate": 3.0973813500085215e-07, + "loss": 0.2524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23790386319160461, + "step": 7525 + }, + { + "epoch": 6.656360424028269, + "grad_norm": 0.7502444386482239, + "learning_rate": 3.0206250762356393e-07, + "loss": 0.2229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22129979729652405, + "step": 7530 + }, + { + "epoch": 6.660777385159011, + "grad_norm": 0.6512811183929443, + "learning_rate": 2.944824585812289e-07, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24692480266094208, + "step": 7535 + }, + { + "epoch": 6.665194346289753, + "grad_norm": 0.6338093876838684, + "learning_rate": 2.86998024653764e-07, + "loss": 0.2605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21317501366138458, + "step": 7540 + }, + { + "epoch": 6.669611307420495, + "grad_norm": 0.7195187211036682, + "learning_rate": 2.7960924215714394e-07, + "loss": 0.2675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21621374785900116, + "step": 7545 + }, + { + "epoch": 6.674028268551237, + "grad_norm": 0.750506579875946, + "learning_rate": 2.723161469432123e-07, + "loss": 0.1965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15070626139640808, + "step": 7550 + }, + { + "epoch": 6.678445229681979, + "grad_norm": 0.6777002811431885, + "learning_rate": 2.6511877439953536e-07, + "loss": 0.2459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2585234045982361, + "step": 7555 + }, + { + "epoch": 6.68286219081272, + "grad_norm": 0.6883443593978882, + "learning_rate": 2.5801715944919983e-07, + "loss": 0.2413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20979022979736328, + "step": 7560 + }, + { + "epoch": 6.6872791519434625, + "grad_norm": 1.0622401237487793, + "learning_rate": 2.510113365506639e-07, + "loss": 0.2139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2046893984079361, + "step": 7565 + }, + { + "epoch": 6.6916961130742045, + "grad_norm": 0.6978748440742493, + "learning_rate": 2.441013396975822e-07, + "loss": 0.2604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24113841354846954, + "step": 7570 + }, + { + "epoch": 6.696113074204947, + "grad_norm": 0.7417298555374146, + "learning_rate": 2.3728720241864123e-07, + "loss": 0.2582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2803192436695099, + "step": 7575 + }, + { + "epoch": 6.700530035335689, + "grad_norm": 0.6066814661026001, + "learning_rate": 2.3056895777740174e-07, + "loss": 0.2462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2779116630554199, + "step": 7580 + }, + { + "epoch": 6.704946996466431, + "grad_norm": 0.7119974493980408, + "learning_rate": 2.2394663837213005e-07, + "loss": 0.226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18332818150520325, + "step": 7585 + }, + { + "epoch": 6.709363957597173, + "grad_norm": 0.6575401425361633, + "learning_rate": 2.1742027633564477e-07, + "loss": 0.2058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22709758579730988, + "step": 7590 + }, + { + "epoch": 6.713780918727915, + "grad_norm": 0.6911765933036804, + "learning_rate": 2.1098990333516144e-07, + "loss": 0.2245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1879327893257141, + "step": 7595 + }, + { + "epoch": 6.718197879858657, + "grad_norm": 0.6780998110771179, + "learning_rate": 2.0465555057213705e-07, + "loss": 0.2762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2876776456832886, + "step": 7600 + }, + { + "epoch": 6.722614840989399, + "grad_norm": 0.6587400436401367, + "learning_rate": 1.9841724878211676e-07, + "loss": 0.2409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2115318775177002, + "step": 7605 + }, + { + "epoch": 6.727031802120141, + "grad_norm": 0.7173461318016052, + "learning_rate": 1.9227502823458976e-07, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23369640111923218, + "step": 7610 + }, + { + "epoch": 6.731448763250883, + "grad_norm": 1.5791888236999512, + "learning_rate": 1.8622891873284254e-07, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1805000901222229, + "step": 7615 + }, + { + "epoch": 6.735865724381625, + "grad_norm": 0.720446765422821, + "learning_rate": 1.8027894961380353e-07, + "loss": 0.237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25251534581184387, + "step": 7620 + }, + { + "epoch": 6.740282685512367, + "grad_norm": 0.8006609082221985, + "learning_rate": 1.7442514974792103e-07, + "loss": 0.2704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23466309905052185, + "step": 7625 + }, + { + "epoch": 6.7446996466431095, + "grad_norm": 0.6387749910354614, + "learning_rate": 1.6866754753899429e-07, + "loss": 0.2293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29380500316619873, + "step": 7630 + }, + { + "epoch": 6.749116607773852, + "grad_norm": 0.6837402582168579, + "learning_rate": 1.6300617092406933e-07, + "loss": 0.2852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24347926676273346, + "step": 7635 + }, + { + "epoch": 6.753533568904594, + "grad_norm": 0.6359254717826843, + "learning_rate": 1.5744104737327458e-07, + "loss": 0.2428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2921152710914612, + "step": 7640 + }, + { + "epoch": 6.757950530035336, + "grad_norm": 0.7265500426292419, + "learning_rate": 1.5197220388970313e-07, + "loss": 0.2053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1532514989376068, + "step": 7645 + }, + { + "epoch": 6.762367491166078, + "grad_norm": 0.6616162061691284, + "learning_rate": 1.4659966700927952e-07, + "loss": 0.2316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23041373491287231, + "step": 7650 + }, + { + "epoch": 6.76678445229682, + "grad_norm": 0.6630392670631409, + "learning_rate": 1.413234628006288e-07, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2294270396232605, + "step": 7655 + }, + { + "epoch": 6.771201413427562, + "grad_norm": 0.6096079349517822, + "learning_rate": 1.3614361686494549e-07, + "loss": 0.2375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2563217282295227, + "step": 7660 + }, + { + "epoch": 6.775618374558304, + "grad_norm": 0.6756962537765503, + "learning_rate": 1.310601543358847e-07, + "loss": 0.2402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18047744035720825, + "step": 7665 + }, + { + "epoch": 6.780035335689046, + "grad_norm": 0.855846107006073, + "learning_rate": 1.260730998794202e-07, + "loss": 0.2261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21389582753181458, + "step": 7670 + }, + { + "epoch": 6.784452296819788, + "grad_norm": 0.64863520860672, + "learning_rate": 1.2118247769373758e-07, + "loss": 0.2558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3245598375797272, + "step": 7675 + }, + { + "epoch": 6.78886925795053, + "grad_norm": 0.6336214542388916, + "learning_rate": 1.163883115091169e-07, + "loss": 0.2173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2048630714416504, + "step": 7680 + }, + { + "epoch": 6.793286219081272, + "grad_norm": 0.7539443373680115, + "learning_rate": 1.1169062458781022e-07, + "loss": 0.2131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18259122967720032, + "step": 7685 + }, + { + "epoch": 6.7977031802120145, + "grad_norm": 0.7678613662719727, + "learning_rate": 1.0708943972393748e-07, + "loss": 0.2261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23812338709831238, + "step": 7690 + }, + { + "epoch": 6.8021201413427566, + "grad_norm": 0.6185624003410339, + "learning_rate": 1.025847792433643e-07, + "loss": 0.2611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2555660903453827, + "step": 7695 + }, + { + "epoch": 6.806537102473499, + "grad_norm": 0.6969472169876099, + "learning_rate": 9.817666500360867e-08, + "loss": 0.2362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20245115458965302, + "step": 7700 + }, + { + "epoch": 6.810954063604241, + "grad_norm": 0.678084135055542, + "learning_rate": 9.386511839372114e-08, + "loss": 0.2119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21555975079536438, + "step": 7705 + }, + { + "epoch": 6.815371024734983, + "grad_norm": 0.6462898850440979, + "learning_rate": 8.965016033418705e-08, + "loss": 0.2257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20813468098640442, + "step": 7710 + }, + { + "epoch": 6.819787985865725, + "grad_norm": 0.6597393155097961, + "learning_rate": 8.553181127683108e-08, + "loss": 0.2653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23082324862480164, + "step": 7715 + }, + { + "epoch": 6.824204946996466, + "grad_norm": 0.6514454483985901, + "learning_rate": 8.1510091204704e-08, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23393850028514862, + "step": 7720 + }, + { + "epoch": 6.828621908127208, + "grad_norm": 0.7454639077186584, + "learning_rate": 7.758501963199605e-08, + "loss": 0.2673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26989006996154785, + "step": 7725 + }, + { + "epoch": 6.83303886925795, + "grad_norm": 0.6669009327888489, + "learning_rate": 7.375661560394154e-08, + "loss": 0.2844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2976769208908081, + "step": 7730 + }, + { + "epoch": 6.837455830388692, + "grad_norm": 0.7060198783874512, + "learning_rate": 7.002489769672105e-08, + "loss": 0.2247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.265421986579895, + "step": 7735 + }, + { + "epoch": 6.841872791519434, + "grad_norm": 0.7038555145263672, + "learning_rate": 6.638988401737933e-08, + "loss": 0.2383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2049991488456726, + "step": 7740 + }, + { + "epoch": 6.8462897526501765, + "grad_norm": 0.7277244925498962, + "learning_rate": 6.285159220372982e-08, + "loss": 0.2929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20731091499328613, + "step": 7745 + }, + { + "epoch": 6.8507067137809186, + "grad_norm": 0.6668598651885986, + "learning_rate": 5.941003942427026e-08, + "loss": 0.2092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23843859136104584, + "step": 7750 + }, + { + "epoch": 6.855123674911661, + "grad_norm": 0.6584317684173584, + "learning_rate": 5.6065242378104957e-08, + "loss": 0.2357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24361775815486908, + "step": 7755 + }, + { + "epoch": 6.859540636042403, + "grad_norm": 0.6468645334243774, + "learning_rate": 5.281721729486044e-08, + "loss": 0.2196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2681673467159271, + "step": 7760 + }, + { + "epoch": 6.863957597173145, + "grad_norm": 0.5975419282913208, + "learning_rate": 4.966597993460109e-08, + "loss": 0.2458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2951905131340027, + "step": 7765 + }, + { + "epoch": 6.868374558303887, + "grad_norm": 0.7025241255760193, + "learning_rate": 4.6611545587762486e-08, + "loss": 0.1947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22322210669517517, + "step": 7770 + }, + { + "epoch": 6.872791519434629, + "grad_norm": 0.7678356170654297, + "learning_rate": 4.365392907507149e-08, + "loss": 0.1886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15035316348075867, + "step": 7775 + }, + { + "epoch": 6.877208480565371, + "grad_norm": 0.6374990344047546, + "learning_rate": 4.079314474747742e-08, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24768120050430298, + "step": 7780 + }, + { + "epoch": 6.881625441696113, + "grad_norm": 0.6044369339942932, + "learning_rate": 3.802920648607433e-08, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22181479632854462, + "step": 7785 + }, + { + "epoch": 6.886042402826855, + "grad_norm": 0.6744352579116821, + "learning_rate": 3.536212770204772e-08, + "loss": 0.2375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29637134075164795, + "step": 7790 + }, + { + "epoch": 6.890459363957597, + "grad_norm": 0.6962430477142334, + "learning_rate": 3.279192133659459e-08, + "loss": 0.2164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2750842869281769, + "step": 7795 + }, + { + "epoch": 6.894876325088339, + "grad_norm": 0.6851283311843872, + "learning_rate": 3.0318599860872377e-08, + "loss": 0.2213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29073143005371094, + "step": 7800 + }, + { + "epoch": 6.8992932862190814, + "grad_norm": 0.7150095701217651, + "learning_rate": 2.7942175275932347e-08, + "loss": 0.2358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22673749923706055, + "step": 7805 + }, + { + "epoch": 6.9037102473498235, + "grad_norm": 0.7254000306129456, + "learning_rate": 2.5662659112659637e-08, + "loss": 0.2383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2616110146045685, + "step": 7810 + }, + { + "epoch": 6.908127208480566, + "grad_norm": 0.613750696182251, + "learning_rate": 2.3480062431724404e-08, + "loss": 0.2359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17015941441059113, + "step": 7815 + }, + { + "epoch": 6.912544169611308, + "grad_norm": 0.6614463925361633, + "learning_rate": 2.1394395823524093e-08, + "loss": 0.198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.182722270488739, + "step": 7820 + }, + { + "epoch": 6.91696113074205, + "grad_norm": 0.7142705917358398, + "learning_rate": 1.9405669408127935e-08, + "loss": 0.2227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23320986330509186, + "step": 7825 + }, + { + "epoch": 6.921378091872792, + "grad_norm": 0.701187252998352, + "learning_rate": 1.7513892835236967e-08, + "loss": 0.2251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23433920741081238, + "step": 7830 + }, + { + "epoch": 6.925795053003534, + "grad_norm": 0.6869240999221802, + "learning_rate": 1.5719075284126307e-08, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24257785081863403, + "step": 7835 + }, + { + "epoch": 6.930212014134275, + "grad_norm": 0.656295657157898, + "learning_rate": 1.4021225463614063e-08, + "loss": 0.2759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3004852533340454, + "step": 7840 + }, + { + "epoch": 6.934628975265017, + "grad_norm": 0.6697101593017578, + "learning_rate": 1.2420351612003611e-08, + "loss": 0.2478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24285230040550232, + "step": 7845 + }, + { + "epoch": 6.939045936395759, + "grad_norm": 0.6110602617263794, + "learning_rate": 1.0916461497059161e-08, + "loss": 0.1851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1842736005783081, + "step": 7850 + }, + { + "epoch": 6.943462897526501, + "grad_norm": 0.7281805276870728, + "learning_rate": 9.509562415952468e-09, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19850802421569824, + "step": 7855 + }, + { + "epoch": 6.9478798586572434, + "grad_norm": 0.6979923248291016, + "learning_rate": 8.199661195240626e-09, + "loss": 0.2786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3593960404396057, + "step": 7860 + }, + { + "epoch": 6.9522968197879855, + "grad_norm": 0.7403237223625183, + "learning_rate": 6.9867641908305524e-09, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32437562942504883, + "step": 7865 + }, + { + "epoch": 6.956713780918728, + "grad_norm": 0.6508459448814392, + "learning_rate": 5.870877287934562e-09, + "loss": 0.2254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27816981077194214, + "step": 7870 + }, + { + "epoch": 6.96113074204947, + "grad_norm": 0.8912989497184753, + "learning_rate": 4.852005901063717e-09, + "loss": 0.207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17091882228851318, + "step": 7875 + }, + { + "epoch": 6.965547703180212, + "grad_norm": 0.6817123889923096, + "learning_rate": 3.930154973985634e-09, + "loss": 0.2314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20704488456249237, + "step": 7880 + }, + { + "epoch": 6.969964664310954, + "grad_norm": 0.6807764172554016, + "learning_rate": 3.1053289797022825e-09, + "loss": 0.2361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19280683994293213, + "step": 7885 + }, + { + "epoch": 6.974381625441696, + "grad_norm": 0.7215080261230469, + "learning_rate": 2.37753192043888e-09, + "loss": 0.2094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20111405849456787, + "step": 7890 + }, + { + "epoch": 6.978798586572438, + "grad_norm": 0.7553413510322571, + "learning_rate": 1.746767327610588e-09, + "loss": 0.2498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21126879751682281, + "step": 7895 + }, + { + "epoch": 6.98321554770318, + "grad_norm": 0.6956353187561035, + "learning_rate": 1.2130382618114057e-09, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26631930470466614, + "step": 7900 + }, + { + "epoch": 6.987632508833922, + "grad_norm": 0.9583781361579895, + "learning_rate": 7.763473128052923e-10, + "loss": 0.2211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20020142197608948, + "step": 7905 + }, + { + "epoch": 6.992049469964664, + "grad_norm": 0.7983410358428955, + "learning_rate": 4.366965994995198e-10, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17487777769565582, + "step": 7910 + }, + { + "epoch": 6.996466431095406, + "grad_norm": 0.7559086084365845, + "learning_rate": 1.9408776995355483e-10, + "loss": 0.1849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1718912273645401, + "step": 7915 + }, + { + "epoch": 6.999116607773852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2685817778110504, + "step": 7918, + "total_flos": 7677204649476096.0, + "train_loss": 0.2887290850240313, + "train_runtime": 73070.9484, + "train_samples_per_second": 0.108, + "train_steps_per_second": 0.108 + } + ], + "logging_steps": 5, + "max_steps": 7924, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7677204649476096.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..6a8527f --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae1d7ef5e0a267ac210bbad7b8c2859daab974d7f6ef7fb7f19b17206a9f8c4d +size 8785 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..c34edd6de88709854d9e07b1b7ddffe36d486e4d GIT binary patch literal 47452 zcmeEui943v+wP6bvt%BksAQHQnJEgTNM=tInTO2dlX<3BB9bPl5Hgl|Rzf9HW{S!@ zlWAW|{eIuKkNti7+ke1*kE7!)p69;Tz1F&}>paivTz90-d39GsLyJsUBWOz zJcbdBkdeSoBtH#&ga668oi=dOb+U5vyy|)#)4uBFZ13b|Z)0)LwZ`u4a20ZA4a1ulMtc?gA$YEsWrJa5Eoaq~^}Oy+ z4|;kfUfv{~pG$mt_N&|6pbF)k*A&-VNQjja9*wCGdz-tA)kP99Zq!a;E)S1BUYDgO zU8i_05b=Jfobkty?^B0~jqTeLHV<;1Ih^ZU-7&c$(voO>|FDDWq{8GtxMhqOoeCLG zNP@8Hmm%tdA?P1HOvL!TznD_}|G)73KTSf6CNZ8}${uTWk?~o6uG>-Wzv*$9k55@R zIe1(~Pft%~Wy`^4pv>h*f2mV+eAeWbOLiH$pJfXQ3d%Mn((*RkPsY^;5mUa}a5wjz zk#y$pY|HC6a;n~zdh0nyQ(zGD^y$-8M;adCV}}kAaWr4852M~H*_^B9e(>PIOwUnL z!+m&ezj*>bKfi~cZoE26^!2w-W@cvDc-*Ox+1WTryY7+fys99TN6zc(Hz(UiSNomR z*JcMtM@Mhf)F{QUN|5Ys4(|%CnwC0pHpOv9CnV6dToZSj>ZV?rXj46NCamjbo_RG_ zrSDppiLiP+m;8J!DR_MXJ^bTa!i9 z&CIyWU=r@jdB1L)IVvDf7cEj15wQKcwzG4;>l==Rg@qJpC$62HO{dzbI6rU6fOS`n z@awI24F@M5_|6hEy9oVOdiDIdlC!g@Ok91u;?9qcV&hG*A*rM!CeYKpA;%g*YN#8=7|Rad9FbITT**p0WNe?2d$8oGT*UB}ckR(i;b zXg|N!jn##F_wGG^^{Q#$%}shPuDGvXjkcCMFQh2=%PMSq+b_3161-J)@^z7e1EaGpF%z>&bZ3&q*YsL=sgMRlGYXDCnY6JdDT0zSbcb^jbq4pcM?+;$%b+})mES#kJu@&p^T$IREz1IP7c{6268ZV}wp2w9ox zQ58rc#NxQ*qk7B(Ibk;ny(i;R@MD2No_2SaoP9ebEVngZpYZOQ#)S*b6K!b)SL;c( zHYPJzB&@UwJA{&37Y03tN0Y9Y(c51UNa}rko${NjcVP!P7R@Yj#=*g%FwT49d-6gJ z5v@Vd_-Ht{kR)exb=8RzhnKAc!j9<@6Y)yzB^I|cgy$RTxF`{P85#0rz4Q0 zC7fIc&l6-yInlE@7A?Ypd3TPDbs>YPH0govq+(XvBb}qYu!?!_ zitp!^m8B5iH17%)yy@^?8*GL!=+9YdYG^R1INw8vt@c{Rn=Vb2H1#`Gw_*9q%gam| zhxqt1`zFy|gc}4osK;?!#f+9WboBL`>gwtYZrnIp?mQaQxb0KaR?8sAX& zM5L6=40f7l43|6`MJS<}nHj{BKzsVB=3zz00htf!rz~#|3ySZ)EGNec6}?>{&e%#z@**_y|H`N50v%v7_W)5`mFp4 z^&Ik6pZBbF;ka{-uWD;ivx>51-Q>EJ)z~L>?$dc@l_*QX0}TXh&ghqyT*y#}g)!9) z4szbUecPbKp0#RyjMACIx*$+DkNcyH0-v-rvxIefNLJR7prD}n>B0_`tWE+^w_IXk zVgd{WUez|FI<`QrO0)Qx{!+_dUtR<)1#YkOu*kX{m6MYjnVbw4vuwgmwx=`2Pt8^D zHo=VWB)5L5^!40YtKKc{&3gQp@PhmbVSmS z?_?Adb+CcbrrC^PjB32}ih4N6ydyF+?9z@6nj)1UA3y3GWu(x`P~hzA>tkwm`dUQ( z?%g|QPfv9m?%-_Yn%TFB35wMMDUy#eTrQC7p*)Gel>J`*84SawjPsp+Gl^k?Z{5eu ziN@`!?0SS_*d!^cs;cIfm(^h2=09tS7#6ueZqv}x`nKyxK|vwvG$f~X=8Ot|B9EXT zm4wxYk&adu4m5#Blg{m=LXoUV;8DdexZ8ev&xWtXcDFM!;vt3)@$l4+1;mXzuMK)G z_1t(hS|9$zxYThSiWPA`3K6Xba;!T$J8cS+kUArc9p4Z?QIB`Ln5KoI3!XDY%<@d) zCR-qB_3n;u#p3(purKi8;iNBLzD&4(-~Lk)CmkLB(-Zaax=>1{N!R6VFPJPAIWN&~ zt|!OF#^&E0$uQU&isL-RL_|WB|8>Hw(&y^ur)M#?^6$gmlbkt*g<;&kZjQ%F^PW1z z9wSx(X*VJ!CZxRll+}lnTd!Zg=Fec(g@E+>^&*Root^TYR@lf0PV2r5eRDkb{+O3(L?ae}{KC#u?kWfW$ zYcyc_h1MO5{hP}_>!IW{vRG7`?*+=Xk*D;RnS+vVgtck=7%R8XOia`<|J1V1zI zCb2rO@AY*m?6{yHBS+Sy7bzNtEui9lD~;tmbvHCTyzu$+qpRN%FT9Rd_#I&iVM>Md zmpG(#O{G1a{Ib#U!Yt1k9$J1}CQg37hG-g!_P74!FS-{NHvLz+u4wTm7Qplt!r0Hv zn>c(eilk-{#S58Lr1-=7PG=^YSp)|_@1E-3X70cbj^#5_BzXVjPwHECOAF2164>-z zW5?0{z+YdE?Z}bRaus=rc+C{|uwYUa4P>hjGKFhHzVU4jnOJr#KgpMuH=~s z9zGmWx%^oZJ;SwN5^4*MX7ct-Jpserooz#?1HL~qlY1c~`|h8-$f zs7O?Usz-^>itTFqskwYs39Ecsg?0QesTFHYvU$Kg7mj9y?PcA7{s;2D>($9gb1$|@V);YVaem3!IWWEsNeE{oP1_lQW?d=7Ri-=6F8Na?p9-Lno zN`Qm-!vQFp2F!)azLIA(Sng>Jc|Paq(-5(y-WR%Ay?{EP40{gyCkIm7+S>N^6{Bj1GANp7*CG+bJh0T~sU?cWli)^ML zSxu~YZ!I+U-8rV)yZF7e??{GU-Ol!0phSVehh~8@aTVO+R$jy05>{0EBh{v%kX}a= zqR@isflT)9bkV5Qk_J(r@v^Z96Sdri>fB(u3x+%M8=sG{5%+D0v*Lgv) z&8Zy5nF5(qQ(OCcDT*>QR~t){g~fI~ubIufZXIK!kOBm;fj9%k1oXd3hpfih)%leo)IlG&if~4}MLHfD9n-vy4mR*JjwK z1OPn;3X}aGy z_2a22C@ee&-I)V@r+W*66os3pl(lc%cz#f(G;|M*rb#!Dv5eV<*`&gJ$0Cmie!byi+KJ7>H?BgScj(Pm&aYmm zm*(XSx_tTaLVw`;C@Bd(Fhe)HOHf88Wo4#68H(b<<~C#{D$KaTORTz09|JHzTzx$2 ze7Xg;PdPhRcm%~V0j{yJktx3K3zR*8zSbRFO)Qq*zkm0uxfWlHe<5jFgEHx{WA7|{ zFJC@zTv&J_P-{O!8#M+fu@%d&-uWF3Yn&=+mjcL4S0Jg40<*KTGlcLoF1kT)8B;8a zZH)xea_qR;_*BjgmK*zLh?c@EG`ZYAjT`;3xlm-&nF5HRwXN-YDG+EeSk?8)+~?16 zu5V1{ug8fsJ@6VMf2MPEy{COWbssTtK~xk)K=tWprUi39U|7}L5d2n`Ufk*G={dEy zEZW4fw6I{~DidBj=?Fm%Rq*b;d+(eezCW&4?ZVDm=q~~iBkxnxh0$TY5N#U{z!tRA zWs}ZkUfP)}Ihny1%_L+2(+qGd9@Q2Cln^K8b|F46q<>3l|1_=PI`xUg$+VQVk#b)s^kRopsFTeEQmb zM;Mbu0SjcW5ZtCjha6E8OTg~-8h^*=>}>n4`IAo%i;5UQ8UVI?z|GQrxQeqroR*pD z?b_N}v;5k?AsKT`vuemf2F||AKR&hnPX7uOeq_!QVCcuG6$99nYP?r(5iALRY-mtZ z?lA&B1;l}`YzIggnmb_e?klaew6yV7>641P8?-0bV!LLS?;=D^ z&&U`8xln4V?9I(dilHMp$6#F3eTAQLjiu#wx0bd%)Q98UWjQ_OhGIc^py2<~*xYPP z)wZ~>Pz!p|Jk;NX!9>~gJ*M&Do`jHqG9piYau^2M6cL8jF+t;S&Lkyvufas4`74>q!P5VpfQuz{nAQYwRtsByNV-AXiXONRe@QJhJ^+#=d5q73Vj% zK2ti(W=-HU`1Syl2f+IIBjgy>Eps5XkV9{QXaqAyHdnDk6PQt2Dy=B>5E8pVnKO@m zu2HB{)dmRwf~Mu4oh|?>C<0h>Ogo+f>kjG%*^P{na=iKZ)67f)>{a!SPxghkrKP1^ zGiRE{9V$17l&)Xr+uGV%?{8A@s+k{k2G(E!WfkTip)vde6n6qldeCh@A`Vsln{}O? ziM!W4-kSivfm>C(#X4e)L=;ZaMZ=R{UYK+>S1Qc`8OcZkj8O%be7O z6iP!1#f?}dU`yt%u<3iJa@X$^z#bX)?@yCckd^&vk+!h{MZ@{`vQ4xH8}O2a9%@=z z&l~r90(UpfzkK;JG9Itk1Y5x2rs7gHbKE!`H+RQwC5!nuP$ol;w1foQ#m^0F>JNyF zXDgP@`vDO18o5O}4+Ui`jNP#&w{$4Vu+WM;$jLH^c%1Z=7Jq%3Q&pt(18)*oC8Ps6 zFSSV{X(7mEpnc-|_t4Z-w&)qCO5b54MWeR;dAs*20q-JNOf2>!b-70xSTsLbV12G!R} zUugwg4(S)P{;1dX99DAlQ8-r}@HhJ%LQKiTgoA{JBLbo) zEbs{>C1vpWEY*t_Z;v!y%I*Rw=YZdSBHY3=i)(BrfVGm(B4KY|qC~1n$BSDM8M?k) zfm<$q3ysW(MT`2agJQjoAD0<$?=kg|kS#f}wo(k<_sCl|M-R(ybE zUrgu|z3wOB5C?kg7|0^)^YyeY&77V^H(c?BGK(C&V<7T?zKCjA?_2ld0Dt0Q*`>#R zPvtcO3*}*ozG!S54I1|t>~1EGO(1wzA!I??mhhPFy#;UzW+bNuw9{L+h)89>7u?g- z(5L}20+RbadHU_mHP2t}vsIgpf<{GTOOqYyN=o>)*T*j?Y#c_C)6Od7sRGYAGeNTo z7J7Pm7HLP0T^@%mUIP=8n3xzUsEC$;o0+ON5oJ(gKQ#W7aVbU~m!f9}*(9N!tLPJ$n3jJ*7wm9vBly4j>MV z?QG7Kdc`};Y>pPcwrr-s!k(TnGD;oM3^L182Z6l`beZgCHX;*C?N%UPY=kv{h(PHd zwoTdsQfxU~<^Q8Qm)+dYwFXZx55;43rr%|2-K9!D<4cEpD539A4}0Lqa^%qGDA3Owy;4>*mAER58tjDr>l^LX?|yb> z6}bJp^dU`}79eQ@5v?2^c=0^zm$EV$zzgh$)Z^@3e_)si5U;Q#r5HZ89Didrd7l&F z;^O%(<7cWg4GuFPiX|d}R&LIBj5HV6@W&x-O`~IdReU?_s6)DNghq-$T9U?}-xw$l?xIe}vlw zEY1vije_$5iqI}`0*iv)G4?r9Za>^b4;<)}P}cCcl~7Ovqn*5S{kL<(;?l*PstfY- zH*U(_oX~=@B53;d%NV=I&8k86K}e0L5boyo=$$@IysL1t-=XxDnAJ-3^WrI}sobD| z)dQOr1raTJ;Sv5(cG=svsny$%8+jEKGXfnzbcz9%onulA2{ zNSw*)>@#(Ezbp|X76yGzI!8wc;4t=eq;EX_bi)}K${{&94kSrQ+4tf>n3UUI?%Wdd z9B?6ZljYpqv^|&IwHR0m9CvGpSFBruUDEarF6O~)u`*CX8bNm1n6Ye%sf7*!%{?vt z=Yz|T1aP%M#AU#yKY^%rqu9s~$}(MaW7$keKUOolf~op!B9O)DIdBV7iyvr)ERwd2 z&n{%p)Xe8jAmj%9t#IXFlF?V?vns%IfZ*o?$AJ9%d5m0=k0Ej%WZuzdX^tw8J@@2% zH8sGL%#ep}0Z6VE>_h#ZmoEiS;}|O0(W6IeAVmZNs@Y4S(zgJvshvJO3PPb8T;ie5 zQ552+sG|x1oviof!_ZrCs#qZ5@|_k##3AK`4Ff1gBB*GsSMP3COZ~hH;;VPZPoU(Y zkajD!7F%0dS|CFa<=>n>l!6H-zl zh)8L0(Cj87Bcn5!g4Sg-@UvLpK4YM0gsVKNLG8#NKW^MfN?I_y*IgCR%@Vu-xZpA1 zF9ar`f?b3P4UZdr@Q8u}7m#i7A+K?2R7fGiABT2H9LLF^`T2RPTr~o$Lw=1TV0-1t zfZtLZlJjrhzfT417_!m`EFCmPNC=6DP#qb7aQ1@zar^Gwuy=_^RP%bq0lw7%HOO5( zf$|kH%iB zfV}!7Kc5cmI}1xoVy6$eSE;nj0yeHA9OZoTWxE<+6%{oPY>G!* z{GW{Z0)UW^R4+JJ0q9<+E>F9gU&zjZ95NgFsgYJ#=XM0r9hQ z&0)iNDWLGwN|T}n9k<5Rb1;-gHLRwsjT(ATy+tw;L-F$jXJNT=g*T<0N0gRl`kB+D98{pmBX;BC!7OmXV)QWh!ZxnsZ%pD)Z*O+s4U3dLQX@R=foFkG9=qM zul%~q#K9q$^3`4#B(>U{rmw?+yC8rA#G+iS^jQ(u?5W;qgz+QE8AL(*t@QT<%OQW zUPA@i01{44PN=z!zFEDy?UZ(4T@k8jclH$#{-kq;fTd6~fLM2xwXo8*5p+yqJ{kK* zsPA&jG7R7|P(0ARts%4K0QH^*LSItW-5AZhC+&k`OjMH`0!|Za*!q^YZruVdm#Ce3 zBGRo%yL26tjlLTxZl7J_}9Yfun`T2#;pI(n3Dda7m0I{R=~u`A!^+MG2M84$u3 z`K~_jUp2g&l5%6|&BoSJM;}P4ka;bEHQR%3Yv1$AbZfgrpOE#GM^58QUQjmeKRrGz zsG#rw6gwssmKGpXwL{P@R5m=6tKW)v=-q^b8faWmg8qv*KbSUHs%dj__QC#OLVf;p z&!Klv49tVZK`OUIdS7jA?T5j^bkG4W>&VvK01&xlxw5uqjm!+K@7~!gdKY&yjo-A~ z`CSc)tsp3`d9|qz9<;-%^g>cL@bZ#LJeMg5Hj_cf^?vU-EYC7_CP}hLeChOq{+t2q zb+9aex*blwsRdiq)tYPN4*rdV5}dw+`sn!A15P7zaW zgIOe9oJ-5_UuE^3xMvO9wDjv4OV$FYzs3XBQ<4QMWvS&xN#`;xPy$7^j-|y#3yRPn z=!+@o6oPaQaJz$FE2TzGUfvLz*PziWnA^U)efd<*_Tb&ScV9!_CEnZDSAv_H+XE>1 zLgrjf`V#aGr<>TFu0it%wZ4Q5UpG8f)7Q=51IBF05UHkfDNL;1cL<0z%vssP*&kUM z0q_4wOuPph3k?83pe3>=wYIie#<2cU7=q~(l$A|&38&$-fP7aEw1FeFvd1W#mbzU2X&3?hp=S@@r&m+MABn?hbqt@5Q;~!mm;XKwrUd8_mEE-VCqM(@%^1+>rjRlAk_wq%TTeMAe-vR%NmI&wVf3gY(;H}f|I6sRsVhqRz z=_dC0nJ3Sl`7xb5mcX7E3>+{UpyPhBKLsCn8jOp1^!6>-&zUB5Z36ow6>~( zt;Sb67?~%964KLS0jaDyo+*Eb7NIEEdDTgu*jCFhrI=i#@1ZzNa$J*0pC7N_EYRpr*Q4e8RsViGS0C`nAprpj2&9=ojlW(u@^3FqU)NYc-fLW97|$46?) zATV%O%JzWGg)hX@K~*R3M4nPSiFe<8RpO}&Gh7C}r9CGdq)~9mAoL3ejS%ZC_Y?s` zAYkb@*afhSwxA!%WSVD`%Y>Sue*XUSU=V?3BLR3M8$s0G@c&Sbx3sh@fHv{gl=*H@ zax(MdCr{9BrTDs9M=63dIskr3P-z#yp{4TVQ_=TM<li=6(yr@J(*mI|vMFoB4uRfp`^?vDLZ#84-`Vq^rBJSy@q$5A^%KVjvfSf1p}~ zL*SJ5#osRC{6*-Y>|bA5KwlHO^>Wpb0`~hcqjWAsjy9WQyJ0aI28IGlZ&rY-pop*_ zr;fJt;6BU%T%pK2h;j!&GYk&|+ycZRsg5P^bKs!-m3_Z!jA6i1d7!=1{cR-+U@ICF zo~M5eIuRD2xc%tLX2wRqC^)(n8ynkL*r5;&3&DlN2-W!bB6$kTRL3HPR*ggF(vvd* z*E#F_2Kvj-LR6TvfV=?`o2ctIjexYYH1;qW&M0ti%o8AW(&53A*RL5uLqn0Vs#;r+ z>CnlOtUi_7F;8uzZjZ}^goQnR{ybv11h{?$RQ@thx*-oe?6Mu5nHijJKKqr17%$wS zD|{#7t;U-bcC8<9cS>N{+X}s(C)h*a2{hS3_2M%9^3?UVgziawIH~} z!NS!xHBcy7HnI=RK$vw5elumO}r}Bj)*q!BNDsH`Sv!St38I)Xb zx{a3FoM7WP>ADYaHBEXwTmm((8|X1$ouJBl6@ImF1T8QkGO`x7Neq|~0FQCCeE4AB zbux14BFRT$7Y^`xbieF>BZ;Oc7HG_&c)1`zbu@tfB0%r~%od8Ashh*QO`uV7$aVnp zivtDoAaEFRa&mB~(gNkBi5oW>C;{{2K@fw`bA(Z`2IU%;jsg6%S=i9%e~5h~j+Ch} zwe_+QusucubJOq{ZP6bqK8V2}A1o{bxaDYojGy$2L*WCxg%k;Eng$CoF28wYBuiQp zlr_$zfBXGee!0@?^lZV@z#hMI!r5`^5yH-$qJgva-wjMN8VlnxkSWOOY}bY~bJ5>l9*mTzU)Az`5lU$}2t+>y%cYUXiL6;O zgZK3d*1nc>O?s-UuqrVhek_cGIP6h^oQ1?s7_v&{rBN`#s6oPmPUhf{_92HD{##`S zgcML~ucZCB0N?i2#Oy}Oa|w0FHAtO1AjK`ALF za7QCh0us~AY_-pd?eecKO32g^QBie}a$M-tuc_IABu<5gO|@e{J|1n;(5h6VB>`oQ*u)91H4eRY1wHuVb}{lpa(jTF$Im{){8&v}22XEM%yLQ0uYZ}CvG$hH9!{piW# z28$D_E9Cq6m0svAn!Z97cMT0PC^tyD1OZ15P&QH;pec!LlC>Mq_zH&f@<4GI3`h|m zrtAa&41ui63*;57gJAQ4#{a3nKya927$l^9%`VVRxIeOmI+@@-ri1%6bNN!SX^&sZ zf=~lz50C_25Mo5_dqn|;UUA-nwV~tWguWi_fNQG|G-ljCOY}*)M`8JVtFwdA0L;gs zPhAHkY5~ZjC0IPYpowYERe0kQFPMQB6@kq=bSuOWU2DP;&=Y@41!$z7>yXUdlu|$Q zp`)W2I)t%-+f(Lc&ZC6rnutd#f(o*A3m_Ge9;B=>MBP5r>jY3;haB-ph69@% zhCwAlDHeVO=IUC2&H27-j*vd^)e{ICBi$RwQ!S{#$RvquuFx36YSy05fc6U}p>>dZ zHZ0es`+|>Od4+>2w7%FXg2ZY2-uzqDfuN%y3A76AqW2a2*`OVAivPlc4;hApWK!8e(ve9G0vIKcb7DeEKJo%e+P*Onu=#YN&-&3Z*35q z*9h9YGOK(ac$W`cp)xWo(DEa~U?0Qo9D!O}k5m}ITn>?=&667s|JS`*!w*IDI565eK0rHb=cS# zyyzB1^-xdXG>CXeQULc0*Rf;Apif!WWr@_{Ah~goXLbz@90P2&1l^ss=zx+I6^ke) zJ>X0PStuO*_~>Ybs<-#az(CaP$#8*MisCNN7|_7MgZR8VE~9nC@Ettu4u<>IavS^} z8rs^YLFk7^M>$>j6gqwanBqj=5Ev-tGYT*|CZ;CyO~8Oqk$?0R9s$<5kpa5T{wuFc zM*F29ifT_f_*d<3mRY_0ga|nXCfd`Zp-@}?`1r`6(nk`E=5Sns6hnm&LN6aO1y+Mp zH`^`&Dh?Ssl&2xa5Nt%ofFSeFt935;v|dr_l)J%)05sGOYRABe7xx!)@-amhfS(2( zph036Nd9=#7lLph#2_D$uTILe|B|ZQ`TFHvvF8lri-<-Gj#C~V#+G_IJC{<6@{a#H zK`;W55;-`*H4mtK4;#Dj^178+V$ zq9Fza2vqcO06s|e+89SbmWorB5&uYi(TRLJmcA{-tce<87F7WM_YChQVY$S)1>6i6w)bBA#m`X>n9 zpf)<{Mx!JJx=cQxq*|(LpZ(G;sBqSY$JiwB?zTo0aZoi6!5r<0vCp)WAx|y5Kc0sJ zgpb-|Ms5m_q=B>(UB7;v1Q?>XG z6sH+PR?w%NfzH>DP6dJtk2d84?+I3f50KyhV??z8+dG2eI4D~IA|wlFFbF5X=Eyu9 zpTy!8Wmq$lDP&E*?NUHDwR^5v@yfY{mNJR{CQY7n3qhcWrHj~)$1<2BuK>{=Bc6PMeLNj06DPiR>dJ?*)?xwR~+ z4h}-_Ge9bK1y!PAfOt;P2lE6}qN1Q!D zs@{bI)msh@s@OA~Lv1|gL5V+eubN*6o=}ZroeC%4M#vAJJHixw?8ZS;5^X}4iyYRk z8SkG_FT97;fUy(kX>Y$fOqc#Z`N@;NKtvfZB15!H6&00ICT{Oza_D39&B}sfxSTUe zjSm4~(2WVAAI3?<5ZyWZoRJF6?fqy#jeNEm+n=-$+6|FmhY?57VtdFvgzm`ROtUrZ zED#)wB|xG>L?RlhOtdg=k7?75aiD8jl6;IM{DU}I(FL0J)RCvT=(8~d4}ZEk!&dG(w|Mc}T9>O~(wlx2OU`jNkIYP(hq`DU@Au+*B9Nrr> zbH19^e*(%;B1DHC8`hwqx(K6DT<26u-G>H2H;Go+rAZh^|94N`M0+xZRmqZ-0lUZdK%nZxkT82XwDdQFg^nM(G3!DZF7BRjKo>Vsbxdv<3v}U0_eP zogR_wO$6@;Z}9%vQ(3zE@SyF3)O8e&aM+6o3aWu~deAZnPICcxLnPO6Q5KYQ6|mjy zCZ?wkzVwi8l-X}E!B6t9Sm8+L#lRnikQ3M!sq+piJo|;)k6WA>o#a&7;n!i;EbOh*=CMns5yD zD%6&(y?Pv>Ru^JzYb$`xw@wedDU?qgIBW@a=@V`Qc1IEa4oT>qIzey{Lo}L_Nig4M zc&O4>s08%P*39VR?8(82ny`=mMDu5#y;9S-z*-X~hW{hPHIPHZU2PH<6|bU9`5V|P zdp{D!9s3)Uk?7yNzaVu%ZDNO53BA`tpmz~TBalSUzZY1Q#P+mRx-pcRL<5e=kRJ%z zd%`8i5_K_&N&|m}OLye)pYK2Y8;T?liYeYCd!zwEmkNE|lIMnW z^2!nB>8Vd}6DVNAHhtn9KH|$J2WFIRtGNUYO9h(`QsI!N>|;Ucpr8qr?fYg)r3lz1}tl!_1;Qer%3 zNYEVNvp-cK7nB1^2@e7y{Nt-kqJJj;vk{uT#PyEDL>h@o=$4;l|NJCR4EvWvu=mAr ztxI^WFT^0x!U{ra3niDp>yhjQcRQ3D%KedA68IB;r&5^!M>;CxNrdTj+kAb)2g4)# z`2)u6(scM?bsZgc_sjGC0wg_={iNFavG>6Mu1lEG+1Qtydn)&ziENhCA|_w{hX8vy zRs8%_)}Jg?!~tv44gyBYVKPTDLJ{V!BFpH#k$-~u8;dE%Sc!PI2MDm!U0tQ2X&(6FW9RcdI*5X$M=mo9s5>QE+Bi6<6RA$tyHMW04 ziQ=DCM|`3EKp8zctqrMH3|K%8G^e3`ssaHE{xqT0E7@IOLFR?OFCeE3FE1fP0PEbwfi|@Z!)) zORM4zu(SILrA^ay8rH;GVt;pBt9z6%4lIF|&2d+OcWp_*scy*ZBLGG9<2S%yBnA%3 zz2_q*SBBO&QnG;+ruAHWq4)gdONOl?$#Fqk7}P_Mz4;t@0qAh?=;cV=^wy7M00z&8?Z(V?sySm)rl3Rkf#IpyB{s01_Cy=~_9`#Jo zb8y5!HB3wt2_S$bRem5EKN$A-`I!$)sfN@$U!xz=9L)WL6^_81K)Qwcjn43+ zzC3Jzk=yEAw}9wa6L$dohte~)7WnP9I=KJB0B$oR3!B!#a-#W0(?7F9mot|0g&^mm zO)ZOEhZQ|a0z^}crZt%JWV+|$vN9HQU|w07;Gci~K@dK`LPKAlN$qiymo#);pk-Wx z1UzV`AA&7~ETv}fj(}AkGW`Y98Y(~L{PP_;d7tV{1xoz)QdR^t#}mCv(QqE;KO_go zM7wEO^N2Cq183IALXx$Dlv_QNJMi@)gLs^icbZ8eislcm8YfG$r%ssGq$k;sIQRDv zYVd_6#~?}yl|T~(DBF=+EDXi2k&pV5^@&jC`t##;Sv+;=b~|+j^f(;zzT*hd~+GY-T;I} znZ|~b0z1Xx`5#3KJ~xf~S4l*d<_n`dey0^Yr~-fL$kR}Cr2+yl>_>&I1X8~I)&Jz= zUeel!e!);M&qrZnuq&OLPf`>kfgZ&u2vYqKdY7Pnl+M60ctaBI44h*h%2{QM_iyX* z25DJ%^}WCIYxM>L`iiU!RFeCKgml=*_$&!_H1kig7DCWiq61zTlNZi|31&a~^W!#k z`0;+jvjl(<|0LstyU-;h`Xf$<6p=v6*MWM`)h@I*wd!h+iPipK+Pz;Uh+%?R|Gou| z^qcnIv`@i^c_{t8WcS`B#YOj~jT)xypwk;3Ea&nRY?9)ec7Kws4%SN`CR!FrP5fBX z_Ah-%4Tt6U;d+~a;>A8pM^F0?B;Wh#dzt-Z&tF!h8}BV;To`~RhaiDF+2nTaeQptfe9-NyyzLs-Nt z_K8bK^cP%z)o8HU(Ng}1vMm2^9Y1lQoD>hjX*l%dF_hhqIE761`$$N}pdIHp+5IMt z-G`LYk-Z#mL$wzJshXdMs9W0G&-lTq6dW|okV9@B+8!350fAg)bGda zeBf`!St{}+`3UbBfx}-IKK+y@>T|&sG>L=u_o{uWrbIj8tBW=lvz~SB72B~gj}8o4 zaP+it^B8iygI($>G_^52hzvYZQYHTElXce^=$|sse=(9jXR8;nxOQWXe=q!lZ>&*` zz*#k9a%W^_9*0gpdKCjU0vbsfxEcE2xW9@Xms$7KAiy%ux&M{wInS0`WB&gB3vj;F z$Q~U0J^ha)IW_TFjKr5$j%Kca5DOT9YUTmK07%3LMDDD@O^{6(sc{t*6`aNszqevo zYuNsREteTCOaUZls0V}?YHuKCF*2SlFS|j~%Qu`fg2@IO8CZN#$>UJ)>z$|qod;e$ z62$$+WawqHYwpHzDgxYJKnQmqBEAm-CY+?&J0=x^9C@&M&A@#&$a&(BCzDCoBm%5s z7QOjbkzB^Fof-me#eJlt#_~=5{p?_k-g1E6)Ee$OXMEIM+Ek(g(p#YcbM3Lbiwqq;9I>YP3v zgi=d`7)(Eysl$=K4p|}K56bb_JREVbT>SQqCjH6Nr~72n+j)bTL8b+|kll=Q8&K0u z!kPRaP?*_3HHE{&O6SkV{YQ7>6(11DVlBZl`N>pRE*Xkiy5{0dmKyLnZo%mdrhahA z3`$kbLKQ?PVP`dh{t!A&gxr1buWC>oL5d z1A4hps*_+|_}?kr>4fiv>RFwmpvnQLD|{2L#UBhOvqrMyFL&8IN?;4R_E+e5?m>%X zz;RA!%ksbr9pD`eXn)>LN(u$xeKhjO1Lt))g!JCJPt8NF8Uya<1r;9D>VFfyeCUM) z?4ou!yvPIG=>l=wigD1HU~VX=-@$YccUW^NzvgQH~$AbP+C#JflikIRsb^}yx1ZJoB~UdI&A&^XgM~8zhY9ibW zy9kbhp%;3TdM=r8{iP`;kP@cBq$6ZpG6Jjb$C?79^^k%B)RS&87NVLcBkB{f@&gSs zT%CK8Ut_T*cEd=sBNa?U^SkE7wwH~ivs7;;NAZkNoqnWg_l0cbcB$uHq>hb!<-_D$ z#zIs#U92=S=xfYJ!^AVgs)fd6jy`43%Pum0Fss0CMtb1ed6GFKk9#D-I%!4 zQ6?}T_@&XK3P|Le@PQ}hPerji5vMicR*A4L(og;{MpIh2tAA-L`H8pRu$p~-=|f%Z(6gb!csV=u zG8f5>q0~A|$28AIpWYT-r-&9{Nj~e@QKo*Ve2;(m1f+?@bsCZXu-7L1^lQ1-Re;%7 zmy?psPS0p-PER2~_U_cjojvrV2*bru+}k%=Ug)(B!!lf@SPqydfsHYpnS491O z=1HP+$syPG9(ssZunB0+U%OjJX=kusrc$r(%`QI;@PA%OY9E zlf7+wazPK;o8S8K`JH3W^~!&^r5gLbpukVoN|C5++=R!&{HTi0*(v@sGy&hfKEd+$ ziwl!l(_^lf<*J15ncw_`HK}1qXT*(9_V2X=dO(y(Pe`ud!A=;{8Z@|QlX$d!pI+Jn zMKiDo$XO-uT>`le{h~|D**_dv`f5bpaPh=R^xdCcfcZz>68p^WCWD0{OQl=p&1QMM z;nwa+%r@7OKJ7`Qe5w!{w40nB$+O^S%Eb_H_5heS5q??wZki(EbgWHf(POqR&x>Td ziW%$6Ny&m#t@jGD;hYZt#V4KP*F1D4 zMvp!P!EYk$U|;NKXXU<8Ae8r*>qz18RR32K`QC_=IPi_Hez$Ep+S+M4bn?i`SgDij-Qa4Guurkp;JL0JU+y0{`JK zGRHKgNDsC5p(;-gbdThn*i2ME`T;Rg6XX9@(e9OVgx4jJ(aCzoF6T)xb= z{!`-`gBZz~jm6UApoDXO!285Lig3I4dB^3>&wpQ1^V~T=HgpWUADlz~WdeaVyEHP# zLL2?0U;2NRt1p-L*Utn$p&NHf;q-e`)-*bvvVYNsk?GG9gZ$BU8QvcVke(KGs&Nm$ zYZ)3d2^odJi|=-hoH|ubZQSQUa^f1nnu(x_cP`syceZH0K_f}>rw1I+h&1}A7&U8& zNk~)#nc!51PJSgE?SMD6F>RW^aV2JDWkp?l5VWP;oX#C)c>3%auds0Ym<;IH_p~Oy zb)SlVqTK)RhT?SfrKo?{4g|>j6Z_${Q3LTY=izyl#G8^L2fuwRW8zrBP{w_g``;-g zLaP_xxGU@<)E3>b20Val5y*zaQ|>{a^lc>jSW<+j_>kx!nZg_<%sJD5JobFYoTX4$ zVg#qbJhR8!=wqU&<7A?e)$;!;rNc|Co;VFx1&ssaA*iwNRsVdbmon@1an7&O!XG;= zjIUmn=iR0nC(2_wPP`_DiARzdnJ!t;>_Lc?|4C@)(7x@ib18B>zGGO|wqVBIk&ky< zE~kC$dW`Zcf7qoiH%!A6HIE|f{!hJqhN~QGB$LuDp^-8)$`8aDd!Da59QQPSiLdvdzFn}ez8HXJcIW=#7&ecr$Se*e0;ajtVcuSeYL zdiuG7vAMh^t2S@NBESS8wctHHldsOt>X52M6VYF^S5d6eL2nbsNEiWF=kw>UI9&Tu zfB<<#^2^AzLu=6J^h}GPjTbw}m+~xi zgUTf?bP1rVP}1+e&VC}k;v`UCG`WZn_@>v`)A+n)&tL->Ve~TQ4EjUMFPcRgdFl1A zQ+!(l_8}EABzuCr(q5$mKlbwO$}x3--iNTm%RPsAu(*)-KG9_&X0egF3H8!z;kf81 z`#?6qNF};SIPJO0TY_@AQ*bXpjve)iq_ma@0ZzC!4YUg+9?%ztgnmCI?ET=33DD0_ zc?lVzt+7!|s#^Jy|LVhkQxcv7y#H5mok1OSF2?2S_u1w8hkni#T|{4|ytOKpF;c~X zU?&(TIL$v!;-7D`;4}0}O6v%TYl;Pc+Ib5rr%X&`tO&lE@~Jh22_Dan-`MjdI#JiK zSiCp=lh|=Cvbo$zN2tZ60!t7okBhu`}3bW0xfPd!yHm2Uy7HWLW)?otf=qI9nOQP43HRhxs+0MT{o<( z@3b{|rWYJuM;Tov8M_eT_vvb2W+b&Jbr3?L`_HVvEhNz&wF$tB0i&%SfymaXf?wJTy&-W zl)@U}H?HM@vC795Z6c5x)abddXQ`>rJQPz%Z9dIyjM2g;v6}%QeLd|e0 z6F9!NXt$QIf!4hYukS(&Z~LoKwCUz+)-|#D+gZ73(`}YW`0fte^LQO4J9mby^KrD& zMz3Q51H*qhi!)n)*-D$d?p=x+yRs;i3TL)!8jX|Zlpe&u>68bjQ+`H zg{Qj0@3~D!uQtOq164qdd-X5Whwx9~ZEKl(Cef!s{3R`(bVAfGeGJ{Icyw&(c%@Nf08!d{ZoT1<5be8 z1ueA|{aQ6`3qjZtDJ?;yV_PYb#RVPW>eQYF8g;j&tJgO@n!&Q;NDYJ8d9}sW-1BFt ztVsbRp658{$oOofQ7F(Z=`OTDvy|Xo!5ou7v%-;~Vf>Jn(BubuDAj?PMh70dd|>xq zoTBxn;Fxm*Fxfn|YM~S@AHMSv6w{aZRKf1Cw=eirHR8mAq$7@bN3*b2HpFFO#K-M* zb!x)1J`-Gvn=xC&?ifES>$jnOTqjZhe&NWo1qhvn%Dldia+i7S+%ZsK6gszOS`Viy+< zvvtd5hawP-3lc=-%{2ea9D1&ERN%&#dWCqwL6C;7jaU2~qwaz_i7nEpgSFibR#xpl z4{y>$|Hg|Ru8G^XIJ<4_*s7+OCR~~L&)+E2URk|sv$e6nx^%qW$-%zI(tRygwouBq z=&~s|V_CrPVt1E~n7S}|HQj%k##`+D5oiB!l7V&{F7^7-M3I{E8K6eLohTEfD^l}9 z|9hbHK^#h%hzUm*7Eu>Wr2I1dp&uE@?6KGWBls1e-#n0mCjrhIV1t;n@&oEYNURrG>d$v<}7 zg=g}5LOWTwKqcO`rh#8VT}FR|kaH;o9JM10zuhLs);*<934e8Z6f3l~G11-JBO2Ke zCW+pxjs6Jzhqm|RS5`_)a#aEYbZk*!%|roy=lWQ)3PYw%{-G zr0+iq3?Dws-i8@@=#d-43Dg?-g_4hXfTtAWjRYzp`(fP%>-o49meQg6PyHy3|R+G-D z{l%yrSP;8aWuJG}11(T95!#Vyvz>FuC~|(`mkLMsAg>KlVnkeum8pc-2cY?uvAdTM z<_kIRs4t!FF_t+^dhiv=qKC*EHwk}M|BA2kiLu~b@Y=3eB)!XHT&EUD>zMXPG%$ny|)$ui5f@)X*sI%RPrQ+WA*bpdr1i zdGNxtt|B`qlKqU;_(8RWHdY>mH%&9-F`*k0oFORKpf5{ZC~n26#g0`P-4Mu3ut>h7 zi&>`C8g|25txiy@Y%@%vv0&xc)QZ$K(21DQt&^0ov}pbOk6#S~sfMBRPbG|#y*Lzu zf56`IzpaD<-|5%9lH*DzMsyDAH$3%1jgvLA?}Y3O7v}dp${N~?krj-ZjAeCfK=Ovb zmz~m?d7f>Ya5im+Wf1|t+=lSf&se9uHNTDUPZ%Pf}PCkvSg5yYAkrN z==DS=iTYH`z9Owo?&yB{xV#*VTQQd0Pkv)vm-ytzo=f-qNJYa5Z`XT;5goq5SDU{` zLM1d=@+)#OKWS!P7~X7>RBkF)XE)*1sv)t&IDSh_)XPRMx4xUKYj872)|Eq1(wT}C z{-j7RB2k6pXTW6jN6WkDb7e{!SanuLVza}bs5qh{O~B-3#cF`XO=GGsbt)fwj%(UQ zI>F^NwodNOejf?G?q0$MtRQ|yn(Q44i*s(*BKd5=nrTbl@Z9fsfO2n;$$L3J9fGws z$BhAB;PcF#eWY*jd~s%S6Ya%aw@}-m7923$4S^WA4-6(hk&X2Ju_HjxepeVMJI2PQ z8EI2{CrSYxdVm@dn$Xm<#ZD1N1kD$rSCI1OCa}6We;bVM5+2JO53SapomVp&Bm@d84KB~CkG9<+)> zCxME7;|O-l=RS5De3$pJ70VjHL|$w)qb^)c{o&`G!X^pkForb)#XUF(rO)~n6CTW2 z`DOigYh{%}(SPJ0J-)vMF=xXjyrq@1n*bny_%A`QqQhM(2V+>(l&wBrvsV#J=J=s7^=Gm}f^4i8W=gndiH1jmn!pKr}P?>lF&VQ3TIX1a|u@ z{+Iy{A%0snPpUP|SP@p=Gy76)%au9wo6t@E!f*Ig+YD1>GsJL!O<{masr+WDD=N^4 z8K2Sli+N%1Oe6>7m`V`oW0jG&zsPnXl9uuC|78q|#4?TTjo3rU$FxL;^lbf+!L>6H z%x^BL#)y=p>A}^7H~fN|ps5;?N#gUnB5W|}$))cFGZX13&G4A2m&x6-&#@eRU-UOe zjF`)P_+>DE{&y?`lbgsMs~AL2^487PEV1B8`9wkE^T}qR=#!Fq9QYM4!GZa0V$x0> zqz_o!YjeyPpth8S%!A)U4Uvx~lEJ=rnqaImrbn5mD3$WGqp&9;v~n^ihR2oV;6o}4 zZqSmg5j201woqEj-9#ue-5rkZ+$Ez3HEgR`bt{HI@)izQ-!Ak3e~t8_$P03(@TMu+ z3;FbQR>O(T)Yh%F7R<=OxM7CH*O=$217osjF0NfCOh5dX(u|cF{{s)6Q#IrfjVRRZ zB22PmI5nv)US=?Tjr%qKBoTDF2#?tOKYDeY&h{II&o4#BQ)P^=e6dj1X7oIHXJS@^hhUZ;E^|PgAKo>5t>h@E?O(;6mGAYS*pUm zr6;3H(5P}bjOTWFR4T4_o?7&IlIUAq>|U;z;GYFl6e(F<@_@Fy`wSpTl$I|VTlcVp zXLaPczBfA>Bcu3cr_}wmG2K(?SEVUnfwDg=0u&&+amJMd%mYi1yRn5!)PJGj@S$$( zrg+V{j1UtHz`8G`X#`HZcR3%;)vSH|{h4&Cychhd>(4*cEB0qHB;-LI5wR0{BV3f? z6?~;Z_QKz}NT(J-58xJI>=>WwFoXe0)Axv|zK}KsHZFdF=4mxuvz_P?&M63SLpdn^ zppAsVv?p?Z&1CGT&lMKyN8!*Lyl$r>rWPJ)k{}YEG2S~G<|Br1T?*xr!orM`u%KlR z1FS+6glrLnAKpv_gsDZJg%-@m8T=QA_0($XG+9FZS2(@|1N?2EVwd(J0*~iwjj~$v z-)6o&@(tQ0ZFsKNQ@;sViVV`{=Re3-E%{sWe89=)Gy^z(DVUn-rg zXRX8G#mkE0!%zjkg&NHBBT1n)huyToc4@n|sIf(g=1p#z(c>c}Q2rzbz<5T8sb#NQ zJMs$@gZ@29+x)3h2P@b^g6*SQoZ-iBc=KAvC9~{6t%cC12naukdAjV0s{X$(_@v)a zW{;MEbL|ojRtt){M=(5{4?pjOX6$!=8VukCi@;A{;u?sd!-(YLB{vlfphxPi+xx@{ zelw2RT*voz+U}N1Wp|6okgyqB&2Td^Kz~C4`Bq6l@vRF~j?4);roOq*bBAIu_y|Gj zr6`if*EBIBq=R<|kAk|hF1Ai6lFk- zpwkpbjr%iR?r7CuMrhHV{0TShf{)9Sko(BeY(k`7~fRUT&G`KW=H*LMBf?J{AFaY8iS~t=bQhcb0+IBNGEJg zh%%5vES%}`Im^yVW)5`)X<_v#(KJ7J7JiF%v!E}}$&weJZ!o`ic6wVlel>~d4Bd%z z@l08AFIzfv`J`L{J-rB_sb>{R;vgszLv2e1Zg8furpw;YLA2qtON{e=g)d@GUqo4u zB<>2_O9_gv&<9Ll+t{Z^OepxpSSV8egb^P(>$4^s!cJlg(yBC7;??=hd@Z(b$`AbjvII zz=SVSHbc2+%iB@h&ZqF7WS3-Iqv+8Ka~GaM^lLS@ch2%~PO5(TbYO}>>tKG}bbWu5 zGGTB9*sbh5a4HUAKhEBZ{%zKkQAf~l>M>SANPz&hMV z`65|UIn0nbBkxtSpqVV^Yc^H@CLeP0l}yMYXX-;g)>!;F%n&oslh-tAi)YSe04{L2 zChIrVN2glTe*jZND4AcYg^)MV? zI}sa38~FdcN7oVha%s!N2Q4Du1kBO}If$`+L-&W5@ZRkHegSC(eCr(CQG=ImA*1FV z3!|*vh}WU6kDv+baq&(vNqsC`)y82tr@zrcvqp)OSrrn&p9eo?m@9L=-m>JnmAE#%s{Mukb@8PyKJS zg{Lp``O*5GRY+-QHhI&mrfIsd=mHs^0)W=hw?##|qb<`~;d=;W6x{N`J6Rxtw7lT8 zr9-!1{82gk3&4Lt4QQQrZna#i zaUp5cx!!J*^;~%qeqXkgV*y|UCMVTIkgT6lRP<=^-Fi*|rvpZLJz9;GPcF+NpZ6-8 z9zYc~TD(~x2s4icZANo}x0%Tq;7QxoDG~%7bw`W&5%lNogMe7hTd^?p~B}i!f+xZc zHmSKPSN>?~pkC?IiPol(HY8Hz$ICj-n0Ecl429IzmXl=^H4)+*?fJ}0;Sw7?lY8$LA$|QuzAWj)vTEAM^42pUa9b2Y?RY2ns2#bL49V3 zfNWaM)6;Vc*o8~W%AON4pA~K(W8MTjFB<<0>WmS=dx0)(R;Bw@M+OPksls#L@Pe6k zXr3;ZTb_mWB$1s_&2A-ybmv_I3gNxL+7GHhTzg||Q0AoR%LAGzUS)BSV}M?way;kd z@@*qq{8&r4JU-$EBUNi~tI6sMIL9Gx_R*9Li#E@4(;}ZcJr9JR7_JfgFpq2O4D4ef z$vw!o!MgZ^kCwmjqU>}PcPW;1W9*GMs=$*Wd)qY z1#!c)#YH&+yHiZJVDh`xuFKD4sdj=>KTmN_V-|}qw;T+{xw9K2i+0iRv)oIL{*LEk ztO3=*Gqv{g)!D~+YI%*W!(`z5aq@SkHoBkUI){A@n0opbCoLr*eAC-2A1zVlr5|eS{IrdcTLHBKj``&dJTsX03zfHxvt%(#`?XPvPF6JEefi$#cR00!syvz9QJ z3!ymcIbFqto+PA+1aOhHD)HPgTys%+zYcbY#9cAWIN+WSxHT}b;E zbag(fh{tv)y@0=gCUO6@WQ%tl^usdU>_KKDrSnwLujt#uZNsOx;L-W9vbyeTG2GmM z=q5MGL(H;F%w0NjSPK*<%{ikjI>@z*<=tuv)=GL+qM*un#jr@*oLstbn2I#&G{n*jsQLdoNqPtz|VV9gk}< zU1zs{g|2%tzSyNov%1#h5fh-7-w^vMl&K%p?i;f@(;r^72C@)*w z?oCn2`j-_u(J1AR`Ya~P(=Wtryhy`sQ^G1O zn@*j`>_6=qj9nHAU6Ns<$U)yzP5I@l)xfMZe^_Z_kqFpfDx%#bx8h4Yi1e6iCaXV4 zz-zmgR9cp~n28VI97x#_E0E`!Ox1ru9 z)=!SdJeuS*3-_bl6SS+FeAYDHL`{l^D zP&0x7pB=(n!w)vcDA3@Zl4j}xFwXJh8bn+hGGA+rit3usA8D5?Y$xzv39{l~ETqSPD4e=~6Ut!~=A1Of zY*Zm%&KFLBvFx`K-v2YCmV_c2f{O;e#v@}#z7`y?SpBhKm%8%?L?@BoabHEKt){f(+r=pHQQ<|rr>3p@SpUUM8Ia40knA)-wEffY>`*?)O3dbHSN|k;{1(ke7ZBc^**#T zpUv@k?^;9FsY7?Y-iU|wA2^uz-@Wu`RkmX(lrg`qE;!tuR|1-4j;f|0hnc~bsRjoK zMW#Q@l#KSMM;$dP!%b+(3zift1VCzciL|B0aIu}MK4&r+&a_$So=vP0&$9YIP<>&s z6HXrzn)no=IOr5^tM$FgA{^F0fGl}OK{1XFmXykzS!`6J4Ubc(eruUQy5Z`Zpu+3j ze-N>SnaooWw5Y&U%Gd!hlXmpx^>o5O>~n>w6;9yf9z>wAO(iDU5QH!*DsDRbH%T)v zc@c7bpZnQXATcKU4`1?J>Dltn1Fj%$sd`pEK{0#JHwK}_O__8Hw!~T)H9c!Nf9Q{t zwdT`Z>#igu3TH2DK61Y|v1W!J`CVv1Q(iq>W;<}PxN~8-d%IAdI)6$0>#saZteS({ zYQ}h&B1ag#JA@soRISF{c5mcD3y8ZZx)#C;-I8LyCKJYvua`7+78lDRFsH=>=pT*HD~>hy|~d)IkO z2{W$&LI<`Nddie z$(LfMs^r4yhx(e1B>=U?Z*Has? zXnJRi*?=Sw2=iwwan7G-UE`vu_k3qf>T$q##L^ASMm$Nn5zjYqi=wLTeG`hhs}2YR zGm#xgoAZj9EU0{K1=M+oOh%%{EV4Fp;h1NZ^;!EsD9e+f9+2Z^oe*ob76=n<^Yzucdxv5vo0w2l<2r*#vtCSv@8Ly3+?DNXN0jOA& zQ~2JKyjXtjD@zPOrtQ=K?0Yv@a6o0f4nWOCd(7SVxzcV9Y3=&1Ul8MNzR<=^(zRv9 zDvlO|t7OI3{Fki#AG{SK&B|vEDaVeszT6zvd*5P+j$n|a#dW>8R|a4d0`c@3`H+fq z(A~LNb3c{O|FUIzZ>1n<#fUR7fB%-oMhEw)?a$kI_LmiqPmCIToRG?LcGE{X`TDIQ zMVeLd>>vQk$VL6({%j(7-DDpCidfqOMkp^<83NTdd{yiOdv{3GDo>K>NrSO$4^+El z;%k}hqymHUFVb!@m1|BP^5!8i%)iOc%q0Hmoz(5-w38FW9}wKfGELbxRtzf`gpmdn zjb4C8fN3hya?`Uq(RIKTalSD*Ys+B;gn6ybp83xrW6}v40(na7(HjNcdcIKus|EI8 zyM(`{mdBx7sTr&<^I|PDgsn`DBGQTI*Z5_UZ$s*+q|2xu$Yz^rteX$A?YCiQ<-Ey9T45Q~{>GZM1D zPR&{Ppm*ls{*_t0<`r|W?u}rnx{VX`+v-Ep)7OEQT|rys8W-n2zIc<|uUoi2UmUS` zYc;MrWl5-CYJE)%y*B(P|NFmK;rAAvHLn*9!6GOB-zz}LBJfWUh*r#s(P0~Xv}WB7 z5@h{pD_|ZW!`Oc4^|Es(c4C3kjEq$KPJWU`d0X6VRw5(LER|%T!+vkZ*}7_Dk9e(k zg8gxY7sn^#v|oqC`db;RZ`J7br|}wSQJ^*0$Q`B`EH(2M*&VY@4210vy%YpP0O(dA zU)DEr4H-oEQF+ij$LfbJleR^Te?FGbhJd{B#5;Lo652y#t#d?+l{w(?LQSiiM#Obx=t`T zC93M%xP-dPEy#82sZn`+eZwnN1;3jr|A|(^*nc^#Ztn-X7(Y0E&A&W!8~Fs?a+t+I zD>G6&?&mC8WWV>K!(xk&W;qzojK(RN`1#-0uiV4Wyf~etrd;xCNHZ(&#eYNGu{RdK z^CtiPbA5ZDm&hpb#{2Ce%jA!d1T0S}K&|1+ht@T#w`58&B8KY~qn7%}WUN2V;q zs@=Qs#Z4JnefIM48+=TR20TD`vvN;Ot4;`abI!0_Pj(+k&(|<;;HN73BY!e)-wkL7 zMqof3xoTLI%%hEfQpR>EcZ201pDdac2$@IhjpOf5(fiHl=lQxM&MZFnnReVnTf%Ba z1L)s@t>FT8NosU_$`SkI9IR3_loOS@e$IHaUaPq{b!$y~1>X+X?Ld1YDKB03uLRwg z+3hjgjJJ^_O zy7M=#{Du%fdw@+r>;8z>cQya>08bClc5DHr)W`g9H<&-lYKg<-58uFr4ttLEq8xXq zMdMNdvAT`yCIunFx zbEVe>a|(D}OseHujAE@9Up~dcqY4Q%C*r%RgmSG#X%!W&R zUO+ZW4kD7T{Dl9~pOrIx$b>hr3MH(Icxm$p7b-)5!x|Pa?aS3bzNej&zkbguNK2}E z8&V(@Ub22{WFrkUCy`fpw)_d70sxRFvcMp;gv*{t$*@2hd*&(?-a^`f6>IIx6VNms z64yaujs>7=p9~>B#ez2B{YXA~}nb&{u$vp>a z2@Jsl1C@8$bodlYa-jjrp~JQeBX)jO#to}(|Cv$EW;Djus{7n+&a)l&=mi2h52ik2 zLwj6XXQWqD)7O~BK**0X@A)+&yh-BFt&!FqP!gQDTF{2svdGx26oAeQcrI4AVG1U6 zHlHd}nuvw2+4XBm%gHn&89{t*GELosZGc4Tikb~nRONo34+w9<;j2HQNV;iPiJ7Q) zamZ^ZaTFqi6uxFd_0w;Y#sITR7ClC~xiDy@TG1EmpZMDxg0XIcy@OypJKCtWkaSm( z4l1Emr~8F?Ex%_cN|SPZQIkE%>j+&W$?e%Mn(^z7F}bn3XUpknC4WP8#hQ)#uE_uv z6PhLu*QChoF=0<+Ny9(;evKrk-~s4&$Ej=HoKg`)-_cj9Y=K_jV)-uW>Ff6aW&ZyT z-7#YOp@qx$-`V0~qs070k?O(+4ANf8Z2;alkRxK`&z;v7F=2C|jjD}4b(KsK$EC3q z&X!Iouu6C{;d6U_P4%r}d8q;v48R?encZo4^0+t1_YQNMt)_M+#EWw5jPD!1+Jymm z*1+F7ow3>!Ep4v}Ey~eF+tCj&f|oEYf!_G7b>C{Ib^VJX#1{i5ADNZ8w5`kDH8jY8 zh|l;}$iN8uYdTt8NQdP966LCKW!`l1ufo#LO5cowJ6qeleo6?(Q!V>O^Ci);8iAv# zc7O*m=)VyzOxy}z12?dtcKZY?c=Vp+M1c|7_`(QZguIS18hmm`~+M8|tF71N0y<&W8w3vw6Cm0m+ zqOlUuqzDj4;Ar-*n{H14THcqodoQsg0%(;2VNZhPB&Q@~Q9!e0Bn$LRvFBmpZ~Ilb z{|@d5<>T)I@booS%t0(br}`Wk@EN3T+Dz1ZV6Dn9&LK)n?zh2OK7GHiw^6~22gdm9 z26kntE^#YtpC)x)@(aVQBe4t*3ybpgQ&Fkx@#dhf(UcihLJK^5~b^yr;6(xe-Evh$5Svm~YflG6I zQxjz*)+em#7QQE6mQ}e}8`>K~t?%2=fFf+Wz-KM1`nUOQ^Q03g0RHlI^Wa>Gzlsg( z!leHU=|Cx|g#$pT@c8~tV{wDiAl1f_`w4%dG+9>MCaeyJB$09<^C*AnyD{<-!eKucl>#nq)U_V(5Z4k&%`SuVisp4q_jCq$MvG{X zYf`tJ#mFy!+vn}@^|{1A*4XFuO&~}b4ChJ0ut%rrE$4g5gr~u6^Tz2S>?=S@&l!%i z*FaBwmP0%I8G=s#;iQa(H;KlYtHNtysw^i+X3rF4&~fyWiHZC#Gp}V9v2vEZo5xGe zG?ng#0!{LYtZ3=G5+arEfSmLlcsS!RZH1GWfj6trLK@~FP$oXNoe}PTHt=>(Hx#(Y zG^T*?@Yx7h6ZZN8*_f5{(?T-d-^+MoYrg?LD1`!oNd-!6(- zP34Nv51GXy-f(`I;>Gd^lCQ>d?i476Y&DOTe`MBQz1r{ ze&jaYJXFzKbJJOw6M*(-CT~y5suwp0J}^=7^}_$dh^p)OZPc*5&@n}@ebuMz76-g_ z*MNr474&wsrd%7$mgqYW(;RzsupaM1tvbjCtN=19|p zF;B-hxl7b;r11U`?0JBPAWsPg@a6Wf;RYWI+=twep@?HN@G6!%5B*q+l)`q!pt`O= zfB*;N9dR_(5)aK^;v295hNv&DO&~=zz%Qm=kE!-ua}HN@%y>m9LG->YZINu4f)vS- z5+7e*0PL7R+)e!duu)8B?m7@BcGjtY=+{KNb+Ne=Nsj>tY!FjDaQVi=q^`M=!pZ=s z&}aI1b@-2=LiwOdF6HHvDHaZ#$?DhHIBtw2NOgF5(fY`i`z zwp}uq0?{PU*pqMjcZ5oco%Fs6e$dpb@psdd0;w2GbW5X0lehHWo~toHTRPSQu7uKV zC9iqZ9(Q!ugzyt$AU}xMWml8HxCIox@!C33fJUbpmzs9e+jM)l}$&+5sdpZz-`uT`z|7 z|FYSB(DYX+B4$tOud`l!*LSlaC)3b+wJ~S>h!=JBPgt~+`yP)iN_+fDqs6iLK~pxX zs}j7IoKRRdqt3aAIP2q%cGqvPTAU?zXv-(cGL}jjx3a+7j@xr{q7LfwH@RWfYWyX! zWNgb(&Lr1EWRQc>Y|9BMh z3ll~_{#h=I6RO&^BGmhLvGK#pQ-4eMrSUr(HhZlxZ}Pmt2xP`MU~(%4w^^2qG+0fa zf{1Uov3bywYPE2teg%Y9YtQmUNU?oCjx~_OxPWeF;Zh`pBWxvx&7JX4OX)83>E7+* zW$oKkfdb#npB=*d&mylpzqnYZ!r_Xq&)@9Km90e7aW%cFx&@bo2;S~u|K8LsWa2iq zxWQS0J<64<-7iYMeQhGhrtjF8q%Z%x2@d1a_!AFBhZKFVo;P?Hw);iD?AWO&t>i4I z-Edo5X{YWTTB-02`TW^BYlX<4k|2H5J_;1J81w`4ovkxoB2QBP%HDAbf*13{W? zkJx#Pbzli2t;IcNLCXGK-3#=IrHj8DS$kMfKIEv!rg7&EfyL6P6k5W$x6B9oS)PZP zx4|M*16YS1j?WWW>~9DxYfR&t=ItRU-)Df3tP;P)p6Np4`EEU>;6z>4L|iC=q%!ed zBGg{sE+8*dix{6-G51>eEu0O4VbE6qEzSsghPVmoh53mC=ob+VBy5$_k7!wAqQr8=>60E4HI=yI(Lb|v zOj%9Z^$W#fWNpsZ&{jawF$lpPnvRBIoPt6XRq*?2X$+8`YMD$FOEVc=zy}n_TDfN+ z;Oa#V)VcP?3A1P(xD?Ns6mXNjA^aRBsma>Gc!HJ%wmcsr@9%6AK zc!F;Mxoss#Zf*z!g-ZQwkk?M6Ia$fQKNr|T!H(0b%G9@8K)j>5PzbW}G^-HOq5j(d z*lextIP+}nc1NjBrJ1H$!hKfA0)_6|U{A3@Cop4%3`2)aCddYY<4ADDKr^dEqA|t} z(;iLuaX_TMK11jqdtgN4W7mG#h&}9-&MtuOtr~@myJ_UAR}RV|kV+@$Du)4qZC<$| zJUHfT5hM9#L@$Jk=C?LeR6L~L@non50NsGc=sHWg6!`~isPq)ZivmwwAjY+3V>%Br zS@t*#C3CY#CuuO{6C6PH(`#<~&#*M^-||n*{Fz_pe+P58f`l(d@t4h96{}u&Rsn2a zyRb|1)gIMwdX5M{nHq1pO{$7%og6%YI9465Ams0j;EiV8lQEUvxC;cl0KsQGTp_vvKIq1*o{2lKij{o@ zxUVs5m;p&({GPxGWo*L;`Fxox04@IWOmV&E=_6;(STC3a3PCNlDl9*K*0tg7M^GUs zJ38^5M4CHFV*nff4>Jn7_)?;6;^>X6cy=C%>Y!7rP@iv1PdB#ENLA^;%{^WAlmmmb zK-ou{Pu*V;kMK1*~At1Vu?wGlV_OY z(H-+A>Gz%CwF9Doe^T{*hBPi0Gt3qYfgfygy6Om?o&ep{`Pg%D4Kt?r$>_;6RWHZc z_3fZ&7cx+3TzK9U*!O~_W}duf=9QAhqRq8-ljQ0Y-eyPW>0Tw9ShMZGBgSzsk= zt&xgjJgH$UtXgj`@gZbBR29kcycw(TnxLqu-Kvg zC?DLD7p*s$@S0j}k%pZ-3>#h0-gCcOTYSTP`EuyDMaZWeL4ZedS&xQ!s*a5j4X?Fz zM4aqJ`mC+L+GY&u$RaEuhK{U%piFf*`uk$*{yE#&=wArsb*o$gDQtKO)7R40{oY4_ z0El0J+CLO~9bQ?zM#R5ltJB_ivOsq!{e_PqkoIEz87#|21C^L3RXg^3J7tvFg8YA) z5>>=UPu<@g_(6UH)ixaB9g&6O6OjA@8;!~28R6R{Eh}@71OhmOfwir=K-TE2SaXDIUSZ+VqtmlkuTRLqd84GF^=wt$Fiht^~R)6 z7kHE&nne<>AEle;A2M$lj&M)mKV~2p37v9TIJQS-k+9{(R+8Ax0fCQINUYgpG=P#@ zJ0g@V3K1fwfm%duPCGY-&)tK;D})QF$Q`q8?U`f(n(@tt55z}!k6>N@@Wx?W|Hf0= z>8KaQr+(^Ha@ESZ)=@nC5BfQDu!e21=SH(8bC1Vg+FWNH5&$t2Gafoy5X8~tiRCZK zmFVSDi~H|sWp#_E-X05cwibEnq)}6I=bm_Bas;@DLC0y00{_m)g48q#ydNte{q(O1 z3NG<%t3>UNJ|5LC$x8fu7n%0XvZ}y8IBHiVo*#dQ7(rARMaEyx;^`5^h0TI3X*_r> z{l<5ppdzEe#r!ufVa{<^uEpYl$$4logY{Hql&~r!?SuoiJC8g)OcW9!rn0G+=(?z= z_pnZs`(iV823^ZL@4$9zm*21V=l)W*w=_Fd2GUng+!W4fm4I&(%&D?!j8xsV*1*+r z!`_w5@7wd8HsXLF7bD*cdRCZO$tnxe2{E z32RQg$6@FicJ`Ij`{vW_l&6cxL5F(lHi-V(?tHUiL>NhuEJFPX+Po3@zL6m31g~}O|1t^ z*{AvLa}>jd0rSrw!T0b}l-(Xh`cA)_eI9a)$s1c8E~bJXO6b3k!B9TSYt#LSVfPoG zD-(!<>Z7N~Cc-zyI;uL51f$qytQaKhWqA*8*}5`r!wdXw@iXT5j7^s-HQ3a=+||UM zi2P1hNqv?)ZLgNF$r-79*=BE^xJ-S{(S~{IqgYj9{A-h5ECQd!=!+W#5#r^=fO^K^ z=|wBaN|`XNB+QpFOg_3g0g`EQLW+|oeh$AYcwjt)=hH}oQrssYmrM!9tk%juwZd|* zA_jCm?0)1TGI#}}NjIWop-Zh{!j@EhougT=nT_NB{I0MO_M8s|5q z+`kb6J#k;9F6S>?hIii;be1DMwa92=o!hE2b_@E7oc}AU?hE--PTa-?d$QwOuvT_7 z*jUwx^o89P_RS|_S0%pnj4R)dc2E_SatU=QUJrIrZ*pcKE1KO5DBu{BI5<20a7DXO z7{`v*;b%$5rtj2N*7zf(UGm=KAFZVtj^sX%(VP_z{89uV1V>p*HuBs+Z#iql1a*m+ zh=RG!ij(dwlG8?w&byoS-1!w1YyEn}pQN|b`%SB4P)mxvlL=>)Bv&cvE7CF3PX6>kjmF&K#pdq(Pb%i+Knkk`|DYaAF$5;B z@q+fT8`C99W!0ZFxd#H|w8Xe*iPM`n*Ngcsf7-so{@}2@oj{XGbiB6Zl$|nwPISR` zEV%fFBU0`%uOcN6Iiwp24Ou1iUP7`SyO}mT#3v0uIG^H}|6IxPhVWy<3sV+wSqdKZ zudhrkn@42m-tpF2`x#1LBMY$NhAozFAcCS$aPg#Rp$ESI_UAL0*bN+r4JRj!KFZ_8uK! zQH|NFkPZ-(%^Ip0(dmh9HmsK@>HhBDoFlyd0#hFPg2NcR``Y*iP7{c<_#D!gVB!!W4QZ6hZbLnK?}yGy zIhHY3Yc64Q~7 z@}HY+?L^?v9i=PrP@Vn67poO9 zZYJiw{Vy+9itnhk;8s5#OqkE~omwy#Gl?QrNrv!uZSXV65}ZX)LL9|(##~amtmf!2 zIx>AO8Uo+SlVvXK{j=mO_s1VrPYm9OrSXI^j~(6r>i?@H;OxTuH`0bINl-Qn`TlLw zHO{e+Zi9Yddce#7SK66JL;b#ed?qC!G4?%@RI-+2H^#n`(vY=~NR}e|ZtRkjuQd{~ zh3xxYA|(4hLJ}csw&(ivdw%CU=RD7O{`sAw)4`be%yr-Qb-k~9KKJ{jq!&SRPe$gD zsjyzQ-Fw}~n7x$K8Wl=#7w;C*iseZyiCJ#w6s5R@)qkivIMu{XGV>~0=W|r6;=;qN zG#=DSb-NICUA%S3Yont1J3+?XJ>xuIx({l4udVVGzV*|}xRKO7N<%D&$A~Ok7Gk}e zUGt?TTHvL$m7kSVG<%G15y?OPT@`%nN?WePq39PYmXDIVONvTa=xs;%oM%3DI2ZNcwdOOo%woB3KR;ia}$)MYE2^~aR1-lk>X_C3TLw; zo2dmj<2Wkyx@)}UsRm<`*}ERQvzpA?(@l3hD+?)Mxnj+$=wWEM0vqn%KQ;Zem4!{9 za5=^~O*IsKnx^*=4!f9@lucg}ps8N`=%hfs-%aHe`WUMb%@6MrSC;!Nr+K1<9^>Lq zCQ==e)!w+2x?$V%_9+W%sOWQc*1fYvC&LnY65ZR)cULJsR95|bAbPCMTBxV*?$}uA zh5V;@!!xDc+Gjs%MzoP~W_iAE&F-5jGAdr_nsyE=zJQ8JwN>=)akaCkt_m{Wo8lQ0 z7Lu1?9ljknk~f%E{07|{Okk+L{$5#6L%+v8rwrYedp3qg)&GiKI0OcN$5Ow23M7c` zsW>}(HMWPGUGu!Ea`znxcwv&E_fl*sEAtZ}-=Eaw7iLU&p7=I&w_E0%QDntc*__rP z-!kn3viPl^a2tSocWyL$Xe`4%7F_j??3t9x(c{nqK@&~AlnV$MBPsp0l{YCc#{ zTa;Sl+ust+6t@l6QYHA=Z*9(b1-0(`U#wf!eyx>3Hjw{R)6YivP)|-1$`Z|^Bd&7K z?}49EBde%|uvFhboF|dSRVDVd_+fvdHHxWor|HQ@h6v6ig30Eb*@xeB@dN&!H~GRKu zpl2MmP`&(N=z5)xhj^#E27A1~Am@AK1uuqcr>q=u1g~zCnNRp^H0zgd)xOz{aiD~W zFVWcW+9qQ87tU{xUnhxI`XEMA`*hPvF>!@_YX(Nk@rDyti{`Mt{7^y;O4rS|rR$ z-E$9rnUg;r7Sah~E|Ll0e6H4He~k*Hh{9-x#w%IUe3nf<_N3k_-+I{?W;xSKB1^ol zN4X)!bH6>5?hKC@k94Y+ZSNn*1>EYsaqBVdWm|)AVuzf*+!C|fllnp#Y$jPp?WF8b zdRv|U+;oB%?#JaN_p+2Lm{R!zrydSJI$YDe(;3QreLAv*5UujmGxZ8J*oowNlPHez8-6k$p`@UE4l35?iwy zF~utSPdEAF?m73*7FDtcY%!tePOiL`C)$6qdx#^yKb%s@Qkngh0(s{VUZ~1DMg_X!J)`y?@KHS3=5QZ9v{Y(@Tx&6ZeXgY`$Jd z%E?o-SI98dUTupo^fYoudnI*wY}Qz#X!+ff-e$|knFQ3_2~RSRdViB&p#RG0z6W}7 zX$#rIl>=^d%A8pzj(5C8qe6vAlh(!SZ)L>y^n5?;yGQ5WsCI73S`GKLS@D>mPl4!I ztbKc(KBJ>TRcZg@1FO|PZOYTteFjWPw)X~ik7LB$Ngu-0Zl$sD>v2ok7#pxHJKT?o zZ9hzJJv<(#o^YlZFJM${-5KV7{@FPw3K1XltnPRm_Z53ab=9f`o2C2FTAoC6m!Z$| zYfp9jh*#Y<{>=s@NjT2AxwlD4+C=9isIUrhCg=Y2WiAT9q|D`3=#jY;x! z-KWV>=KS^#byP1mmM)5l@SP>ynmflOQ?>N_q1S4Je(?4Qtn{dxRXZum%u!mDQnr!9 z6tm%zX8|1!z!^2UColcQ|X&u3!Li}8+7snHeN>+-5F~UcZ zdr6DL7}(>uPe!056+ayH1uUx!;4mYBGcyBpwN`N3M!<)0FJA20nd<5Jhti%yJo|>; z-=hUTeaBJza^%p1=mddXBrXol)7MTUUSyyoXL>#Rf^I^T6Dwm8#f z^j;Le2tRG?s*3&?`9d9iMTDY4Kd^__N&qy
#eE_hl}ro+K0ENhduk4m*Xy{!qZt00x86F*g2L-sOLcVkL2MZ|k<7F@SdX=;$q-e3J{>_@u)eS07Nd=GicE2~z^gJ)sGrqneIAijO zC%GQgQ)@Ri;jO=jqc-;j@&NmEukgz|Q#kg1kJOlatd;m!||i z{wr>s2Nfg@ax;Erwp{L<$Frh!X6p97rM*zAb|MurT+;luN?CI|IYx!}OKdswfb=u! zaaar)%3h2W0M`KI%y8QWN9l(W$=-FjA2|=Vl@VWXfMT@yqDk=fxqS<1eWkZo;s^wZ z;7_6`%6IQpwi`pU^IdN{f6?&R{J1f-eacZ+f^vzT)#VmfDfhFAohNR*kJlZQ%=@M} zX*k(E(N3yIlr~FYPlMAjr>u9zD>0=!9Br0G7Rp1NeyB$WRFG6qdBEj*c&vLhuxr zU$=qBwa}v<*HH~5r=)zr*ByZmfkpY`AqN=iH&_3v3kT4A-1FyXlm;&?Y$r`_adD>? zSbn)DR9twwU|NM;o%0n|(`)h34JqqCAST*o&u$wZ*gN#x9vu4q`_Yz@M2WAYGGrfG zUX2DYsz=O0;pw;p1y8}Dn>bnppxwV(%ieeoj@w`R`==*7ei@8eAMEY8Z4O&k7cb&) zU&%uTP3i`sG8a|hPMpXLr=)G|TW6`_49=((${T1p$2-I~AFWjyw>V!ToiLD}Q_JED zceXB{ex}}#eCEy(In+yGOH%M_pZ<+kRShLB0#fCXcL;%y-F#OvL+6%$^tYGyB{efj z5P9@RiE({UK8+=(@3d@xSW=*kP5%Y{-K`TlcyBe0>P>ml4lkK^^?Jg~3Cg^FE>SV$ zP__Bik&a=he~o`U12CY_2dK|nJto3{e$TG5n zKj&}?5I8M=X5V&<&=%@3`Df5^G*6p+Uw;?qwh^=GXOb0HJ*a|>nm^(h-_A^DTMDCi z;DlySN=$>3v_Ph*cEwQ*cuG1eL;%vz1TmBP!nd!@meJ}BUn?t)#}Ehv6smk)Lbne+ z&!^wrj{4V67&6MLtL1J7syD#Hapu9GfoN!SUz$VH`M;j+KmD~*-#B|k$_XwM3gD>Y zW3+JnjfG!yxGv8WL6>sZl5Dq4d62AN}!jBf4_Vz6~FtBxUnw+n) zS)&U5Ak6}PG;4g)PAuf7Ps;-~OPeVs1(J3q7V)-*z{So~Be&q%zkvWQ^nV9ph-AS< zqBB*NQzE&by86q#|M?nplt6#`8}7g3_3yC%UxC_`E3DmQ`SRsU+8~_oFeCtc`?2c_ zPdp&R5R^5XkCKb!0m2T}@iv(sf=&j0H)#MGbOYkOPH+D?RTcrrH`<9cTNF*b3gN(l z>3ez?oIDisOJAaDp#=;zN@AAzIki;yD$SNv{tsRD?s2q;-G5gg(X<9=Wx2_z*Y;|>M5RCE8pA8uq2T0C=JgP6R+>hVG^F^!fz;I$1MrZcD{ zsV5jH9NhqYrU+>C9UC*Y8!gkU^R7J{Q&m^Lzsh!2wP}5!4;|R%Y-eZJBHAq#09Xnp zN>T)Y&@H`{L^PgQ+`neRcyxpBQ~V`beEyA^ow-B{U>y;GkiozZt_VnrH`UbxO<*uA zfZ)dw!U6@#VrOkOY!NPxuD7=rI0y6=g9W#Vz-6ObSXdx-Jiq9a2rSZYpcRC5C0-3b z$tp&Gw=3*O#O?fnqtHRk)6~Fy!gy>g(*utnMTm)#Sm?8V|3!r3*KA#cp=|-!0t5>{ z1l9s0oXAYCs=_}I1+ePVq$K`rbp<-5*~JB0T8WPLrKfB*x~eyD-& zQ2B1yj+m2`5<3c{cpOutlCa07U6IeBH^Pi1FU4O%kn;;4x_3Ujx1q6M(gDI?83mcnr_lX)LA@!B+jNwvffl-_-WMySl#A5w{LLh&2 z^R4{^I_=XeMk`ByZ~Bx^jA?FmHnl{u7D68d?=t{j38Dx-hr5$biGp?8Sd~~Fa2p1f zY@(pgPB;KIXi7^%q`n%PgX%tFn{zSkRGUkHXXM0kC3@P(-qt6)~7yZy?n_8=NE}FET#qq z&p&i@V8FB*b9K_Qgbfwk@i%-xd-?twpB>={@Mnwdm1*z75Mp;S1USs3*8RkbPxK<) zKnE!zHXi4^UL)qc=Y}Avxo|cz?I{HH25FVsx7o`aCZhmCyL08aS`hqiH}XD*)q|lX zLpeGcq2jyYV{>m3ANb$MeLfn&a)taj z6XNxD)+Mn;AmK173_Ov}s5OLXs-Sw1Ke zi!Uw~8CrVW`Y2L~B)9*q&CDW4%CBAnNc#v2*StCSmCxaU6X$#UC4L6=a2ZljQXhMG zdyYUT`^*&8lp^h6CXxV#-U!#2-?WYmz>^dSt1!zc5wJaE#U3JzWDy)Tcc|T9fOQoL zvtt?%x(Ji#?c29ls|W-@4mmCc@F1`%Fws(LWE>tpg%Qekn0IhTH)05$mY#k@|Bc4e zNlChtnP=KEp@#>*4wV464Fyfersp(rY#ow@-n!`Z6ie;Qg=H%0+k$VjQ=?#|S^!ag zeEd1pAV5?A`$)?uXwst0=@t0#**VMcsz>AF`BaX?mJ^*x*JxoX`kNH~T3&Wx+4^|` zh0w5rf`Y)4q!q}}sDQ?vNa;m!Xl{V=43IEJ6e@u9WE0>vR-qq4h%)12)lpl3H)n_F z%LB{P)iqPW3z#2deh_wRc5Lr0(AjM z=W4j=9@hKP^^_SX2H$QqK$#f>JNNDWLhrNPJt1G5e|}^_ZJx&GGa#2yVimdf&!^cY zz$O8b`fH>#g^9bnM3PpP1wZH$Wa&x8r>AoR$lMI5olc1efd|43W8jns5L9HvJtzUp zH)Ooy!otGIGE`hg5fBjIzGvX5vy!`e717<>TLxwY-Ha3f=wUU{&y1B=M-xN(!Fk0@ zEOFMS6XHa-^{{-QV1C)DQpd=M6*I$n&536dVD`YJ>hOVVAOsm@=g{<*<$7jS6$|+7 z0P4yUi3H#c4Z}q=05F3CFrjbHP9+Hn*t34HMqxqE4q|7onSjf|h36uBI=TH1D~(au z#cP{3E6zscc(}h6 z4wF6%jwqkV=pDLyZx0p}0Yn`GxQ>Kl@Or~*2A`cR1bPE|F3 zvwitEFVLya!&*I0BoaO2?*h|0Y8#L?Er9kOxO)&;YJhmRJetV`3 zpGzU<_vtY+zgtBq07}F(1C&MqH8r&sDE6(8-_O9o^`I^*+S%DzF3~p7V&suZXHt9) zo~%Lw42Ld=JfoKn$!nFGe0yW}?Cj{Ev|B(4xKt3-#I&s3DTtKRlNvFgW57=Z6E!jJ(>Aq+~u<9=$d zs8ha%#dku-=0M00dzCrqVIDTwTn>X$Xp|)5g?CJa_9j%=vW+aD@wA;-q8nVUqL)`~ zdKau#MH7>h9@|pxGiMZ5Ig%HkfYAYt3!B>?R#732T+y{{TH+thRKW?f8DE4WIN@;1 z$e6%;diwGuAMk|JvZEQvE$OfXpS>B*bjZxutiDjFK&$2T`gJk11=W3JsOs_yB%J!r?ZB zj*E{^FD}Nw5=Em7^KX0|9JB-#$#>yG6PQk=_1dgdPwrEJENBB&|Ek{>je>nEKa)ab zxFCe(3|KO5F);=ZW1g;5F9_M15DLC;$m^Xvc@m*C=;`ajZGn}QPzoA-M@I1C$y$~B zlD}$5UVI7yF{=>EgKCI9t8P|g2)ii$qYtW2qSE?pSLYHPVA61(J9oT(f5`!fhZJz) zbaE;husCT(A1saT2UhWirI_ZUQ!yZmI9jjk*T^b1jL^2o9D|w%tfUg_5 z^uR?E_uPI*tW(O+J*djgChPC-7l%2W6%|7Gj&~%DAz)MR`E8H_S3pZsn0jr{n|N&) zNbRrpAz8P36CVGeySx2Mrn-@FczF2Y&t>>fLjpQlItCuqrg!GeLcpg(OsinN8rFGU z13|#T%1Un!i7*2EsW_pHu__tDhY%4UF387G84il8mJ_bV3(_*Kf=`AI~=Qlw$ z2&XED(WC)Hx(*&67A+KQ*~@xkNOt~0PLD(_G57o5q3j%V2yv33aEGu zkV$kQitQX8l(Ks(0iKhvp(nN8qcBwOBP(;b=Zv(BqE?+#2eRBhR>etFa9rJXVwNCD z5GOI+Tem{NOKNXH+ouUOB!Io@8ykxOjgqv0L{RvMQUPz1@5=?$ zG{+(r>HfC1|L*aTn`n?;f12{&t?qx)^Y0w~S