commit 6c2b1d30280fce58fd0afcf9f93adc4cf5e0ee6c Author: ModelHub XC Date: Fri May 15 07:16:06 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: baban/QwenTranslate_English_Tamil Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 
index 0000000..dc836f3 --- /dev/null +++ b/README.md @@ -0,0 +1,63 @@ +--- +library_name: transformers +license: other +base_model: Qwen/Qwen2.5-3B-Instruct +tags: +- llama-factory +- full +- generated_from_trainer +model-index: +- name: MT_En_Tamil + results: [] +--- + + + +# MT_En_Tamil + +This model is a fine-tuned version of [Qwen/Qwen2.5-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct) on the MT_En_Tamil dataset. +It achieves the following results on the evaluation set: +- Loss: 0.3116 +- Num Input Tokens Seen: 4473404224 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-05 +- train_batch_size: 8 +- eval_batch_size: 16 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 16 +- total_train_batch_size: 1024 +- total_eval_batch_size: 128 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: inverse_sqrt +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- Transformers 4.52.4 +- Pytorch 2.5.1+cu124 +- Datasets 3.6.0 +- Tokenizers 0.21.1 diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' 
}} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }} + {%- for tool in tools %} {#- one JSON-serialized tool signature per line between <tools> and </tools> -#} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n<tool_call>\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n</tool_call>' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} {#- consecutive tool messages share one user turn, each wrapped in <tool_response> tags -#} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n<tool_response>\n' }} + {{- message.content }} + {{- '\n</tool_response>' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/config.json b/config.json new file mode 100644 index 0000000..fb880d9 --- /dev/null +++ b/config.json @@ -0,0 +1,28 @@ 
+{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..028ef0a --- /dev/null +++ b/generation_config.json @@ -0,0 +1,14 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "4.52.4" +} diff --git a/model-00001-of-00002.safetensors b/model-00001-of-00002.safetensors new file mode 100644 index 0000000..057bf61 --- /dev/null +++ b/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea31e9c1e050b73c5dd276b47006030a9ab6b91abc310520b3b106fe3a1e4727 +size 4957560304 diff --git a/model-00002-of-00002.safetensors b/model-00002-of-00002.safetensors new file mode 100644 index 0000000..5fedeba --- /dev/null +++ b/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5359df61243604dcaa5251c776228849ddd7020efd408e8412c3eed4a5348450 +size 1214366696 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..f19a648 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,441 @@ +{ + "metadata": { + "total_size": 6171877376 + }, + 
"weight_map": { + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": 
"model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + 
"model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", 
+ "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": 
"model-00001-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + 
"model.layers.25.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.bias": 
"model-00001-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.28.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + 
"model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + 
"model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.o_proj.weight": 
"model-00002-of-00002.safetensors", + "model.layers.32.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + 
"model.layers.34.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + 
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + 
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..ac23c0a --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..51ebb3b --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa +size 11421896 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..fb44f02 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,208 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + 
"<|video_pad|>" + ], + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": {}, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..778e986 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,1595 @@ +{"current_steps": 10, "total_steps": 15426, "loss": 1.4007, "lr": 4.9977515176118345e-05, "epoch": 0.0019449576971700866, "percentage": 0.06, "elapsed_time": "0:02:11", "remaining_time": "2 days, 8:22:48", "throughput": 26962.89, "total_tokens": 3549952} +{"current_steps": 20, "total_steps": 15426, "loss": 0.9519, "lr": 4.9952567580506e-05, "epoch": 0.0038899153943401732, "percentage": 0.13, "elapsed_time": "0:04:18", "remaining_time": "2 days, 7:24:41", "throughput": 27442.56, "total_tokens": 7106688} +{"current_steps": 30, "total_steps": 15426, "loss": 0.8373, "lr": 4.992765730738634e-05, "epoch": 0.005834873091510259, "percentage": 0.19, "elapsed_time": "0:06:26", "remaining_time": "2 days, 7:04:37", "throughput": 27386.71, "total_tokens": 10580992} +{"current_steps": 40, "total_steps": 15426, "loss": 0.7687, "lr": 4.9902784263792476e-05, "epoch": 0.0077798307886803465, "percentage": 0.26, "elapsed_time": "0:08:26", "remaining_time": "2 days, 6:06:51", "throughput": 27743.47, "total_tokens": 14051072} +{"current_steps": 50, "total_steps": 15426, "loss": 0.7288, "lr": 4.987794835708133e-05, "epoch": 0.009724788485850433, "percentage": 0.32, "elapsed_time": "0:10:28", "remaining_time": "2 days, 5:40:46", "throughput": 27923.17, "total_tokens": 17547008} +{"current_steps": 60, "total_steps": 15426, "loss": 0.6917, "lr": 4.985314949493234e-05, "epoch": 0.011669746183020519, "percentage": 0.39, "elapsed_time": "0:12:36", "remaining_time": "2 days, 5:48:31", "throughput": 
27841.48, "total_tokens": 21059072} +{"current_steps": 70, "total_steps": 15426, "loss": 0.6701, "lr": 4.982838758534584e-05, "epoch": 0.013614703880190605, "percentage": 0.45, "elapsed_time": "0:14:39", "remaining_time": "2 days, 5:37:18", "throughput": 28009.81, "total_tokens": 24647552} +{"current_steps": 80, "total_steps": 15426, "loss": 0.6515, "lr": 4.980366253664179e-05, "epoch": 0.015559661577360693, "percentage": 0.52, "elapsed_time": "0:16:49", "remaining_time": "2 days, 5:47:32", "throughput": 27894.02, "total_tokens": 28159744} +{"current_steps": 90, "total_steps": 15426, "loss": 0.6248, "lr": 4.977897425745825e-05, "epoch": 0.01750461927453078, "percentage": 0.58, "elapsed_time": "0:18:53", "remaining_time": "2 days, 5:38:14", "throughput": 27995.3, "total_tokens": 31723776} +{"current_steps": 100, "total_steps": 15426, "loss": 0.6196, "lr": 4.975432265674997e-05, "epoch": 0.019449576971700865, "percentage": 0.65, "elapsed_time": "0:20:55", "remaining_time": "2 days, 5:28:10", "throughput": 28116.53, "total_tokens": 35313664} +{"current_steps": 110, "total_steps": 15426, "loss": 0.6064, "lr": 4.972970764378705e-05, "epoch": 0.02139453466887095, "percentage": 0.71, "elapsed_time": "0:23:03", "remaining_time": "2 days, 5:30:29", "throughput": 28164.05, "total_tokens": 38964096} +{"current_steps": 120, "total_steps": 15426, "loss": 0.5962, "lr": 4.970512912815344e-05, "epoch": 0.023339492366041038, "percentage": 0.78, "elapsed_time": "0:25:05", "remaining_time": "2 days, 5:19:39", "throughput": 28254.39, "total_tokens": 42526464} +{"current_steps": 130, "total_steps": 15426, "loss": 0.5868, "lr": 4.968058701974564e-05, "epoch": 0.025284450063211124, "percentage": 0.84, "elapsed_time": "0:27:03", "remaining_time": "2 days, 5:04:31", "throughput": 28344.44, "total_tokens": 46028800} +{"current_steps": 140, "total_steps": 15426, "loss": 0.5789, "lr": 4.96560812287712e-05, "epoch": 0.02722940776038121, "percentage": 0.91, "elapsed_time": "0:29:00", 
"remaining_time": "2 days, 4:47:45", "throughput": 28474.26, "total_tokens": 49566720} +{"current_steps": 150, "total_steps": 15426, "loss": 0.5757, "lr": 4.963161166574748e-05, "epoch": 0.0291743654575513, "percentage": 0.97, "elapsed_time": "0:31:01", "remaining_time": "2 days, 4:40:07", "throughput": 28522.0, "total_tokens": 53102848} +{"current_steps": 160, "total_steps": 15426, "loss": 0.5656, "lr": 4.960717824150013e-05, "epoch": 0.031119323154721386, "percentage": 1.04, "elapsed_time": "0:32:52", "remaining_time": "2 days, 4:16:42", "throughput": 28661.9, "total_tokens": 56536064} +{"current_steps": 170, "total_steps": 15426, "loss": 0.561, "lr": 4.9582780867161893e-05, "epoch": 0.03306428085189147, "percentage": 1.1, "elapsed_time": "0:34:51", "remaining_time": "2 days, 4:08:13", "throughput": 28733.74, "total_tokens": 60096640} +{"current_steps": 180, "total_steps": 15426, "loss": 0.5518, "lr": 4.955841945417105e-05, "epoch": 0.03500923854906156, "percentage": 1.17, "elapsed_time": "0:36:46", "remaining_time": "2 days, 3:55:19", "throughput": 28823.98, "total_tokens": 63609856} +{"current_steps": 190, "total_steps": 15426, "loss": 0.5523, "lr": 4.953409391427024e-05, "epoch": 0.03695419624623165, "percentage": 1.23, "elapsed_time": "0:38:46", "remaining_time": "2 days, 3:49:22", "throughput": 28871.09, "total_tokens": 67169024} +{"current_steps": 200, "total_steps": 15426, "loss": 0.5465, "lr": 4.950980415950502e-05, "epoch": 0.03889915394340173, "percentage": 1.3, "elapsed_time": "0:40:44", "remaining_time": "2 days, 3:41:46", "throughput": 28925.1, "total_tokens": 70710016} +{"current_steps": 210, "total_steps": 15426, "loss": 0.54, "lr": 4.9485550102222575e-05, "epoch": 0.04084411164057182, "percentage": 1.36, "elapsed_time": "0:42:42", "remaining_time": "2 days, 3:35:02", "throughput": 28959.93, "total_tokens": 74222336} +{"current_steps": 220, "total_steps": 15426, "loss": 0.5376, "lr": 4.946133165507037e-05, "epoch": 0.0427890693377419, "percentage": 
1.43, "elapsed_time": "0:44:40", "remaining_time": "2 days, 3:28:10", "throughput": 28982.96, "total_tokens": 77697024} +{"current_steps": 230, "total_steps": 15426, "loss": 0.5338, "lr": 4.943714873099483e-05, "epoch": 0.04473402703491199, "percentage": 1.49, "elapsed_time": "0:46:34", "remaining_time": "2 days, 3:17:16", "throughput": 29035.57, "total_tokens": 81142144} +{"current_steps": 240, "total_steps": 15426, "loss": 0.5276, "lr": 4.9413001243240024e-05, "epoch": 0.046678984732082075, "percentage": 1.56, "elapsed_time": "0:48:31", "remaining_time": "2 days, 3:10:53", "throughput": 29082.65, "total_tokens": 84686976} +{"current_steps": 250, "total_steps": 15426, "loss": 0.525, "lr": 4.938888910534637e-05, "epoch": 0.048623942429252165, "percentage": 1.62, "elapsed_time": "0:50:31", "remaining_time": "2 days, 3:06:45", "throughput": 29102.37, "total_tokens": 88215040} +{"current_steps": 260, "total_steps": 15426, "loss": 0.5249, "lr": 4.936481223114932e-05, "epoch": 0.05056890012642225, "percentage": 1.69, "elapsed_time": "0:52:31", "remaining_time": "2 days, 3:04:05", "throughput": 29122.52, "total_tokens": 91787904} +{"current_steps": 270, "total_steps": 15426, "loss": 0.5227, "lr": 4.934077053477808e-05, "epoch": 0.05251385782359234, "percentage": 1.75, "elapsed_time": "0:54:26", "remaining_time": "2 days, 2:55:43", "throughput": 29190.23, "total_tokens": 95341568} +{"current_steps": 280, "total_steps": 15426, "loss": 0.5148, "lr": 4.931676393065431e-05, "epoch": 0.05445881552076242, "percentage": 1.82, "elapsed_time": "0:56:22", "remaining_time": "2 days, 2:49:40", "throughput": 29227.03, "total_tokens": 98866432} +{"current_steps": 290, "total_steps": 15426, "loss": 0.5132, "lr": 4.929279233349088e-05, "epoch": 0.05640377321793251, "percentage": 1.88, "elapsed_time": "0:58:23", "remaining_time": "2 days, 2:47:18", "throughput": 29224.84, "total_tokens": 102377728} +{"current_steps": 300, "total_steps": 15426, "loss": 0.5067, "lr": 4.926885565829051e-05, 
"epoch": 0.0583487309151026, "percentage": 1.94, "elapsed_time": "1:00:20", "remaining_time": "2 days, 2:42:29", "throughput": 29258.45, "total_tokens": 105932672} +{"current_steps": 310, "total_steps": 15426, "loss": 0.5094, "lr": 4.924495382034461e-05, "epoch": 0.06029368861227268, "percentage": 2.01, "elapsed_time": "1:02:18", "remaining_time": "2 days, 2:38:32", "throughput": 29282.06, "total_tokens": 109482240} +{"current_steps": 320, "total_steps": 15426, "loss": 0.5128, "lr": 4.9221086735231975e-05, "epoch": 0.06223864630944277, "percentage": 2.07, "elapsed_time": "1:04:16", "remaining_time": "2 days, 2:34:25", "throughput": 29302.28, "total_tokens": 113013504} +{"current_steps": 330, "total_steps": 15426, "loss": 0.5065, "lr": 4.919725431881751e-05, "epoch": 0.06418360400661285, "percentage": 2.14, "elapsed_time": "1:06:14", "remaining_time": "2 days, 2:29:52", "throughput": 29318.46, "total_tokens": 116511616} +{"current_steps": 340, "total_steps": 15426, "loss": 0.5039, "lr": 4.917345648725101e-05, "epoch": 0.06612856170378294, "percentage": 2.2, "elapsed_time": "1:08:14", "remaining_time": "2 days, 2:27:37", "throughput": 29349.39, "total_tokens": 120159104} +{"current_steps": 350, "total_steps": 15426, "loss": 0.5004, "lr": 4.914969315696596e-05, "epoch": 0.06807351940095303, "percentage": 2.27, "elapsed_time": "1:10:32", "remaining_time": "2 days, 2:38:17", "throughput": 29236.24, "total_tokens": 123732736} +{"current_steps": 360, "total_steps": 15426, "loss": 0.4981, "lr": 4.912596424467818e-05, "epoch": 0.07001847709812312, "percentage": 2.33, "elapsed_time": "1:13:20", "remaining_time": "2 days, 3:09:34", "throughput": 28929.7, "total_tokens": 127314560} +{"current_steps": 370, "total_steps": 15426, "loss": 0.4932, "lr": 4.910226966738475e-05, "epoch": 0.0719634347952932, "percentage": 2.4, "elapsed_time": "1:15:24", "remaining_time": "2 days, 3:08:13", "throughput": 28920.72, "total_tokens": 130840064} +{"current_steps": 380, "total_steps": 15426, 
"loss": 0.496, "lr": 4.9078609342362666e-05, "epoch": 0.0739083924924633, "percentage": 2.46, "elapsed_time": "1:17:22", "remaining_time": "2 days, 3:03:44", "throughput": 28955.03, "total_tokens": 134427904} +{"current_steps": 390, "total_steps": 15426, "loss": 0.4948, "lr": 4.905498318716775e-05, "epoch": 0.07585335018963338, "percentage": 2.53, "elapsed_time": "1:19:20", "remaining_time": "2 days, 2:59:10", "throughput": 28971.1, "total_tokens": 137927936} +{"current_steps": 400, "total_steps": 15426, "loss": 0.4934, "lr": 4.9031391119633295e-05, "epoch": 0.07779830788680346, "percentage": 2.59, "elapsed_time": "1:21:14", "remaining_time": "2 days, 2:52:01", "throughput": 29018.1, "total_tokens": 141457280} +{"current_steps": 410, "total_steps": 15426, "loss": 0.4867, "lr": 4.9007833057869e-05, "epoch": 0.07974326558397354, "percentage": 2.66, "elapsed_time": "1:23:14", "remaining_time": "2 days, 2:48:36", "throughput": 29044.45, "total_tokens": 145059584} +{"current_steps": 420, "total_steps": 15426, "loss": 0.4863, "lr": 4.898430892025967e-05, "epoch": 0.08168822328114364, "percentage": 2.72, "elapsed_time": "1:25:16", "remaining_time": "2 days, 2:46:54", "throughput": 29040.93, "total_tokens": 148595456} +{"current_steps": 430, "total_steps": 15426, "loss": 0.4802, "lr": 4.896081862546415e-05, "epoch": 0.08363318097831372, "percentage": 2.79, "elapsed_time": "1:27:10", "remaining_time": "2 days, 2:40:23", "throughput": 29076.44, "total_tokens": 152094848} +{"current_steps": 440, "total_steps": 15426, "loss": 0.4837, "lr": 4.8937362092414e-05, "epoch": 0.0855781386754838, "percentage": 2.85, "elapsed_time": "1:29:12", "remaining_time": "2 days, 2:38:12", "throughput": 29084.89, "total_tokens": 155669760} +{"current_steps": 450, "total_steps": 15426, "loss": 0.4833, "lr": 4.891393924031244e-05, "epoch": 0.08752309637265389, "percentage": 2.92, "elapsed_time": "1:31:04", "remaining_time": "2 days, 2:31:04", "throughput": 29128.75, "total_tokens": 159179136} 
+{"current_steps": 460, "total_steps": 15426, "loss": 0.4848, "lr": 4.8890549988633095e-05, "epoch": 0.08946805406982399, "percentage": 2.98, "elapsed_time": "1:33:06", "remaining_time": "2 days, 2:29:15", "throughput": 29138.81, "total_tokens": 162783872} +{"current_steps": 470, "total_steps": 15426, "loss": 0.4768, "lr": 4.8867194257118907e-05, "epoch": 0.09141301176699407, "percentage": 3.05, "elapsed_time": "1:35:04", "remaining_time": "2 days, 2:25:15", "throughput": 29161.87, "total_tokens": 166345600} +{"current_steps": 480, "total_steps": 15426, "loss": 0.4773, "lr": 4.884387196578093e-05, "epoch": 0.09335796946416415, "percentage": 3.11, "elapsed_time": "1:37:06", "remaining_time": "2 days, 2:23:31", "throughput": 29165.32, "total_tokens": 169921664} +{"current_steps": 490, "total_steps": 15426, "loss": 0.4719, "lr": 4.882058303489718e-05, "epoch": 0.09530292716133425, "percentage": 3.18, "elapsed_time": "1:39:05", "remaining_time": "2 days, 2:20:26", "throughput": 29168.2, "total_tokens": 173417728} +{"current_steps": 10, "total_steps": 15426, "loss": 1.4053, "lr": 4.9977515176118345e-05, "epoch": 0.0019449576971700866, "percentage": 0.06, "elapsed_time": "0:02:03", "remaining_time": "2 days, 4:41:01", "throughput": 23795.65, "total_tokens": 2927552} +{"current_steps": 20, "total_steps": 15426, "loss": 0.9602, "lr": 4.9952567580506e-05, "epoch": 0.0038899153943401732, "percentage": 0.13, "elapsed_time": "0:03:56", "remaining_time": "2 days, 2:41:19", "throughput": 24661.76, "total_tokens": 5842240} +{"current_steps": 30, "total_steps": 15426, "loss": 0.8414, "lr": 4.992765730738634e-05, "epoch": 0.005834873091510259, "percentage": 0.19, "elapsed_time": "0:05:49", "remaining_time": "2 days, 1:46:44", "throughput": 24960.37, "total_tokens": 8715904} +{"current_steps": 40, "total_steps": 15426, "loss": 0.7714, "lr": 4.9902784263792476e-05, "epoch": 0.0077798307886803465, "percentage": 0.26, "elapsed_time": "0:07:36", "remaining_time": "2 days, 0:45:40", 
"throughput": 25423.02, "total_tokens": 11602176} +{"current_steps": 50, "total_steps": 15426, "loss": 0.7304, "lr": 4.987794835708133e-05, "epoch": 0.009724788485850433, "percentage": 0.32, "elapsed_time": "0:09:26", "remaining_time": "2 days, 0:23:46", "throughput": 25584.83, "total_tokens": 14495168} +{"current_steps": 60, "total_steps": 15426, "loss": 0.6928, "lr": 4.985314949493234e-05, "epoch": 0.011669746183020519, "percentage": 0.39, "elapsed_time": "0:11:16", "remaining_time": "2 days, 0:08:46", "throughput": 25689.05, "total_tokens": 17386112} +{"current_steps": 70, "total_steps": 15426, "loss": 0.6714, "lr": 4.982838758534584e-05, "epoch": 0.013614703880190605, "percentage": 0.45, "elapsed_time": "0:13:07", "remaining_time": "1 day, 23:57:38", "throughput": 25808.42, "total_tokens": 20312768} +{"current_steps": 80, "total_steps": 15426, "loss": 0.6524, "lr": 4.980366253664179e-05, "epoch": 0.015559661577360693, "percentage": 0.52, "elapsed_time": "0:15:01", "remaining_time": "2 days, 0:03:14", "throughput": 25741.16, "total_tokens": 23214336} +{"current_steps": 90, "total_steps": 15426, "loss": 0.6253, "lr": 4.977897425745825e-05, "epoch": 0.01750461927453078, "percentage": 0.58, "elapsed_time": "0:16:57", "remaining_time": "2 days, 0:10:39", "throughput": 25686.96, "total_tokens": 26145152} +{"current_steps": 100, "total_steps": 15426, "loss": 0.6202, "lr": 4.975432265674997e-05, "epoch": 0.019449576971700865, "percentage": 0.65, "elapsed_time": "0:18:49", "remaining_time": "2 days, 0:05:17", "throughput": 25745.54, "total_tokens": 29081280} +{"current_steps": 110, "total_steps": 15426, "loss": 0.6066, "lr": 4.972970764378705e-05, "epoch": 0.02139453466887095, "percentage": 0.71, "elapsed_time": "0:20:42", "remaining_time": "2 days, 0:03:16", "throughput": 25792.17, "total_tokens": 32045824} +{"current_steps": 120, "total_steps": 15426, "loss": 0.5966, "lr": 4.970512912815344e-05, "epoch": 0.023339492366041038, "percentage": 0.78, "elapsed_time": 
"0:22:32", "remaining_time": "1 day, 23:55:45", "throughput": 25842.96, "total_tokens": 34959552} +{"current_steps": 130, "total_steps": 15426, "loss": 0.5871, "lr": 4.968058701974564e-05, "epoch": 0.025284450063211124, "percentage": 0.84, "elapsed_time": "0:24:21", "remaining_time": "1 day, 23:46:36", "throughput": 25900.67, "total_tokens": 37861376} +{"current_steps": 140, "total_steps": 15426, "loss": 0.5793, "lr": 4.96560812287712e-05, "epoch": 0.02722940776038121, "percentage": 0.91, "elapsed_time": "0:26:09", "remaining_time": "1 day, 23:36:52", "throughput": 25974.82, "total_tokens": 40778368} +{"current_steps": 150, "total_steps": 15426, "loss": 0.5762, "lr": 4.963161166574748e-05, "epoch": 0.0291743654575513, "percentage": 0.97, "elapsed_time": "0:27:59", "remaining_time": "1 day, 23:30:44", "throughput": 26015.72, "total_tokens": 43694464} +{"current_steps": 160, "total_steps": 15426, "loss": 0.566, "lr": 4.960717824150013e-05, "epoch": 0.031119323154721386, "percentage": 1.04, "elapsed_time": "0:29:42", "remaining_time": "1 day, 23:14:33", "throughput": 26102.6, "total_tokens": 46528128} +{"current_steps": 170, "total_steps": 15426, "loss": 0.5614, "lr": 4.9582780867161893e-05, "epoch": 0.03306428085189147, "percentage": 1.1, "elapsed_time": "0:31:31", "remaining_time": "1 day, 23:09:08", "throughput": 26131.98, "total_tokens": 49429440} +{"current_steps": 180, "total_steps": 15426, "loss": 0.5522, "lr": 4.955841945417105e-05, "epoch": 0.03500923854906156, "percentage": 1.17, "elapsed_time": "0:33:19", "remaining_time": "1 day, 23:02:15", "throughput": 26177.02, "total_tokens": 52334016} +{"current_steps": 190, "total_steps": 15426, "loss": 0.5529, "lr": 4.953409391427024e-05, "epoch": 0.03695419624623165, "percentage": 1.23, "elapsed_time": "0:35:07", "remaining_time": "1 day, 22:57:19", "throughput": 26211.55, "total_tokens": 55253824} +{"current_steps": 200, "total_steps": 15426, "loss": 0.5466, "lr": 4.950980415950502e-05, "epoch": 
0.03889915394340173, "percentage": 1.3, "elapsed_time": "0:36:55", "remaining_time": "1 day, 22:51:08", "throughput": 26249.77, "total_tokens": 58157312} +{"current_steps": 210, "total_steps": 15426, "loss": 0.5404, "lr": 4.9485550102222575e-05, "epoch": 0.04084411164057182, "percentage": 1.36, "elapsed_time": "0:38:42", "remaining_time": "1 day, 22:44:30", "throughput": 26284.8, "total_tokens": 61042496} +{"current_steps": 220, "total_steps": 15426, "loss": 0.5374, "lr": 4.946133165507037e-05, "epoch": 0.0427890693377419, "percentage": 1.43, "elapsed_time": "0:40:34", "remaining_time": "1 day, 22:45:02", "throughput": 26239.16, "total_tokens": 63892224} +{"current_steps": 230, "total_steps": 15426, "loss": 0.5342, "lr": 4.943714873099483e-05, "epoch": 0.04473402703491199, "percentage": 1.49, "elapsed_time": "0:42:28", "remaining_time": "1 day, 22:46:09", "throughput": 26202.37, "total_tokens": 66773440} +{"current_steps": 240, "total_steps": 15426, "loss": 0.5274, "lr": 4.9413001243240024e-05, "epoch": 0.046678984732082075, "percentage": 1.56, "elapsed_time": "0:44:16", "remaining_time": "1 day, 22:41:56", "throughput": 26221.97, "total_tokens": 69669504} +{"current_steps": 250, "total_steps": 15426, "loss": 0.5251, "lr": 4.938888910534637e-05, "epoch": 0.048623942429252165, "percentage": 1.62, "elapsed_time": "0:46:05", "remaining_time": "1 day, 22:37:34", "throughput": 26250.04, "total_tokens": 72584960} +{"current_steps": 260, "total_steps": 15426, "loss": 0.5252, "lr": 4.936481223114932e-05, "epoch": 0.05056890012642225, "percentage": 1.69, "elapsed_time": "0:47:57", "remaining_time": "1 day, 22:37:02", "throughput": 26247.54, "total_tokens": 75516416} +{"current_steps": 270, "total_steps": 15426, "loss": 0.5228, "lr": 4.934077053477808e-05, "epoch": 0.05251385782359234, "percentage": 1.75, "elapsed_time": "0:49:45", "remaining_time": "1 day, 22:32:40", "throughput": 26274.39, "total_tokens": 78430400} +{"current_steps": 280, "total_steps": 15426, "loss": 
0.515, "lr": 4.931676393065431e-05, "epoch": 0.05445881552076242, "percentage": 1.82, "elapsed_time": "0:51:33", "remaining_time": "1 day, 22:28:31", "throughput": 26292.51, "total_tokens": 81323968} +{"current_steps": 290, "total_steps": 15426, "loss": 0.5134, "lr": 4.929279233349088e-05, "epoch": 0.05640377321793251, "percentage": 1.88, "elapsed_time": "0:53:20", "remaining_time": "1 day, 22:24:12", "throughput": 26309.44, "total_tokens": 84207616} +{"current_steps": 300, "total_steps": 15426, "loss": 0.507, "lr": 4.926885565829051e-05, "epoch": 0.0583487309151026, "percentage": 1.94, "elapsed_time": "0:55:11", "remaining_time": "1 day, 22:22:22", "throughput": 26317.95, "total_tokens": 87139904} +{"current_steps": 310, "total_steps": 15426, "loss": 0.5093, "lr": 4.924495382034461e-05, "epoch": 0.06029368861227268, "percentage": 2.01, "elapsed_time": "0:57:00", "remaining_time": "1 day, 22:19:39", "throughput": 26335.61, "total_tokens": 90076096} +{"current_steps": 320, "total_steps": 15426, "loss": 0.5129, "lr": 4.9221086735231975e-05, "epoch": 0.06223864630944277, "percentage": 2.07, "elapsed_time": "0:58:49", "remaining_time": "1 day, 22:17:14", "throughput": 26340.42, "total_tokens": 92979456} +{"current_steps": 330, "total_steps": 15426, "loss": 0.5069, "lr": 4.919725431881751e-05, "epoch": 0.06418360400661285, "percentage": 2.14, "elapsed_time": "1:00:36", "remaining_time": "1 day, 22:12:23", "throughput": 26360.85, "total_tokens": 95855296} +{"current_steps": 340, "total_steps": 15426, "loss": 0.5049, "lr": 4.917345648725101e-05, "epoch": 0.06612856170378294, "percentage": 2.2, "elapsed_time": "1:02:27", "remaining_time": "1 day, 22:11:18", "throughput": 26369.31, "total_tokens": 98818816} +{"current_steps": 350, "total_steps": 15426, "loss": 0.5004, "lr": 4.914969315696596e-05, "epoch": 0.06807351940095303, "percentage": 2.27, "elapsed_time": "1:04:18", "remaining_time": "1 day, 22:10:05", "throughput": 26369.15, "total_tokens": 101747712} 
+{"current_steps": 360, "total_steps": 15426, "loss": 0.4983, "lr": 4.912596424467818e-05, "epoch": 0.07001847709812312, "percentage": 2.33, "elapsed_time": "1:06:09", "remaining_time": "1 day, 22:08:50", "throughput": 26370.91, "total_tokens": 104683392} +{"current_steps": 370, "total_steps": 15426, "loss": 0.4933, "lr": 4.910226966738475e-05, "epoch": 0.0719634347952932, "percentage": 2.4, "elapsed_time": "1:07:57", "remaining_time": "1 day, 22:05:27", "throughput": 26384.07, "total_tokens": 107585344} +{"current_steps": 380, "total_steps": 15426, "loss": 0.4957, "lr": 4.9078609342362666e-05, "epoch": 0.0739083924924633, "percentage": 2.46, "elapsed_time": "1:09:47", "remaining_time": "1 day, 22:03:25", "throughput": 26390.27, "total_tokens": 110510784} +{"current_steps": 390, "total_steps": 15426, "loss": 0.4949, "lr": 4.905498318716775e-05, "epoch": 0.07585335018963338, "percentage": 2.53, "elapsed_time": "1:11:34", "remaining_time": "1 day, 21:59:25", "throughput": 26401.24, "total_tokens": 113377408} +{"current_steps": 400, "total_steps": 15426, "loss": 0.4932, "lr": 4.9031391119633295e-05, "epoch": 0.07779830788680346, "percentage": 2.59, "elapsed_time": "1:13:20", "remaining_time": "1 day, 21:54:55", "throughput": 26431.41, "total_tokens": 116304704} +{"current_steps": 410, "total_steps": 15426, "loss": 0.4865, "lr": 4.9007833057869e-05, "epoch": 0.07974326558397354, "percentage": 2.66, "elapsed_time": "1:15:08", "remaining_time": "1 day, 21:51:48", "throughput": 26451.46, "total_tokens": 119247616} +{"current_steps": 420, "total_steps": 15426, "loss": 0.4868, "lr": 4.898430892025967e-05, "epoch": 0.08168822328114364, "percentage": 2.72, "elapsed_time": "1:16:54", "remaining_time": "1 day, 21:47:34", "throughput": 26478.39, "total_tokens": 122173440} +{"current_steps": 430, "total_steps": 15426, "loss": 0.4806, "lr": 4.896081862546415e-05, "epoch": 0.08363318097831372, "percentage": 2.79, "elapsed_time": "1:18:40", "remaining_time": "1 day, 21:43:57", 
"throughput": 26487.7, "total_tokens": 125045248} +{"current_steps": 440, "total_steps": 15426, "loss": 0.4834, "lr": 4.8937362092414e-05, "epoch": 0.0855781386754838, "percentage": 2.85, "elapsed_time": "1:20:36", "remaining_time": "1 day, 21:45:34", "throughput": 26457.84, "total_tokens": 127969600} +{"current_steps": 450, "total_steps": 15426, "loss": 0.4837, "lr": 4.891393924031244e-05, "epoch": 0.08752309637265389, "percentage": 2.92, "elapsed_time": "1:22:24", "remaining_time": "1 day, 21:42:42", "throughput": 26465.13, "total_tokens": 130864256} +{"current_steps": 460, "total_steps": 15426, "loss": 0.4848, "lr": 4.8890549988633095e-05, "epoch": 0.08946805406982399, "percentage": 2.98, "elapsed_time": "1:24:17", "remaining_time": "1 day, 21:42:22", "throughput": 26459.57, "total_tokens": 133817856} +{"current_steps": 470, "total_steps": 15426, "loss": 0.4766, "lr": 4.8867194257118907e-05, "epoch": 0.09141301176699407, "percentage": 3.05, "elapsed_time": "1:26:06", "remaining_time": "1 day, 21:40:08", "throughput": 26467.55, "total_tokens": 136748096} +{"current_steps": 480, "total_steps": 15426, "loss": 0.4772, "lr": 4.884387196578093e-05, "epoch": 0.09335796946416415, "percentage": 3.11, "elapsed_time": "1:27:58", "remaining_time": "1 day, 21:39:10", "throughput": 26462.57, "total_tokens": 139675456} +{"current_steps": 490, "total_steps": 15426, "loss": 0.4721, "lr": 4.882058303489718e-05, "epoch": 0.09530292716133425, "percentage": 3.18, "elapsed_time": "1:29:48", "remaining_time": "1 day, 21:37:21", "throughput": 26453.69, "total_tokens": 142537920} +{"current_steps": 500, "total_steps": 15426, "loss": 0.4762, "lr": 4.8797327385011496e-05, "epoch": 0.09724788485850433, "percentage": 3.24, "elapsed_time": "1:31:45", "remaining_time": "1 day, 21:39:07", "throughput": 26424.19, "total_tokens": 145476416} +{"current_steps": 510, "total_steps": 15426, "loss": 0.4666, "lr": 4.8774104936932425e-05, "epoch": 0.09919284255567441, "percentage": 3.31, "elapsed_time": 
"1:33:36", "remaining_time": "1 day, 21:37:36", "throughput": 26416.06, "total_tokens": 148357184} +{"current_steps": 520, "total_steps": 15426, "loss": 0.4736, "lr": 4.8750915611732076e-05, "epoch": 0.1011378002528445, "percentage": 3.37, "elapsed_time": "1:35:22", "remaining_time": "1 day, 21:34:04", "throughput": 26433.34, "total_tokens": 151271104} +{"current_steps": 530, "total_steps": 15426, "loss": 0.4706, "lr": 4.8727759330744986e-05, "epoch": 0.10308275795001459, "percentage": 3.44, "elapsed_time": "1:37:16", "remaining_time": "1 day, 21:33:55", "throughput": 26422.85, "total_tokens": 154213824} +{"current_steps": 540, "total_steps": 15426, "loss": 0.4677, "lr": 4.870463601556696e-05, "epoch": 0.10502771564718467, "percentage": 3.5, "elapsed_time": "1:39:29", "remaining_time": "1 day, 21:42:46", "throughput": 26317.99, "total_tokens": 157112192} +{"current_steps": 550, "total_steps": 15426, "loss": 0.47, "lr": 4.8681545588054075e-05, "epoch": 0.10697267334435476, "percentage": 3.57, "elapsed_time": "1:41:35", "remaining_time": "1 day, 21:47:51", "throughput": 26254.33, "total_tokens": 160037696} +{"current_steps": 560, "total_steps": 15426, "loss": 0.4665, "lr": 4.8658487970321404e-05, "epoch": 0.10891763104152484, "percentage": 3.63, "elapsed_time": "1:43:26", "remaining_time": "1 day, 21:46:05", "throughput": 26258.23, "total_tokens": 162976768} +{"current_steps": 570, "total_steps": 15426, "loss": 0.4651, "lr": 4.863546308474209e-05, "epoch": 0.11086258873869494, "percentage": 3.7, "elapsed_time": "1:45:15", "remaining_time": "1 day, 21:43:34", "throughput": 26260.1, "total_tokens": 165857984} +{"current_steps": 580, "total_steps": 15426, "loss": 0.4618, "lr": 4.86124708539461e-05, "epoch": 0.11280754643586502, "percentage": 3.76, "elapsed_time": "1:47:01", "remaining_time": "1 day, 21:39:20", "throughput": 26282.23, "total_tokens": 168763648} +{"current_steps": 590, "total_steps": 15426, "loss": 0.4595, "lr": 4.8589511200819216e-05, "epoch": 
0.1147525041330351, "percentage": 3.82, "elapsed_time": "1:48:52", "remaining_time": "1 day, 21:37:55", "throughput": 26283.04, "total_tokens": 171705792} +{"current_steps": 600, "total_steps": 15426, "loss": 0.4595, "lr": 4.8566584048501926e-05, "epoch": 0.1166974618302052, "percentage": 3.89, "elapsed_time": "1:50:33", "remaining_time": "1 day, 21:31:53", "throughput": 26313.18, "total_tokens": 174548480} +{"current_steps": 610, "total_steps": 15426, "loss": 0.4624, "lr": 4.854368932038835e-05, "epoch": 0.11864241952737528, "percentage": 3.95, "elapsed_time": "1:52:20", "remaining_time": "1 day, 21:28:47", "throughput": 26326.94, "total_tokens": 177468864} +{"current_steps": 620, "total_steps": 15426, "loss": 0.4542, "lr": 4.8520826940125144e-05, "epoch": 0.12058737722454536, "percentage": 4.02, "elapsed_time": "1:54:11", "remaining_time": "1 day, 21:26:51", "throughput": 26332.92, "total_tokens": 180412544} +{"current_steps": 630, "total_steps": 15426, "loss": 0.4629, "lr": 4.849799683161046e-05, "epoch": 0.12253233492171545, "percentage": 4.08, "elapsed_time": "1:55:55", "remaining_time": "1 day, 21:22:37", "throughput": 26347.08, "total_tokens": 183260032} +{"current_steps": 640, "total_steps": 15426, "loss": 0.4566, "lr": 4.8475198918992835e-05, "epoch": 0.12447729261888554, "percentage": 4.15, "elapsed_time": "1:57:53", "remaining_time": "1 day, 21:23:36", "throughput": 26312.87, "total_tokens": 186119808} +{"current_steps": 650, "total_steps": 15426, "loss": 0.4595, "lr": 4.845243312667023e-05, "epoch": 0.1264222503160556, "percentage": 4.21, "elapsed_time": "1:59:46", "remaining_time": "1 day, 21:22:48", "throughput": 26300.41, "total_tokens": 189010624} +{"current_steps": 660, "total_steps": 15426, "loss": 0.4537, "lr": 4.842969937928884e-05, "epoch": 0.1283672080132257, "percentage": 4.28, "elapsed_time": "2:01:34", "remaining_time": "1 day, 21:19:51", "throughput": 26311.84, "total_tokens": 191923904} +{"current_steps": 670, "total_steps": 15426, 
"loss": 0.4548, "lr": 4.840699760174217e-05, "epoch": 0.1303121657103958, "percentage": 4.34, "elapsed_time": "2:03:13", "remaining_time": "1 day, 21:14:02", "throughput": 26339.26, "total_tokens": 194750208} +{"current_steps": 680, "total_steps": 15426, "loss": 0.4524, "lr": 4.8384327719169906e-05, "epoch": 0.13225712340756587, "percentage": 4.41, "elapsed_time": "2:05:00", "remaining_time": "1 day, 21:10:49", "throughput": 26351.84, "total_tokens": 197651264} +{"current_steps": 690, "total_steps": 15426, "loss": 0.4551, "lr": 4.836168965695694e-05, "epoch": 0.13420208110473597, "percentage": 4.47, "elapsed_time": "2:06:47", "remaining_time": "1 day, 21:07:51", "throughput": 26363.36, "total_tokens": 200561216} +{"current_steps": 700, "total_steps": 15426, "loss": 0.4496, "lr": 4.8339083340732304e-05, "epoch": 0.13614703880190607, "percentage": 4.54, "elapsed_time": "2:08:34", "remaining_time": "1 day, 21:04:58", "throughput": 26374.13, "total_tokens": 203472512} +{"current_steps": 710, "total_steps": 15426, "loss": 0.4532, "lr": 4.8316508696368154e-05, "epoch": 0.13809199649907614, "percentage": 4.6, "elapsed_time": "2:10:20", "remaining_time": "1 day, 21:01:32", "throughput": 26386.82, "total_tokens": 206357248} +{"current_steps": 720, "total_steps": 15426, "loss": 0.4516, "lr": 4.8293965649978714e-05, "epoch": 0.14003695419624623, "percentage": 4.67, "elapsed_time": "2:12:07", "remaining_time": "1 day, 20:58:40", "throughput": 26397.02, "total_tokens": 209264512} +{"current_steps": 730, "total_steps": 15426, "loss": 0.4523, "lr": 4.8271454127919364e-05, "epoch": 0.14198191189341633, "percentage": 4.73, "elapsed_time": "2:13:53", "remaining_time": "1 day, 20:55:34", "throughput": 26412.75, "total_tokens": 212196800} +{"current_steps": 740, "total_steps": 15426, "loss": 0.4493, "lr": 4.824897405678549e-05, "epoch": 0.1439268695905864, "percentage": 4.8, "elapsed_time": "2:15:40", "remaining_time": "1 day, 20:52:26", "throughput": 26423.65, "total_tokens": 
215089152} +{"current_steps": 750, "total_steps": 15426, "loss": 0.4456, "lr": 4.8226525363411576e-05, "epoch": 0.1458718272877565, "percentage": 4.86, "elapsed_time": "2:17:23", "remaining_time": "1 day, 20:48:21", "throughput": 26443.65, "total_tokens": 217978816} +{"current_steps": 760, "total_steps": 15426, "loss": 0.4472, "lr": 4.820410797487017e-05, "epoch": 0.1478167849849266, "percentage": 4.93, "elapsed_time": "2:19:09", "remaining_time": "1 day, 20:45:27", "throughput": 26449.24, "total_tokens": 220843008} +{"current_steps": 770, "total_steps": 15426, "loss": 0.4454, "lr": 4.818172181847091e-05, "epoch": 0.14976174268209666, "percentage": 4.99, "elapsed_time": "2:20:55", "remaining_time": "1 day, 20:42:23", "throughput": 26459.73, "total_tokens": 223734656} +{"current_steps": 780, "total_steps": 15426, "loss": 0.4464, "lr": 4.81593668217595e-05, "epoch": 0.15170670037926676, "percentage": 5.06, "elapsed_time": "2:22:48", "remaining_time": "1 day, 20:41:21", "throughput": 26448.64, "total_tokens": 226612800} +{"current_steps": 790, "total_steps": 15426, "loss": 0.443, "lr": 4.813704291251675e-05, "epoch": 0.15365165807643683, "percentage": 5.12, "elapsed_time": "2:24:39", "remaining_time": "1 day, 20:39:57", "throughput": 26444.48, "total_tokens": 229519424} +{"current_steps": 800, "total_steps": 15426, "loss": 0.4406, "lr": 4.811475001875759e-05, "epoch": 0.15559661577360692, "percentage": 5.19, "elapsed_time": "2:26:27", "remaining_time": "1 day, 20:37:38", "throughput": 26457.33, "total_tokens": 232494848} +{"current_steps": 810, "total_steps": 15426, "loss": 0.4493, "lr": 4.8092488068730105e-05, "epoch": 0.15754157347077702, "percentage": 5.25, "elapsed_time": "2:28:13", "remaining_time": "1 day, 20:34:33", "throughput": 26468.1, "total_tokens": 235387456} +{"current_steps": 820, "total_steps": 15426, "loss": 0.4452, "lr": 4.807025699091452e-05, "epoch": 0.1594865311679471, "percentage": 5.32, "elapsed_time": "2:29:55", "remaining_time": "1 day, 
20:30:22", "throughput": 26484.65, "total_tokens": 238232448} +{"current_steps": 830, "total_steps": 15426, "loss": 0.4439, "lr": 4.8048056714022325e-05, "epoch": 0.16143148886511718, "percentage": 5.38, "elapsed_time": "2:31:37", "remaining_time": "1 day, 20:26:28", "throughput": 26503.14, "total_tokens": 241119040} +{"current_steps": 840, "total_steps": 15426, "loss": 0.4426, "lr": 4.802588716699519e-05, "epoch": 0.16337644656228728, "percentage": 5.45, "elapsed_time": "2:33:25", "remaining_time": "1 day, 20:24:07", "throughput": 26511.95, "total_tokens": 244056192} +{"current_steps": 850, "total_steps": 15426, "loss": 0.4382, "lr": 4.8003748279004156e-05, "epoch": 0.16532140425945735, "percentage": 5.51, "elapsed_time": "2:35:10", "remaining_time": "1 day, 20:20:54", "throughput": 26521.76, "total_tokens": 246924224} +{"current_steps": 860, "total_steps": 15426, "loss": 0.4439, "lr": 4.798163997944854e-05, "epoch": 0.16726636195662745, "percentage": 5.58, "elapsed_time": "2:36:53", "remaining_time": "1 day, 20:17:16", "throughput": 26536.81, "total_tokens": 249801856} +{"current_steps": 870, "total_steps": 15426, "loss": 0.4416, "lr": 4.79595621979551e-05, "epoch": 0.16921131965379754, "percentage": 5.64, "elapsed_time": "2:38:43", "remaining_time": "1 day, 20:15:43", "throughput": 26531.13, "total_tokens": 252677312} +{"current_steps": 880, "total_steps": 15426, "loss": 0.4391, "lr": 4.793751486437702e-05, "epoch": 0.1711562773509676, "percentage": 5.7, "elapsed_time": "2:40:38", "remaining_time": "1 day, 20:15:28", "throughput": 26518.55, "total_tokens": 255611904} +{"current_steps": 890, "total_steps": 15426, "loss": 0.4389, "lr": 4.7915497908793064e-05, "epoch": 0.1731012350481377, "percentage": 5.77, "elapsed_time": "2:42:25", "remaining_time": "1 day, 20:12:43", "throughput": 26527.17, "total_tokens": 258510528} +{"current_steps": 900, "total_steps": 15426, "loss": 0.4354, "lr": 4.7893511261506516e-05, "epoch": 0.17504619274530778, "percentage": 5.83, 
"elapsed_time": "2:44:09", "remaining_time": "1 day, 20:09:27", "throughput": 26540.91, "total_tokens": 261408896} +{"current_steps": 910, "total_steps": 15426, "loss": 0.4409, "lr": 4.787155485304435e-05, "epoch": 0.17699115044247787, "percentage": 5.9, "elapsed_time": "2:45:49", "remaining_time": "1 day, 20:05:17", "throughput": 26562.87, "total_tokens": 264297472} +{"current_steps": 920, "total_steps": 15426, "loss": 0.4351, "lr": 4.784962861415629e-05, "epoch": 0.17893610813964797, "percentage": 5.96, "elapsed_time": "2:47:36", "remaining_time": "1 day, 20:02:39", "throughput": 26569.46, "total_tokens": 267186880} +{"current_steps": 930, "total_steps": 15426, "loss": 0.4345, "lr": 4.7827732475813884e-05, "epoch": 0.18088106583681804, "percentage": 6.03, "elapsed_time": "2:49:27", "remaining_time": "1 day, 20:01:20", "throughput": 26564.75, "total_tokens": 270094784} +{"current_steps": 940, "total_steps": 15426, "loss": 0.4389, "lr": 4.7805866369209576e-05, "epoch": 0.18282602353398814, "percentage": 6.09, "elapsed_time": "2:51:15", "remaining_time": "1 day, 19:59:14", "throughput": 26573.88, "total_tokens": 273064832} +{"current_steps": 950, "total_steps": 15426, "loss": 0.434, "lr": 4.778403022575583e-05, "epoch": 0.18477098123115823, "percentage": 6.16, "elapsed_time": "2:53:00", "remaining_time": "1 day, 19:56:18", "throughput": 26583.15, "total_tokens": 275949056} +{"current_steps": 960, "total_steps": 15426, "loss": 0.4323, "lr": 4.7762223977084195e-05, "epoch": 0.1867159389283283, "percentage": 6.22, "elapsed_time": "2:54:45", "remaining_time": "1 day, 19:53:24", "throughput": 26595.31, "total_tokens": 278866368} +{"current_steps": 970, "total_steps": 15426, "loss": 0.4372, "lr": 4.774044755504444e-05, "epoch": 0.1886608966254984, "percentage": 6.29, "elapsed_time": "2:56:29", "remaining_time": "1 day, 19:50:12", "throughput": 26607.47, "total_tokens": 281752128} +{"current_steps": 980, "total_steps": 15426, "loss": 0.4313, "lr": 4.7718700891703616e-05, 
"epoch": 0.1906058543226685, "percentage": 6.35, "elapsed_time": "2:58:13", "remaining_time": "1 day, 19:47:11", "throughput": 26620.1, "total_tokens": 284662400} +{"current_steps": 990, "total_steps": 15426, "loss": 0.4331, "lr": 4.7696983919345215e-05, "epoch": 0.19255081201983856, "percentage": 6.42, "elapsed_time": "2:59:54", "remaining_time": "1 day, 19:43:18", "throughput": 26640.0, "total_tokens": 287556992} +{"current_steps": 1000, "total_steps": 15426, "loss": 0.432, "lr": 4.7675296570468216e-05, "epoch": 0.19449576971700866, "percentage": 6.48, "elapsed_time": "3:01:42", "remaining_time": "1 day, 19:41:16", "throughput": 26646.44, "total_tokens": 290507712} +{"current_steps": 1010, "total_steps": 15426, "loss": 0.4312, "lr": 4.76536387777863e-05, "epoch": 0.19644072741417873, "percentage": 6.55, "elapsed_time": "3:03:27", "remaining_time": "1 day, 19:38:32", "throughput": 26654.54, "total_tokens": 293399808} +{"current_steps": 1020, "total_steps": 15426, "loss": 0.4282, "lr": 4.7632010474226915e-05, "epoch": 0.19838568511134883, "percentage": 6.61, "elapsed_time": "3:05:12", "remaining_time": "1 day, 19:35:41", "throughput": 26661.94, "total_tokens": 296268224} +{"current_steps": 1030, "total_steps": 15426, "loss": 0.4285, "lr": 4.761041159293035e-05, "epoch": 0.20033064280851892, "percentage": 6.68, "elapsed_time": "3:06:54", "remaining_time": "1 day, 19:32:20", "throughput": 26674.73, "total_tokens": 299141888} +{"current_steps": 1040, "total_steps": 15426, "loss": 0.4307, "lr": 4.7588842067249e-05, "epoch": 0.202275600505689, "percentage": 6.74, "elapsed_time": "3:08:40", "remaining_time": "1 day, 19:29:49", "throughput": 26682.52, "total_tokens": 302053056} +{"current_steps": 1050, "total_steps": 15426, "loss": 0.4245, "lr": 4.756730183074637e-05, "epoch": 0.2042205582028591, "percentage": 6.81, "elapsed_time": "3:10:29", "remaining_time": "1 day, 19:28:08", "throughput": 26682.24, "total_tokens": 304969216} +{"current_steps": 1060, "total_steps": 
15426, "loss": 0.428, "lr": 4.7545790817196314e-05, "epoch": 0.20616551590002918, "percentage": 6.87, "elapsed_time": "3:12:12", "remaining_time": "1 day, 19:24:56", "throughput": 26695.4, "total_tokens": 307861760} +{"current_steps": 1070, "total_steps": 15426, "loss": 0.4268, "lr": 4.752430896058212e-05, "epoch": 0.20811047359719925, "percentage": 6.94, "elapsed_time": "3:14:00", "remaining_time": "1 day, 19:22:58", "throughput": 26701.46, "total_tokens": 310817472} +{"current_steps": 1080, "total_steps": 15426, "loss": 0.4262, "lr": 4.750285619509567e-05, "epoch": 0.21005543129436935, "percentage": 7.0, "elapsed_time": "3:15:45", "remaining_time": "1 day, 19:20:15", "throughput": 26710.02, "total_tokens": 313714240} +{"current_steps": 1090, "total_steps": 15426, "loss": 0.4289, "lr": 4.7481432455136644e-05, "epoch": 0.21200038899153945, "percentage": 7.07, "elapsed_time": "3:17:30", "remaining_time": "1 day, 19:17:41", "throughput": 26716.38, "total_tokens": 316602624} +{"current_steps": 1100, "total_steps": 15426, "loss": 0.4176, "lr": 4.7460037675311584e-05, "epoch": 0.21394534668870951, "percentage": 7.13, "elapsed_time": "3:19:16", "remaining_time": "1 day, 19:15:11", "throughput": 26723.49, "total_tokens": 319508864} +{"current_steps": 1110, "total_steps": 15426, "loss": 0.4279, "lr": 4.7438671790433126e-05, "epoch": 0.2158903043858796, "percentage": 7.2, "elapsed_time": "3:20:55", "remaining_time": "1 day, 19:11:29", "throughput": 26736.64, "total_tokens": 322336896} +{"current_steps": 1120, "total_steps": 15426, "loss": 0.4293, "lr": 4.741733473551915e-05, "epoch": 0.21783526208304968, "percentage": 7.26, "elapsed_time": "3:22:43", "remaining_time": "1 day, 19:09:32", "throughput": 26745.14, "total_tokens": 325325056} +{"current_steps": 1130, "total_steps": 15426, "loss": 0.4258, "lr": 4.7396026445791966e-05, "epoch": 0.21978021978021978, "percentage": 7.33, "elapsed_time": "3:24:27", "remaining_time": "1 day, 19:06:40", "throughput": 26755.06, 
"total_tokens": 328218176} +{"current_steps": 1140, "total_steps": 15426, "loss": 0.426, "lr": 4.737474685667742e-05, "epoch": 0.22172517747738987, "percentage": 7.39, "elapsed_time": "3:26:12", "remaining_time": "1 day, 19:04:02", "throughput": 26762.73, "total_tokens": 331112704} +{"current_steps": 1150, "total_steps": 15426, "loss": 0.4205, "lr": 4.7353495903804165e-05, "epoch": 0.22367013517455994, "percentage": 7.45, "elapsed_time": "3:27:57", "remaining_time": "1 day, 19:01:35", "throughput": 26768.63, "total_tokens": 334006528} +{"current_steps": 1160, "total_steps": 15426, "loss": 0.425, "lr": 4.733227352300277e-05, "epoch": 0.22561509287173004, "percentage": 7.52, "elapsed_time": "3:29:43", "remaining_time": "1 day, 18:59:16", "throughput": 26772.37, "total_tokens": 336893696} +{"current_steps": 1170, "total_steps": 15426, "loss": 0.4242, "lr": 4.731107965030496e-05, "epoch": 0.22756005056890014, "percentage": 7.58, "elapsed_time": "3:31:25", "remaining_time": "1 day, 18:56:09", "throughput": 26781.59, "total_tokens": 339741952} +{"current_steps": 1180, "total_steps": 15426, "loss": 0.4255, "lr": 4.728991422194278e-05, "epoch": 0.2295050082660702, "percentage": 7.65, "elapsed_time": "3:33:09", "remaining_time": "1 day, 18:53:27", "throughput": 26792.12, "total_tokens": 342661056} +{"current_steps": 1190, "total_steps": 15426, "loss": 0.4205, "lr": 4.726877717434773e-05, "epoch": 0.2314499659632403, "percentage": 7.71, "elapsed_time": "3:34:50", "remaining_time": "1 day, 18:50:03", "throughput": 26806.02, "total_tokens": 345530240} +{"current_steps": 1200, "total_steps": 15426, "loss": 0.4229, "lr": 4.724766844415013e-05, "epoch": 0.2333949236604104, "percentage": 7.78, "elapsed_time": "3:36:36", "remaining_time": "1 day, 18:47:57", "throughput": 26809.25, "total_tokens": 348436032} +{"current_steps": 1210, "total_steps": 15426, "loss": 0.4227, "lr": 4.722658796817813e-05, "epoch": 0.23533988135758047, "percentage": 7.84, "elapsed_time": "3:38:15", 
"remaining_time": "1 day, 18:44:19", "throughput": 26823.32, "total_tokens": 351273664} +{"current_steps": 1220, "total_steps": 15426, "loss": 0.4224, "lr": 4.7205535683457044e-05, "epoch": 0.23728483905475056, "percentage": 7.91, "elapsed_time": "3:40:04", "remaining_time": "1 day, 18:42:35", "throughput": 26821.6, "total_tokens": 354162112} +{"current_steps": 1230, "total_steps": 15426, "loss": 0.4225, "lr": 4.7184511527208484e-05, "epoch": 0.23922979675192066, "percentage": 7.97, "elapsed_time": "3:41:51", "remaining_time": "1 day, 18:40:36", "throughput": 26822.12, "total_tokens": 357049088} +{"current_steps": 1240, "total_steps": 15426, "loss": 0.4225, "lr": 4.7163515436849644e-05, "epoch": 0.24117475444909073, "percentage": 8.04, "elapsed_time": "3:43:42", "remaining_time": "1 day, 18:39:13", "throughput": 26821.18, "total_tokens": 359998208} +{"current_steps": 1250, "total_steps": 15426, "loss": 0.4218, "lr": 4.714254734999245e-05, "epoch": 0.24311971214626082, "percentage": 8.1, "elapsed_time": "3:45:23", "remaining_time": "1 day, 18:36:11", "throughput": 26830.75, "total_tokens": 362856384} +{"current_steps": 1260, "total_steps": 15426, "loss": 0.4242, "lr": 4.712160720444284e-05, "epoch": 0.2450646698434309, "percentage": 8.17, "elapsed_time": "3:47:06", "remaining_time": "1 day, 18:33:18", "throughput": 26840.04, "total_tokens": 365729408} +{"current_steps": 1270, "total_steps": 15426, "loss": 0.4191, "lr": 4.710069493819992e-05, "epoch": 0.247009627540601, "percentage": 8.23, "elapsed_time": "3:48:57", "remaining_time": "1 day, 18:32:09", "throughput": 26833.72, "total_tokens": 368639808} +{"current_steps": 1280, "total_steps": 15426, "loss": 0.4172, "lr": 4.70798104894553e-05, "epoch": 0.2489545852377711, "percentage": 8.3, "elapsed_time": "3:50:56", "remaining_time": "1 day, 18:32:11", "throughput": 26818.41, "total_tokens": 371597632} +{"current_steps": 1290, "total_steps": 15426, "loss": 0.42, "lr": 4.705895379659219e-05, "epoch": 
0.25089954293494116, "percentage": 8.36, "elapsed_time": "3:52:44", "remaining_time": "1 day, 18:30:27", "throughput": 26816.58, "total_tokens": 374485696} +{"current_steps": 1300, "total_steps": 15426, "loss": 0.4176, "lr": 4.7038124798184766e-05, "epoch": 0.2528445006321112, "percentage": 8.43, "elapsed_time": "3:54:29", "remaining_time": "1 day, 18:27:57", "throughput": 26821.82, "total_tokens": 377360256} +{"current_steps": 1310, "total_steps": 15426, "loss": 0.4197, "lr": 4.7017323432997304e-05, "epoch": 0.25478945832928135, "percentage": 8.49, "elapsed_time": "3:56:23", "remaining_time": "1 day, 18:27:19", "throughput": 26812.42, "total_tokens": 380305344} +{"current_steps": 1320, "total_steps": 15426, "loss": 0.4182, "lr": 4.6996549639983506e-05, "epoch": 0.2567344160264514, "percentage": 8.56, "elapsed_time": "3:58:14", "remaining_time": "1 day, 18:25:57", "throughput": 26810.33, "total_tokens": 383243200} +{"current_steps": 1330, "total_steps": 15426, "loss": 0.419, "lr": 4.697580335828569e-05, "epoch": 0.2586793737236215, "percentage": 8.62, "elapsed_time": "4:00:03", "remaining_time": "1 day, 18:24:12", "throughput": 26810.06, "total_tokens": 386150784} +{"current_steps": 1340, "total_steps": 15426, "loss": 0.4187, "lr": 4.6955084527234076e-05, "epoch": 0.2606243314207916, "percentage": 8.69, "elapsed_time": "4:02:02", "remaining_time": "1 day, 18:24:18", "throughput": 26795.9, "total_tokens": 389140864} +{"current_steps": 1350, "total_steps": 15426, "loss": 0.4217, "lr": 4.6934393086346034e-05, "epoch": 0.2625692891179617, "percentage": 8.75, "elapsed_time": "4:03:49", "remaining_time": "1 day, 18:22:12", "throughput": 26795.25, "total_tokens": 391989312} +{"current_steps": 1360, "total_steps": 15426, "loss": 0.4157, "lr": 4.6913728975325324e-05, "epoch": 0.26451424681513175, "percentage": 8.82, "elapsed_time": "4:05:44", "remaining_time": "1 day, 18:21:40", "throughput": 26780.53, "total_tokens": 394873856} +{"current_steps": 1370, "total_steps": 
15426, "loss": 0.4124, "lr": 4.6893092134061393e-05, "epoch": 0.2664592045123019, "percentage": 8.88, "elapsed_time": "4:07:45", "remaining_time": "1 day, 18:22:02", "throughput": 26758.13, "total_tokens": 397785216} +{"current_steps": 1380, "total_steps": 15426, "loss": 0.4143, "lr": 4.687248250262859e-05, "epoch": 0.26840416220947194, "percentage": 8.95, "elapsed_time": "4:09:35", "remaining_time": "1 day, 18:20:27", "throughput": 26753.17, "total_tokens": 400649792} +{"current_steps": 1390, "total_steps": 15426, "loss": 0.4134, "lr": 4.685190002128548e-05, "epoch": 0.270349119906642, "percentage": 9.01, "elapsed_time": "4:11:34", "remaining_time": "1 day, 18:20:18", "throughput": 26737.38, "total_tokens": 403579520} +{"current_steps": 1400, "total_steps": 15426, "loss": 0.415, "lr": 4.6831344630474114e-05, "epoch": 0.27229407760381213, "percentage": 9.08, "elapsed_time": "4:13:44", "remaining_time": "1 day, 18:22:06", "throughput": 26700.91, "total_tokens": 406504064} +{"current_steps": 1410, "total_steps": 15426, "loss": 0.4166, "lr": 4.6810816270819276e-05, "epoch": 0.2742390353009822, "percentage": 9.14, "elapsed_time": "4:15:43", "remaining_time": "1 day, 18:21:59", "throughput": 26683.71, "total_tokens": 409417792} +{"current_steps": 1420, "total_steps": 15426, "loss": 0.4113, "lr": 4.679031488312777e-05, "epoch": 0.2761839929981523, "percentage": 9.21, "elapsed_time": "4:17:40", "remaining_time": "1 day, 18:21:35", "throughput": 26671.57, "total_tokens": 412362176} +{"current_steps": 1430, "total_steps": 15426, "loss": 0.4094, "lr": 4.6769840408387717e-05, "epoch": 0.2781289506953224, "percentage": 9.27, "elapsed_time": "4:19:33", "remaining_time": "1 day, 18:20:20", "throughput": 26665.29, "total_tokens": 415260288} +{"current_steps": 1440, "total_steps": 15426, "loss": 0.415, "lr": 4.674939278776787e-05, "epoch": 0.28007390839249247, "percentage": 9.33, "elapsed_time": "4:21:27", "remaining_time": "1 day, 18:19:28", "throughput": 26652.06, 
"total_tokens": 418113088} +{"current_steps": 1450, "total_steps": 15426, "loss": 0.412, "lr": 4.672897196261683e-05, "epoch": 0.28201886608966253, "percentage": 9.4, "elapsed_time": "4:23:27", "remaining_time": "1 day, 18:19:24", "throughput": 26634.78, "total_tokens": 421034944} +{"current_steps": 1460, "total_steps": 15426, "loss": 0.4093, "lr": 4.670857787446238e-05, "epoch": 0.28396382378683266, "percentage": 9.46, "elapsed_time": "4:25:24", "remaining_time": "1 day, 18:18:51", "throughput": 26622.81, "total_tokens": 423959680} +{"current_steps": 1470, "total_steps": 15426, "loss": 0.4138, "lr": 4.668821046501082e-05, "epoch": 0.2859087814840027, "percentage": 9.53, "elapsed_time": "4:27:20", "remaining_time": "1 day, 18:18:07", "throughput": 26614.46, "total_tokens": 426913024} +{"current_steps": 1480, "total_steps": 15426, "loss": 0.4105, "lr": 4.6667869676146194e-05, "epoch": 0.2878537391811728, "percentage": 9.59, "elapsed_time": "4:29:23", "remaining_time": "1 day, 18:18:25", "throughput": 26591.48, "total_tokens": 429803328} +{"current_steps": 1490, "total_steps": 15426, "loss": 0.4115, "lr": 4.6647555449929645e-05, "epoch": 0.2897986968783429, "percentage": 9.66, "elapsed_time": "4:31:19", "remaining_time": "1 day, 18:17:41", "throughput": 26581.32, "total_tokens": 432728832} +{"current_steps": 1500, "total_steps": 15426, "loss": 0.4113, "lr": 4.662726772859869e-05, "epoch": 0.291743654575513, "percentage": 9.72, "elapsed_time": "4:33:11", "remaining_time": "1 day, 18:16:17", "throughput": 26576.75, "total_tokens": 435628608} +{"current_steps": 1510, "total_steps": 15426, "loss": 0.4152, "lr": 4.660700645456655e-05, "epoch": 0.29368861227268306, "percentage": 9.79, "elapsed_time": "4:34:58", "remaining_time": "1 day, 18:14:10", "throughput": 26579.17, "total_tokens": 438522560} +{"current_steps": 1520, "total_steps": 15426, "loss": 0.4128, "lr": 4.658677157042149e-05, "epoch": 0.2956335699698532, "percentage": 9.85, "elapsed_time": "4:36:49", 
"remaining_time": "1 day, 18:12:30", "throughput": 26572.17, "total_tokens": 441337856} +{"current_steps": 1530, "total_steps": 15426, "loss": 0.4112, "lr": 4.656656301892605e-05, "epoch": 0.29757852766702325, "percentage": 9.92, "elapsed_time": "4:38:35", "remaining_time": "1 day, 18:10:17", "throughput": 26576.39, "total_tokens": 444242048} +{"current_steps": 1540, "total_steps": 15426, "loss": 0.4128, "lr": 4.6546380743016465e-05, "epoch": 0.2995234853641933, "percentage": 9.98, "elapsed_time": "4:40:26", "remaining_time": "1 day, 18:08:45", "throughput": 26573.78, "total_tokens": 447151616} +{"current_steps": 1550, "total_steps": 15426, "loss": 0.4093, "lr": 4.652622468580193e-05, "epoch": 0.3014684430613634, "percentage": 10.05, "elapsed_time": "4:42:14", "remaining_time": "1 day, 18:06:43", "throughput": 26573.32, "total_tokens": 450009472} +{"current_steps": 1560, "total_steps": 15426, "loss": 0.414, "lr": 4.650609479056392e-05, "epoch": 0.3034134007585335, "percentage": 10.11, "elapsed_time": "4:44:24", "remaining_time": "1 day, 18:08:00", "throughput": 26540.09, "total_tokens": 452904320} +{"current_steps": 1570, "total_steps": 15426, "loss": 0.4115, "lr": 4.648599100075556e-05, "epoch": 0.3053583584557036, "percentage": 10.18, "elapsed_time": "4:46:23", "remaining_time": "1 day, 18:07:34", "throughput": 26529.73, "total_tokens": 455880768} +{"current_steps": 1580, "total_steps": 15426, "loss": 0.4104, "lr": 4.6465913260000945e-05, "epoch": 0.30730331615287365, "percentage": 10.24, "elapsed_time": "4:48:15", "remaining_time": "1 day, 18:06:05", "throughput": 26525.75, "total_tokens": 458774912} +{"current_steps": 1590, "total_steps": 15426, "loss": 0.4063, "lr": 4.644586151209444e-05, "epoch": 0.3092482738500438, "percentage": 10.31, "elapsed_time": "4:50:10", "remaining_time": "1 day, 18:05:06", "throughput": 26520.57, "total_tokens": 461742400} +{"current_steps": 1600, "total_steps": 15426, "loss": 0.4091, "lr": 4.6425835701000084e-05, "epoch": 
0.31119323154721384, "percentage": 10.37, "elapsed_time": "4:52:06", "remaining_time": "1 day, 18:04:11", "throughput": 26511.06, "total_tokens": 464649472} +{"current_steps": 1610, "total_steps": 15426, "loss": 0.4113, "lr": 4.640583577085084e-05, "epoch": 0.3131381892443839, "percentage": 10.44, "elapsed_time": "4:53:54", "remaining_time": "1 day, 18:02:05", "throughput": 26515.06, "total_tokens": 467570560} +{"current_steps": 1620, "total_steps": 15426, "loss": 0.4066, "lr": 4.638586166594806e-05, "epoch": 0.31508314694155404, "percentage": 10.5, "elapsed_time": "4:55:48", "remaining_time": "1 day, 18:00:57", "throughput": 26506.33, "total_tokens": 470451712} +{"current_steps": 1630, "total_steps": 15426, "loss": 0.4073, "lr": 4.6365913330760726e-05, "epoch": 0.3170281046387241, "percentage": 10.57, "elapsed_time": "4:57:46", "remaining_time": "1 day, 18:00:18", "throughput": 26494.68, "total_tokens": 473368000} +{"current_steps": 1640, "total_steps": 15426, "loss": 0.4025, "lr": 4.6345990709924855e-05, "epoch": 0.3189730623358942, "percentage": 10.63, "elapsed_time": "4:59:55", "remaining_time": "1 day, 18:01:09", "throughput": 26466.53, "total_tokens": 476269952} +{"current_steps": 1650, "total_steps": 15426, "loss": 0.4077, "lr": 4.632609374824284e-05, "epoch": 0.3209180200330643, "percentage": 10.7, "elapsed_time": "5:02:07", "remaining_time": "1 day, 18:02:32", "throughput": 26432.07, "total_tokens": 479159872} +{"current_steps": 1660, "total_steps": 15426, "loss": 0.4069, "lr": 4.630622239068285e-05, "epoch": 0.32286297773023437, "percentage": 10.76, "elapsed_time": "5:04:04", "remaining_time": "1 day, 18:01:38", "throughput": 26426.1, "total_tokens": 482134656} +{"current_steps": 1670, "total_steps": 15426, "loss": 0.402, "lr": 4.628637658237808e-05, "epoch": 0.32480793542740444, "percentage": 10.83, "elapsed_time": "5:05:53", "remaining_time": "1 day, 17:59:38", "throughput": 26424.94, "total_tokens": 484986624} +{"current_steps": 1680, "total_steps": 
15426, "loss": 0.4016, "lr": 4.626655626862625e-05, "epoch": 0.32675289312457456, "percentage": 10.89, "elapsed_time": "5:07:41", "remaining_time": "1 day, 17:57:30", "throughput": 26425.08, "total_tokens": 487833792} +{"current_steps": 1690, "total_steps": 15426, "loss": 0.4041, "lr": 4.624676139488888e-05, "epoch": 0.32869785082174463, "percentage": 10.96, "elapsed_time": "5:09:27", "remaining_time": "1 day, 17:55:10", "throughput": 26430.6, "total_tokens": 490740864} +{"current_steps": 1700, "total_steps": 15426, "loss": 0.4073, "lr": 4.6226991906790686e-05, "epoch": 0.3306428085189147, "percentage": 11.02, "elapsed_time": "5:11:09", "remaining_time": "1 day, 17:52:21", "throughput": 26438.42, "total_tokens": 493596608} +{"current_steps": 1710, "total_steps": 15426, "loss": 0.4049, "lr": 4.620724775011897e-05, "epoch": 0.3325877662160848, "percentage": 11.09, "elapsed_time": "5:13:05", "remaining_time": "1 day, 17:51:17", "throughput": 26431.38, "total_tokens": 496519552} +{"current_steps": 1720, "total_steps": 15426, "loss": 0.4031, "lr": 4.618752887082297e-05, "epoch": 0.3345327239132549, "percentage": 11.15, "elapsed_time": "5:15:00", "remaining_time": "1 day, 17:50:08", "throughput": 26426.54, "total_tokens": 499468864} +{"current_steps": 1730, "total_steps": 15426, "loss": 0.4075, "lr": 4.616783521501325e-05, "epoch": 0.33647768161042496, "percentage": 11.21, "elapsed_time": "5:17:00", "remaining_time": "1 day, 17:49:43", "throughput": 26410.29, "total_tokens": 502345728} +{"current_steps": 1740, "total_steps": 15426, "loss": 0.4025, "lr": 4.614816672896108e-05, "epoch": 0.3384226393075951, "percentage": 11.28, "elapsed_time": "5:19:01", "remaining_time": "1 day, 17:49:15", "throughput": 26398.07, "total_tokens": 505291136} +{"current_steps": 1750, "total_steps": 15426, "loss": 0.4051, "lr": 4.612852335909782e-05, "epoch": 0.34036759700476515, "percentage": 11.34, "elapsed_time": "5:21:06", "remaining_time": "1 day, 17:49:27", "throughput": 26374.32, 
"total_tokens": 508149632} +{"current_steps": 1760, "total_steps": 15426, "loss": 0.403, "lr": 4.6108905052014323e-05, "epoch": 0.3423125547019352, "percentage": 11.41, "elapsed_time": "5:23:00", "remaining_time": "1 day, 17:48:03", "throughput": 26366.79, "total_tokens": 510997440} +{"current_steps": 1770, "total_steps": 15426, "loss": 0.4024, "lr": 4.608931175446027e-05, "epoch": 0.3442575123991053, "percentage": 11.47, "elapsed_time": "5:24:47", "remaining_time": "1 day, 17:45:49", "throughput": 26372.46, "total_tokens": 513927808} +{"current_steps": 1780, "total_steps": 15426, "loss": 0.399, "lr": 4.606974341334367e-05, "epoch": 0.3462024700962754, "percentage": 11.54, "elapsed_time": "5:26:42", "remaining_time": "1 day, 17:44:36", "throughput": 26367.22, "total_tokens": 516856320} +{"current_steps": 1790, "total_steps": 15426, "loss": 0.4049, "lr": 4.605019997573011e-05, "epoch": 0.3481474277934455, "percentage": 11.6, "elapsed_time": "5:28:33", "remaining_time": "1 day, 17:42:56", "throughput": 26365.02, "total_tokens": 519753024} +{"current_steps": 1800, "total_steps": 15426, "loss": 0.4005, "lr": 4.603068138884229e-05, "epoch": 0.35009238549061555, "percentage": 11.67, "elapsed_time": "5:30:22", "remaining_time": "1 day, 17:40:53", "throughput": 26367.03, "total_tokens": 522651904} +{"current_steps": 1810, "total_steps": 15426, "loss": 0.4004, "lr": 4.6011187600059345e-05, "epoch": 0.3520373431877857, "percentage": 11.73, "elapsed_time": "5:32:09", "remaining_time": "1 day, 17:38:40", "throughput": 26372.83, "total_tokens": 525588032} +{"current_steps": 1820, "total_steps": 15426, "loss": 0.4013, "lr": 4.599171855691629e-05, "epoch": 0.35398230088495575, "percentage": 11.8, "elapsed_time": "5:34:01", "remaining_time": "1 day, 17:37:03", "throughput": 26372.16, "total_tokens": 528524928} +{"current_steps": 1830, "total_steps": 15426, "loss": 0.4036, "lr": 4.597227420710335e-05, "epoch": 0.3559272585821258, "percentage": 11.86, "elapsed_time": "5:35:48", 
"remaining_time": "1 day, 17:34:55", "throughput": 26370.49, "total_tokens": 531332480} +{"current_steps": 1840, "total_steps": 15426, "loss": 0.4022, "lr": 4.595285449846551e-05, "epoch": 0.35787221627929594, "percentage": 11.93, "elapsed_time": "5:37:46", "remaining_time": "1 day, 17:34:01", "throughput": 26358.74, "total_tokens": 534198336} +{"current_steps": 1850, "total_steps": 15426, "loss": 0.399, "lr": 4.593345937900178e-05, "epoch": 0.359817173976466, "percentage": 11.99, "elapsed_time": "5:39:53", "remaining_time": "1 day, 17:34:13", "throughput": 26337.14, "total_tokens": 537100224} +{"current_steps": 1860, "total_steps": 15426, "loss": 0.402, "lr": 4.591408879686472e-05, "epoch": 0.3617621316736361, "percentage": 12.06, "elapsed_time": "5:41:43", "remaining_time": "1 day, 17:32:24", "throughput": 26335.01, "total_tokens": 539965376} +{"current_steps": 1870, "total_steps": 15426, "loss": 0.4031, "lr": 4.5894742700359775e-05, "epoch": 0.3637070893708062, "percentage": 12.12, "elapsed_time": "5:43:29", "remaining_time": "1 day, 17:30:00", "throughput": 26341.04, "total_tokens": 542870080} +{"current_steps": 1880, "total_steps": 15426, "loss": 0.3996, "lr": 4.587542103794477e-05, "epoch": 0.36565204706797627, "percentage": 12.19, "elapsed_time": "5:45:14", "remaining_time": "1 day, 17:27:34", "throughput": 26347.74, "total_tokens": 545779712} +{"current_steps": 1890, "total_steps": 15426, "loss": 0.4006, "lr": 4.5856123758229247e-05, "epoch": 0.36759700476514634, "percentage": 12.25, "elapsed_time": "5:47:00", "remaining_time": "1 day, 17:25:15", "throughput": 26352.46, "total_tokens": 548675392} +{"current_steps": 1900, "total_steps": 15426, "loss": 0.3973, "lr": 4.5836850809973993e-05, "epoch": 0.36954196246231646, "percentage": 12.32, "elapsed_time": "5:48:45", "remaining_time": "1 day, 17:22:50", "throughput": 26356.69, "total_tokens": 551537024} +{"current_steps": 1910, "total_steps": 15426, "loss": 0.4004, "lr": 4.5817602142090385e-05, "epoch": 
0.37148692015948653, "percentage": 12.38, "elapsed_time": "5:50:34", "remaining_time": "1 day, 17:20:50", "throughput": 26360.68, "total_tokens": 554486208} +{"current_steps": 1920, "total_steps": 15426, "loss": 0.4035, "lr": 4.579837770363989e-05, "epoch": 0.3734318778566566, "percentage": 12.45, "elapsed_time": "5:52:20", "remaining_time": "1 day, 17:18:31", "throughput": 26363.89, "total_tokens": 557348928} +{"current_steps": 1930, "total_steps": 15426, "loss": 0.3976, "lr": 4.57791774438334e-05, "epoch": 0.3753768355538267, "percentage": 12.51, "elapsed_time": "5:54:10", "remaining_time": "1 day, 17:16:39", "throughput": 26366.11, "total_tokens": 560293888} +{"current_steps": 1940, "total_steps": 15426, "loss": 0.3989, "lr": 4.576000131203078e-05, "epoch": 0.3773217932509968, "percentage": 12.58, "elapsed_time": "5:55:56", "remaining_time": "1 day, 17:14:19", "throughput": 26373.94, "total_tokens": 563249728} +{"current_steps": 1950, "total_steps": 15426, "loss": 0.398, "lr": 4.574084925774023e-05, "epoch": 0.37926675094816686, "percentage": 12.64, "elapsed_time": "5:57:40", "remaining_time": "1 day, 17:11:49", "throughput": 26381.93, "total_tokens": 566173632} +{"current_steps": 1960, "total_steps": 15426, "loss": 0.3941, "lr": 4.5721721230617795e-05, "epoch": 0.381211708645337, "percentage": 12.71, "elapsed_time": "5:59:28", "remaining_time": "1 day, 17:09:41", "throughput": 26386.0, "total_tokens": 569094784} +{"current_steps": 1970, "total_steps": 15426, "loss": 0.3979, "lr": 4.57026171804667e-05, "epoch": 0.38315666634250706, "percentage": 12.77, "elapsed_time": "6:01:11", "remaining_time": "1 day, 17:07:05", "throughput": 26391.19, "total_tokens": 571932672} +{"current_steps": 1980, "total_steps": 15426, "loss": 0.3943, "lr": 4.568353705723692e-05, "epoch": 0.3851016240396771, "percentage": 12.84, "elapsed_time": "6:02:58", "remaining_time": "1 day, 17:04:57", "throughput": 26394.34, "total_tokens": 574836864} +{"current_steps": 1990, "total_steps": 
15426, "loss": 0.3965, "lr": 4.566448081102455e-05, "epoch": 0.38704658173684725, "percentage": 12.9, "elapsed_time": "6:04:39", "remaining_time": "1 day, 17:02:08", "throughput": 26404.0, "total_tokens": 577717632} +{"current_steps": 2000, "total_steps": 15426, "loss": 0.3942, "lr": 4.564544839207128e-05, "epoch": 0.3889915394340173, "percentage": 12.97, "elapsed_time": "6:06:20", "remaining_time": "1 day, 16:59:18", "throughput": 26413.43, "total_tokens": 580592448} +{"current_steps": 2010, "total_steps": 15426, "loss": 0.3956, "lr": 4.562643975076387e-05, "epoch": 0.3909364971311874, "percentage": 13.03, "elapsed_time": "6:08:03", "remaining_time": "1 day, 16:56:38", "throughput": 26422.29, "total_tokens": 583495424} +{"current_steps": 2020, "total_steps": 15426, "loss": 0.3991, "lr": 4.560745483763357e-05, "epoch": 0.39288145482835746, "percentage": 13.09, "elapsed_time": "6:09:49", "remaining_time": "1 day, 16:54:25", "throughput": 26427.48, "total_tokens": 586420736} +{"current_steps": 2030, "total_steps": 15426, "loss": 0.3984, "lr": 4.5588493603355595e-05, "epoch": 0.3948264125255276, "percentage": 13.16, "elapsed_time": "6:11:32", "remaining_time": "1 day, 16:51:46", "throughput": 26434.52, "total_tokens": 589283200} +{"current_steps": 2040, "total_steps": 15426, "loss": 0.3975, "lr": 4.556955599874859e-05, "epoch": 0.39677137022269765, "percentage": 13.22, "elapsed_time": "6:13:15", "remaining_time": "1 day, 16:49:11", "throughput": 26441.71, "total_tokens": 592166464} +{"current_steps": 2050, "total_steps": 15426, "loss": 0.3958, "lr": 4.555064197477409e-05, "epoch": 0.3987163279198677, "percentage": 13.29, "elapsed_time": "6:14:57", "remaining_time": "1 day, 16:46:31", "throughput": 26450.47, "total_tokens": 595061056} +{"current_steps": 2060, "total_steps": 15426, "loss": 0.3953, "lr": 4.5531751482536e-05, "epoch": 0.40066128561703784, "percentage": 13.35, "elapsed_time": "6:16:44", "remaining_time": "1 day, 16:44:23", "throughput": 26455.03, 
"total_tokens": 597993408} +{"current_steps": 2070, "total_steps": 15426, "loss": 0.3964, "lr": 4.5512884473280024e-05, "epoch": 0.4026062433142079, "percentage": 13.42, "elapsed_time": "6:18:30", "remaining_time": "1 day, 16:42:12", "throughput": 26458.18, "total_tokens": 600878208} +{"current_steps": 2080, "total_steps": 15426, "loss": 0.3978, "lr": 4.549404089839322e-05, "epoch": 0.404551201011378, "percentage": 13.48, "elapsed_time": "6:20:09", "remaining_time": "1 day, 16:39:15", "throughput": 26468.3, "total_tokens": 603737600} +{"current_steps": 2090, "total_steps": 15426, "loss": 0.3995, "lr": 4.547522070940335e-05, "epoch": 0.4064961587085481, "percentage": 13.55, "elapsed_time": "6:21:56", "remaining_time": "1 day, 16:37:07", "throughput": 26473.38, "total_tokens": 606677632} +{"current_steps": 2100, "total_steps": 15426, "loss": 0.4004, "lr": 4.545642385797848e-05, "epoch": 0.4084411164057182, "percentage": 13.61, "elapsed_time": "6:23:41", "remaining_time": "1 day, 16:34:49", "throughput": 26478.38, "total_tokens": 609577152} +{"current_steps": 2110, "total_steps": 15426, "loss": 0.3927, "lr": 4.543765029592637e-05, "epoch": 0.41038607410288824, "percentage": 13.68, "elapsed_time": "6:25:24", "remaining_time": "1 day, 16:32:19", "throughput": 26485.53, "total_tokens": 612477760} +{"current_steps": 2120, "total_steps": 15426, "loss": 0.3967, "lr": 4.541889997519403e-05, "epoch": 0.41233103180005837, "percentage": 13.74, "elapsed_time": "6:27:08", "remaining_time": "1 day, 16:29:54", "throughput": 26492.0, "total_tokens": 615380416} +{"current_steps": 2130, "total_steps": 15426, "loss": 0.3943, "lr": 4.5400172847867095e-05, "epoch": 0.41427598949722844, "percentage": 13.81, "elapsed_time": "6:28:49", "remaining_time": "1 day, 16:27:09", "throughput": 26499.29, "total_tokens": 618216768} +{"current_steps": 2140, "total_steps": 15426, "loss": 0.3909, "lr": 4.5381468866169466e-05, "epoch": 0.4162209471943985, "percentage": 13.87, "elapsed_time": "6:30:33", 
"remaining_time": "1 day, 16:24:46", "throughput": 26506.4, "total_tokens": 621145664} +{"current_steps": 2150, "total_steps": 15426, "loss": 0.3896, "lr": 4.5362787982462616e-05, "epoch": 0.41816590489156863, "percentage": 13.94, "elapsed_time": "6:32:15", "remaining_time": "1 day, 16:22:09", "throughput": 26514.21, "total_tokens": 624025920} +{"current_steps": 2160, "total_steps": 15426, "loss": 0.3925, "lr": 4.5344130149245275e-05, "epoch": 0.4201108625887387, "percentage": 14.0, "elapsed_time": "6:33:57", "remaining_time": "1 day, 16:19:33", "throughput": 26522.21, "total_tokens": 626916416} +{"current_steps": 2170, "total_steps": 15426, "loss": 0.3941, "lr": 4.5325495319152715e-05, "epoch": 0.42205582028590877, "percentage": 14.07, "elapsed_time": "6:35:42", "remaining_time": "1 day, 16:17:19", "throughput": 26527.31, "total_tokens": 629836352} +{"current_steps": 2180, "total_steps": 15426, "loss": 0.3929, "lr": 4.530688344495644e-05, "epoch": 0.4240007779830789, "percentage": 14.13, "elapsed_time": "6:37:27", "remaining_time": "1 day, 16:14:58", "throughput": 26531.24, "total_tokens": 632693696} +{"current_steps": 2190, "total_steps": 15426, "loss": 0.3927, "lr": 4.528829447956357e-05, "epoch": 0.42594573568024896, "percentage": 14.2, "elapsed_time": "6:39:08", "remaining_time": "1 day, 16:12:22", "throughput": 26538.93, "total_tokens": 635573760} +{"current_steps": 2200, "total_steps": 15426, "loss": 0.397, "lr": 4.526972837601633e-05, "epoch": 0.42789069337741903, "percentage": 14.26, "elapsed_time": "6:40:54", "remaining_time": "1 day, 16:10:09", "throughput": 26542.93, "total_tokens": 638468608} +{"current_steps": 2210, "total_steps": 15426, "loss": 0.3942, "lr": 4.525118508749165e-05, "epoch": 0.42983565107458915, "percentage": 14.33, "elapsed_time": "6:42:35", "remaining_time": "1 day, 16:07:31", "throughput": 26550.67, "total_tokens": 641341376} +{"current_steps": 2220, "total_steps": 15426, "loss": 0.3931, "lr": 4.5232664567300546e-05, "epoch": 
0.4317806087717592, "percentage": 14.39, "elapsed_time": "6:44:16", "remaining_time": "1 day, 16:04:55", "throughput": 26557.74, "total_tokens": 644208000} +{"current_steps": 2230, "total_steps": 15426, "loss": 0.3923, "lr": 4.521416676888773e-05, "epoch": 0.4337255664689293, "percentage": 14.46, "elapsed_time": "6:46:02", "remaining_time": "1 day, 16:02:45", "throughput": 26562.81, "total_tokens": 647141632} +{"current_steps": 2240, "total_steps": 15426, "loss": 0.3948, "lr": 4.519569164583107e-05, "epoch": 0.43567052416609936, "percentage": 14.52, "elapsed_time": "6:47:47", "remaining_time": "1 day, 16:00:33", "throughput": 26568.3, "total_tokens": 650071872} +{"current_steps": 2250, "total_steps": 15426, "loss": 0.3967, "lr": 4.517723915184109e-05, "epoch": 0.4376154818632695, "percentage": 14.59, "elapsed_time": "6:49:36", "remaining_time": "1 day, 15:58:39", "throughput": 26573.01, "total_tokens": 653070208} +{"current_steps": 2260, "total_steps": 15426, "loss": 0.3891, "lr": 4.5158809240760506e-05, "epoch": 0.43956043956043955, "percentage": 14.65, "elapsed_time": "6:51:19", "remaining_time": "1 day, 15:56:12", "throughput": 26579.77, "total_tokens": 655966912} +{"current_steps": 2270, "total_steps": 15426, "loss": 0.3946, "lr": 4.514040186656375e-05, "epoch": 0.4415053972576096, "percentage": 14.72, "elapsed_time": "6:52:59", "remaining_time": "1 day, 15:53:34", "throughput": 26587.89, "total_tokens": 658845632} +{"current_steps": 2280, "total_steps": 15426, "loss": 0.3839, "lr": 4.512201698335644e-05, "epoch": 0.44345035495477975, "percentage": 14.78, "elapsed_time": "6:54:41", "remaining_time": "1 day, 15:51:02", "throughput": 26596.5, "total_tokens": 661767232} +{"current_steps": 2290, "total_steps": 15426, "loss": 0.3951, "lr": 4.510365454537496e-05, "epoch": 0.4453953126519498, "percentage": 14.85, "elapsed_time": "6:56:26", "remaining_time": "1 day, 15:48:48", "throughput": 26599.5, "total_tokens": 664625856} +{"current_steps": 2300, "total_steps": 
15426, "loss": 0.3912, "lr": 4.5085314506985945e-05, "epoch": 0.4473402703491199, "percentage": 14.91, "elapsed_time": "6:58:10", "remaining_time": "1 day, 15:46:32", "throughput": 26604.49, "total_tokens": 667529472} +{"current_steps": 2310, "total_steps": 15426, "loss": 0.3929, "lr": 4.50669968226858e-05, "epoch": 0.44928522804629, "percentage": 14.97, "elapsed_time": "6:59:53", "remaining_time": "1 day, 15:44:06", "throughput": 26611.22, "total_tokens": 670428608} +{"current_steps": 2320, "total_steps": 15426, "loss": 0.3893, "lr": 4.504870144710027e-05, "epoch": 0.4512301857434601, "percentage": 15.04, "elapsed_time": "7:01:34", "remaining_time": "1 day, 15:41:29", "throughput": 26619.36, "total_tokens": 673313344} +{"current_steps": 2330, "total_steps": 15426, "loss": 0.3866, "lr": 4.5030428334983884e-05, "epoch": 0.45317514344063015, "percentage": 15.1, "elapsed_time": "7:03:16", "remaining_time": "1 day, 15:39:01", "throughput": 26626.1, "total_tokens": 676201280} +{"current_steps": 2340, "total_steps": 15426, "loss": 0.3916, "lr": 4.501217744121959e-05, "epoch": 0.45512010113780027, "percentage": 15.17, "elapsed_time": "7:04:56", "remaining_time": "1 day, 15:36:23", "throughput": 26634.43, "total_tokens": 679080192} +{"current_steps": 2350, "total_steps": 15426, "loss": 0.3941, "lr": 4.499394872081821e-05, "epoch": 0.45706505883497034, "percentage": 15.23, "elapsed_time": "7:06:37", "remaining_time": "1 day, 15:33:52", "throughput": 26642.31, "total_tokens": 681982016} +{"current_steps": 2360, "total_steps": 15426, "loss": 0.3891, "lr": 4.4975742128918e-05, "epoch": 0.4590100165321404, "percentage": 15.3, "elapsed_time": "7:08:16", "remaining_time": "1 day, 15:31:07", "throughput": 26650.04, "total_tokens": 684816320} +{"current_steps": 2370, "total_steps": 15426, "loss": 0.3914, "lr": 4.495755762078418e-05, "epoch": 0.46095497422931053, "percentage": 15.36, "elapsed_time": "7:09:58", "remaining_time": "1 day, 15:28:41", "throughput": 26657.33, 
"total_tokens": 687724608} +{"current_steps": 2380, "total_steps": 15426, "loss": 0.3914, "lr": 4.49393951518085e-05, "epoch": 0.4628999319264806, "percentage": 15.43, "elapsed_time": "7:11:40", "remaining_time": "1 day, 15:26:15", "throughput": 26665.01, "total_tokens": 690647168} +{"current_steps": 2390, "total_steps": 15426, "loss": 0.3903, "lr": 4.4921254677508716e-05, "epoch": 0.46484488962365067, "percentage": 15.49, "elapsed_time": "7:13:22", "remaining_time": "1 day, 15:23:50", "throughput": 26672.35, "total_tokens": 693560320} +{"current_steps": 2400, "total_steps": 15426, "loss": 0.3904, "lr": 4.490313615352821e-05, "epoch": 0.4667898473208208, "percentage": 15.56, "elapsed_time": "7:15:05", "remaining_time": "1 day, 15:21:28", "throughput": 26678.37, "total_tokens": 696458112} +{"current_steps": 2410, "total_steps": 15426, "loss": 0.3887, "lr": 4.48850395356355e-05, "epoch": 0.46873480501799086, "percentage": 15.62, "elapsed_time": "7:16:48", "remaining_time": "1 day, 15:19:06", "throughput": 26685.25, "total_tokens": 699375488} +{"current_steps": 2420, "total_steps": 15426, "loss": 0.389, "lr": 4.486696477972375e-05, "epoch": 0.47067976271516093, "percentage": 15.69, "elapsed_time": "7:18:32", "remaining_time": "1 day, 15:16:54", "throughput": 26689.62, "total_tokens": 702273856} +{"current_steps": 2430, "total_steps": 15426, "loss": 0.3834, "lr": 4.484891184181041e-05, "epoch": 0.47262472041233106, "percentage": 15.75, "elapsed_time": "7:20:17", "remaining_time": "1 day, 15:14:43", "throughput": 26693.01, "total_tokens": 705154688} +{"current_steps": 2440, "total_steps": 15426, "loss": 0.3881, "lr": 4.483088067803662e-05, "epoch": 0.4745696781095011, "percentage": 15.82, "elapsed_time": "7:21:59", "remaining_time": "1 day, 15:12:21", "throughput": 26699.78, "total_tokens": 708071296} +{"current_steps": 2450, "total_steps": 15426, "loss": 0.3887, "lr": 4.481287124466697e-05, "epoch": 0.4765146358066712, "percentage": 15.88, "elapsed_time": "7:23:44", 
"remaining_time": "1 day, 15:10:13", "throughput": 26705.32, "total_tokens": 711022976} +{"current_steps": 2460, "total_steps": 15426, "loss": 0.3918, "lr": 4.479488349808885e-05, "epoch": 0.4784595935038413, "percentage": 15.95, "elapsed_time": "7:25:23", "remaining_time": "1 day, 15:07:30", "throughput": 26713.79, "total_tokens": 713877376} +{"current_steps": 2470, "total_steps": 15426, "loss": 0.3892, "lr": 4.4776917394812114e-05, "epoch": 0.4804045512010114, "percentage": 16.01, "elapsed_time": "7:27:08", "remaining_time": "1 day, 15:05:23", "throughput": 26718.33, "total_tokens": 716806656} +{"current_steps": 2480, "total_steps": 15426, "loss": 0.3841, "lr": 4.475897289146862e-05, "epoch": 0.48234950889818146, "percentage": 16.08, "elapsed_time": "7:28:51", "remaining_time": "1 day, 15:03:08", "throughput": 26723.99, "total_tokens": 719725504} +{"current_steps": 2490, "total_steps": 15426, "loss": 0.3878, "lr": 4.4741049944811806e-05, "epoch": 0.4842944665953515, "percentage": 16.14, "elapsed_time": "7:30:39", "remaining_time": "1 day, 15:01:15", "throughput": 26726.13, "total_tokens": 722664704} +{"current_steps": 2500, "total_steps": 15426, "loss": 0.3813, "lr": 4.472314851171621e-05, "epoch": 0.48623942429252165, "percentage": 16.21, "elapsed_time": "7:32:18", "remaining_time": "1 day, 14:58:35", "throughput": 26735.63, "total_tokens": 725559488} +{"current_steps": 2510, "total_steps": 15426, "loss": 0.3848, "lr": 4.4705268549177084e-05, "epoch": 0.4881843819896917, "percentage": 16.27, "elapsed_time": "7:34:03", "remaining_time": "1 day, 14:56:28", "throughput": 26740.46, "total_tokens": 728493696} +{"current_steps": 2520, "total_steps": 15426, "loss": 0.3885, "lr": 4.468741001430989e-05, "epoch": 0.4901293396868618, "percentage": 16.34, "elapsed_time": "7:35:44", "remaining_time": "1 day, 14:54:02", "throughput": 26747.12, "total_tokens": 731384768} +{"current_steps": 2530, "total_steps": 15426, "loss": 0.387, "lr": 4.466957286434997e-05, "epoch": 
0.4920742973840319, "percentage": 16.4, "elapsed_time": "7:37:25", "remaining_time": "1 day, 14:51:37", "throughput": 26753.29, "total_tokens": 734266368} +{"current_steps": 2540, "total_steps": 15426, "loss": 0.3826, "lr": 4.4651757056652e-05, "epoch": 0.494019255081202, "percentage": 16.47, "elapsed_time": "7:39:11", "remaining_time": "1 day, 14:49:35", "throughput": 26757.94, "total_tokens": 737223616} +{"current_steps": 2550, "total_steps": 15426, "loss": 0.3831, "lr": 4.463396254868968e-05, "epoch": 0.49596421277837205, "percentage": 16.53, "elapsed_time": "7:40:51", "remaining_time": "1 day, 14:47:02", "throughput": 26765.86, "total_tokens": 740107712} +{"current_steps": 2560, "total_steps": 15426, "loss": 0.3852, "lr": 4.461618929805519e-05, "epoch": 0.4979091704755422, "percentage": 16.6, "elapsed_time": "7:42:32", "remaining_time": "1 day, 14:44:40", "throughput": 26771.58, "total_tokens": 742990080} +{"current_steps": 2570, "total_steps": 15426, "loss": 0.3852, "lr": 4.459843726245888e-05, "epoch": 0.49985412817271224, "percentage": 16.66, "elapsed_time": "7:44:20", "remaining_time": "1 day, 14:42:49", "throughput": 26774.82, "total_tokens": 745971584} +{"current_steps": 2580, "total_steps": 15426, "loss": 0.3855, "lr": 4.458070639972875e-05, "epoch": 0.5017990858698823, "percentage": 16.73, "elapsed_time": "7:46:04", "remaining_time": "1 day, 14:40:36", "throughput": 26779.57, "total_tokens": 748873664} +{"current_steps": 2590, "total_steps": 15426, "loss": 0.3798, "lr": 4.456299666781007e-05, "epoch": 0.5037440435670524, "percentage": 16.79, "elapsed_time": "7:47:49", "remaining_time": "1 day, 14:38:33", "throughput": 26784.08, "total_tokens": 751826240} +{"current_steps": 2600, "total_steps": 15426, "loss": 0.3819, "lr": 4.4545308024764984e-05, "epoch": 0.5056890012642224, "percentage": 16.85, "elapsed_time": "7:49:28", "remaining_time": "1 day, 14:35:59", "throughput": 26791.06, "total_tokens": 754675456} +{"current_steps": 2610, "total_steps": 15426, 
"loss": 0.3896, "lr": 4.452764042877207e-05, "epoch": 0.5076339589613926, "percentage": 16.92, "elapsed_time": "7:51:11", "remaining_time": "1 day, 14:33:44", "throughput": 26795.77, "total_tokens": 757565120} +{"current_steps": 2620, "total_steps": 15426, "loss": 0.3844, "lr": 4.45099938381259e-05, "epoch": 0.5095789166585627, "percentage": 16.98, "elapsed_time": "7:52:51", "remaining_time": "1 day, 14:31:16", "throughput": 26802.01, "total_tokens": 760425792} +{"current_steps": 2630, "total_steps": 15426, "loss": 0.3811, "lr": 4.449236821123667e-05, "epoch": 0.5115238743557328, "percentage": 17.05, "elapsed_time": "7:54:33", "remaining_time": "1 day, 14:28:55", "throughput": 26808.18, "total_tokens": 763325056} +{"current_steps": 2640, "total_steps": 15426, "loss": 0.3834, "lr": 4.447476350662976e-05, "epoch": 0.5134688320529028, "percentage": 17.11, "elapsed_time": "7:56:17", "remaining_time": "1 day, 14:26:45", "throughput": 26811.75, "total_tokens": 766212544} +{"current_steps": 2650, "total_steps": 15426, "loss": 0.3813, "lr": 4.4457179682945346e-05, "epoch": 0.5154137897500729, "percentage": 17.18, "elapsed_time": "7:58:02", "remaining_time": "1 day, 14:24:41", "throughput": 26817.09, "total_tokens": 769177152} +{"current_steps": 2660, "total_steps": 15426, "loss": 0.3864, "lr": 4.443961669893798e-05, "epoch": 0.517358747447243, "percentage": 17.24, "elapsed_time": "7:59:45", "remaining_time": "1 day, 14:22:27", "throughput": 26822.77, "total_tokens": 772100416} +{"current_steps": 2670, "total_steps": 15426, "loss": 0.3852, "lr": 4.4422074513476155e-05, "epoch": 0.5193037051444132, "percentage": 17.31, "elapsed_time": "8:01:26", "remaining_time": "1 day, 14:20:04", "throughput": 26829.58, "total_tokens": 775005312} +{"current_steps": 2680, "total_steps": 15426, "loss": 0.3821, "lr": 4.4404553085541955e-05, "epoch": 0.5212486628415832, "percentage": 17.37, "elapsed_time": "8:03:13", "remaining_time": "1 day, 14:18:11", "throughput": 26832.98, "total_tokens": 
777976960} +{"current_steps": 2690, "total_steps": 15426, "loss": 0.3851, "lr": 4.438705237423063e-05, "epoch": 0.5231936205387533, "percentage": 17.44, "elapsed_time": "8:04:56", "remaining_time": "1 day, 14:15:58", "throughput": 26836.98, "total_tokens": 780857408} +{"current_steps": 2700, "total_steps": 15426, "loss": 0.3821, "lr": 4.436957233875017e-05, "epoch": 0.5251385782359234, "percentage": 17.5, "elapsed_time": "8:06:38", "remaining_time": "1 day, 14:13:44", "throughput": 26841.55, "total_tokens": 783745664} +{"current_steps": 2710, "total_steps": 15426, "loss": 0.3808, "lr": 4.4352112938420956e-05, "epoch": 0.5270835359330934, "percentage": 17.57, "elapsed_time": "8:08:20", "remaining_time": "1 day, 14:11:27", "throughput": 26846.88, "total_tokens": 786639168} +{"current_steps": 2720, "total_steps": 15426, "loss": 0.3816, "lr": 4.433467413267529e-05, "epoch": 0.5290284936302635, "percentage": 17.63, "elapsed_time": "8:10:01", "remaining_time": "1 day, 14:09:05", "throughput": 26853.27, "total_tokens": 789536704} +{"current_steps": 2730, "total_steps": 15426, "loss": 0.3792, "lr": 4.431725588105708e-05, "epoch": 0.5309734513274337, "percentage": 17.7, "elapsed_time": "8:11:45", "remaining_time": "1 day, 14:06:58", "throughput": 26858.34, "total_tokens": 792476352} +{"current_steps": 2740, "total_steps": 15426, "loss": 0.3856, "lr": 4.4299858143221377e-05, "epoch": 0.5329184090246037, "percentage": 17.76, "elapsed_time": "8:13:22", "remaining_time": "1 day, 14:04:19", "throughput": 26865.89, "total_tokens": 795307584} +{"current_steps": 2750, "total_steps": 15426, "loss": 0.3808, "lr": 4.4282480878934065e-05, "epoch": 0.5348633667217738, "percentage": 17.83, "elapsed_time": "8:15:05", "remaining_time": "1 day, 14:02:04", "throughput": 26870.11, "total_tokens": 798180224} +{"current_steps": 2760, "total_steps": 15426, "loss": 0.3824, "lr": 4.4265124048071346e-05, "epoch": 0.5368083244189439, "percentage": 17.89, "elapsed_time": "8:16:47", "remaining_time": 
"1 day, 13:59:51", "throughput": 26875.49, "total_tokens": 801095872} +{"current_steps": 2770, "total_steps": 15426, "loss": 0.3797, "lr": 4.4247787610619477e-05, "epoch": 0.538753282116114, "percentage": 17.96, "elapsed_time": "8:18:33", "remaining_time": "1 day, 13:57:51", "throughput": 26879.1, "total_tokens": 804034816} +{"current_steps": 2780, "total_steps": 15426, "loss": 0.3812, "lr": 4.42304715266743e-05, "epoch": 0.540698239813284, "percentage": 18.02, "elapsed_time": "8:20:15", "remaining_time": "1 day, 13:55:38", "throughput": 26884.62, "total_tokens": 806959744} +{"current_steps": 2790, "total_steps": 15426, "loss": 0.384, "lr": 4.421317575644092e-05, "epoch": 0.5426431975104542, "percentage": 18.09, "elapsed_time": "8:21:50", "remaining_time": "1 day, 13:52:53", "throughput": 26893.36, "total_tokens": 809784960} +{"current_steps": 2800, "total_steps": 15426, "loss": 0.3821, "lr": 4.419590026023325e-05, "epoch": 0.5445881552076243, "percentage": 18.15, "elapsed_time": "8:23:29", "remaining_time": "1 day, 13:50:25", "throughput": 26900.3, "total_tokens": 812655808} +{"current_steps": 2810, "total_steps": 15426, "loss": 0.3791, "lr": 4.417864499847368e-05, "epoch": 0.5465331129047943, "percentage": 18.22, "elapsed_time": "8:25:12", "remaining_time": "1 day, 13:48:14", "throughput": 26906.11, "total_tokens": 815595328} +{"current_steps": 2820, "total_steps": 15426, "loss": 0.3796, "lr": 4.4161409931692676e-05, "epoch": 0.5484780706019644, "percentage": 18.28, "elapsed_time": "8:26:58", "remaining_time": "1 day, 13:46:18", "throughput": 26911.3, "total_tokens": 818611520} +{"current_steps": 2830, "total_steps": 15426, "loss": 0.3816, "lr": 4.414419502052841e-05, "epoch": 0.5504230282991345, "percentage": 18.35, "elapsed_time": "8:28:41", "remaining_time": "1 day, 13:44:06", "throughput": 26917.1, "total_tokens": 821544896} +{"current_steps": 2840, "total_steps": 15426, "loss": 0.3835, "lr": 4.412700022572637e-05, "epoch": 0.5523679859963045, "percentage": 
18.41, "elapsed_time": "8:30:23", "remaining_time": "1 day, 13:41:53", "throughput": 26922.98, "total_tokens": 824476224} +{"current_steps": 2850, "total_steps": 15426, "loss": 0.3826, "lr": 4.410982550813902e-05, "epoch": 0.5543129436934746, "percentage": 18.48, "elapsed_time": "8:32:06", "remaining_time": "1 day, 13:39:45", "throughput": 26928.48, "total_tokens": 827422080} +{"current_steps": 2860, "total_steps": 15426, "loss": 0.3848, "lr": 4.409267082872535e-05, "epoch": 0.5562579013906448, "percentage": 18.54, "elapsed_time": "8:33:49", "remaining_time": "1 day, 13:37:36", "throughput": 26932.59, "total_tokens": 830323008} +{"current_steps": 2870, "total_steps": 15426, "loss": 0.3801, "lr": 4.407553614855059e-05, "epoch": 0.5582028590878149, "percentage": 18.6, "elapsed_time": "8:35:33", "remaining_time": "1 day, 13:35:33", "throughput": 26936.72, "total_tokens": 833258944} +{"current_steps": 2880, "total_steps": 15426, "loss": 0.3814, "lr": 4.405842142878579e-05, "epoch": 0.5601478167849849, "percentage": 18.67, "elapsed_time": "8:37:17", "remaining_time": "1 day, 13:33:25", "throughput": 26940.15, "total_tokens": 836145408} +{"current_steps": 2890, "total_steps": 15426, "loss": 0.3838, "lr": 4.404132663070745e-05, "epoch": 0.562092774482155, "percentage": 18.73, "elapsed_time": "8:38:59", "remaining_time": "1 day, 13:31:14", "throughput": 26944.8, "total_tokens": 839049344} +{"current_steps": 2900, "total_steps": 15426, "loss": 0.3819, "lr": 4.402425171569716e-05, "epoch": 0.5640377321793251, "percentage": 18.8, "elapsed_time": "8:40:37", "remaining_time": "1 day, 13:28:43", "throughput": 26951.34, "total_tokens": 841885888} +{"current_steps": 2910, "total_steps": 15426, "loss": 0.378, "lr": 4.400719664524127e-05, "epoch": 0.5659826898764951, "percentage": 18.86, "elapsed_time": "8:42:19", "remaining_time": "1 day, 13:26:30", "throughput": 26956.98, "total_tokens": 844806144} +{"current_steps": 2920, "total_steps": 15426, "loss": 0.3788, "lr": 
4.399016138093044e-05, "epoch": 0.5679276475736653, "percentage": 18.93, "elapsed_time": "8:44:01", "remaining_time": "1 day, 13:24:19", "throughput": 26962.55, "total_tokens": 847739712} +{"current_steps": 2930, "total_steps": 15426, "loss": 0.3813, "lr": 4.397314588445937e-05, "epoch": 0.5698726052708354, "percentage": 18.99, "elapsed_time": "8:45:41", "remaining_time": "1 day, 13:22:00", "throughput": 26969.19, "total_tokens": 850654912} +{"current_steps": 2940, "total_steps": 15426, "loss": 0.3774, "lr": 4.395615011762637e-05, "epoch": 0.5718175629680055, "percentage": 19.06, "elapsed_time": "8:47:27", "remaining_time": "1 day, 13:20:03", "throughput": 26971.36, "total_tokens": 853564800} +{"current_steps": 2950, "total_steps": 15426, "loss": 0.3789, "lr": 4.3939174042333057e-05, "epoch": 0.5737625206651755, "percentage": 19.12, "elapsed_time": "8:49:13", "remaining_time": "1 day, 13:18:08", "throughput": 26972.23, "total_tokens": 856453312} +{"current_steps": 2960, "total_steps": 15426, "loss": 0.3806, "lr": 4.3922217620583904e-05, "epoch": 0.5757074783623456, "percentage": 19.19, "elapsed_time": "8:50:54", "remaining_time": "1 day, 13:15:56", "throughput": 26976.49, "total_tokens": 859335360} +{"current_steps": 2970, "total_steps": 15426, "loss": 0.3766, "lr": 4.3905280814486025e-05, "epoch": 0.5776524360595157, "percentage": 19.25, "elapsed_time": "8:52:40", "remaining_time": "1 day, 13:14:00", "throughput": 26979.12, "total_tokens": 862265280} +{"current_steps": 2980, "total_steps": 15426, "loss": 0.377, "lr": 4.388836358624867e-05, "epoch": 0.5795973937566858, "percentage": 19.32, "elapsed_time": "8:54:18", "remaining_time": "1 day, 13:11:34", "throughput": 26986.86, "total_tokens": 865168768} +{"current_steps": 2990, "total_steps": 15426, "loss": 0.3801, "lr": 4.3871465898182976e-05, "epoch": 0.5815423514538559, "percentage": 19.38, "elapsed_time": "8:55:58", "remaining_time": "1 day, 13:09:14", "throughput": 26992.02, "total_tokens": 868032128} 
+{"current_steps": 3000, "total_steps": 15426, "loss": 0.3779, "lr": 4.385458771270156e-05, "epoch": 0.583487309151026, "percentage": 19.45, "elapsed_time": "8:57:39", "remaining_time": "1 day, 13:07:00", "throughput": 26997.18, "total_tokens": 870927872} +{"current_steps": 3010, "total_steps": 15426, "loss": 0.3792, "lr": 4.3837728992318205e-05, "epoch": 0.585432266848196, "percentage": 19.51, "elapsed_time": "8:59:26", "remaining_time": "1 day, 13:05:07", "throughput": 26999.73, "total_tokens": 873874240} +{"current_steps": 3020, "total_steps": 15426, "loss": 0.379, "lr": 4.382088969964746e-05, "epoch": 0.5873772245453661, "percentage": 19.58, "elapsed_time": "9:01:04", "remaining_time": "1 day, 13:02:41", "throughput": 27006.65, "total_tokens": 876752768} +{"current_steps": 3030, "total_steps": 15426, "loss": 0.3767, "lr": 4.380406979740436e-05, "epoch": 0.5893221822425362, "percentage": 19.64, "elapsed_time": "9:02:44", "remaining_time": "1 day, 13:00:24", "throughput": 27012.49, "total_tokens": 879646720} +{"current_steps": 3040, "total_steps": 15426, "loss": 0.3798, "lr": 4.3787269248403994e-05, "epoch": 0.5912671399397064, "percentage": 19.71, "elapsed_time": "9:04:27", "remaining_time": "1 day, 12:58:17", "throughput": 27016.29, "total_tokens": 882547776} +{"current_steps": 3050, "total_steps": 15426, "loss": 0.3806, "lr": 4.377048801556126e-05, "epoch": 0.5932120976368764, "percentage": 19.77, "elapsed_time": "9:06:11", "remaining_time": "1 day, 12:56:17", "throughput": 27019.16, "total_tokens": 885464448} +{"current_steps": 3060, "total_steps": 15426, "loss": 0.3794, "lr": 4.3753726061890446e-05, "epoch": 0.5951570553340465, "percentage": 19.84, "elapsed_time": "9:07:53", "remaining_time": "1 day, 12:54:07", "throughput": 27022.86, "total_tokens": 888338432} +{"current_steps": 3070, "total_steps": 15426, "loss": 0.3772, "lr": 4.373698335050488e-05, "epoch": 0.5971020130312166, "percentage": 19.9, "elapsed_time": "9:09:30", "remaining_time": "1 day, 
12:51:37", "throughput": 27030.73, "total_tokens": 891209408} +{"current_steps": 3080, "total_steps": 15426, "loss": 0.3793, "lr": 4.372025984461667e-05, "epoch": 0.5990469707283866, "percentage": 19.97, "elapsed_time": "9:11:11", "remaining_time": "1 day, 12:49:24", "throughput": 27035.25, "total_tokens": 894092352} +{"current_steps": 3090, "total_steps": 15426, "loss": 0.377, "lr": 4.370355550753629e-05, "epoch": 0.6009919284255567, "percentage": 20.03, "elapsed_time": "9:12:56", "remaining_time": "1 day, 12:47:27", "throughput": 27037.97, "total_tokens": 897016704} +{"current_steps": 3100, "total_steps": 15426, "loss": 0.38, "lr": 4.368687030267226e-05, "epoch": 0.6029368861227268, "percentage": 20.1, "elapsed_time": "9:14:37", "remaining_time": "1 day, 12:45:17", "throughput": 27042.37, "total_tokens": 899914752} +{"current_steps": 3110, "total_steps": 15426, "loss": 0.3799, "lr": 4.367020419353081e-05, "epoch": 0.604881843819897, "percentage": 20.16, "elapsed_time": "9:16:19", "remaining_time": "1 day, 12:43:08", "throughput": 27046.95, "total_tokens": 902820992} +{"current_steps": 3120, "total_steps": 15426, "loss": 0.3776, "lr": 4.365355714371558e-05, "epoch": 0.606826801517067, "percentage": 20.23, "elapsed_time": "9:18:02", "remaining_time": "1 day, 12:41:01", "throughput": 27049.8, "total_tokens": 905687936} +{"current_steps": 3130, "total_steps": 15426, "loss": 0.3771, "lr": 4.3636929116927235e-05, "epoch": 0.6087717592142371, "percentage": 20.29, "elapsed_time": "9:19:45", "remaining_time": "1 day, 12:38:57", "throughput": 27052.97, "total_tokens": 908584192} +{"current_steps": 3140, "total_steps": 15426, "loss": 0.3796, "lr": 4.362032007696314e-05, "epoch": 0.6107167169114072, "percentage": 20.36, "elapsed_time": "9:21:20", "remaining_time": "1 day, 12:36:24", "throughput": 27059.18, "total_tokens": 911374208} +{"current_steps": 3150, "total_steps": 15426, "loss": 0.3798, "lr": 4.360372998771707e-05, "epoch": 0.6126616746085772, "percentage": 20.42, 
"elapsed_time": "9:23:00", "remaining_time": "1 day, 12:34:06", "throughput": 27064.45, "total_tokens": 914246464} +{"current_steps": 3160, "total_steps": 15426, "loss": 0.3765, "lr": 4.358715881317884e-05, "epoch": 0.6146066323057473, "percentage": 20.48, "elapsed_time": "9:24:40", "remaining_time": "1 day, 12:31:51", "throughput": 27069.31, "total_tokens": 917119104} +{"current_steps": 3170, "total_steps": 15426, "loss": 0.3786, "lr": 4.357060651743399e-05, "epoch": 0.6165515900029175, "percentage": 20.55, "elapsed_time": "9:26:19", "remaining_time": "1 day, 12:29:34", "throughput": 27073.84, "total_tokens": 919963456} +{"current_steps": 3180, "total_steps": 15426, "loss": 0.376, "lr": 4.3554073064663454e-05, "epoch": 0.6184965477000876, "percentage": 20.61, "elapsed_time": "9:27:58", "remaining_time": "1 day, 12:27:15", "throughput": 27080.15, "total_tokens": 922857216} +{"current_steps": 3190, "total_steps": 15426, "loss": 0.3714, "lr": 4.353755841914325e-05, "epoch": 0.6204415053972576, "percentage": 20.68, "elapsed_time": "9:29:39", "remaining_time": "1 day, 12:25:02", "throughput": 27085.18, "total_tokens": 925753344} +{"current_steps": 3200, "total_steps": 15426, "loss": 0.3781, "lr": 4.3521062545244116e-05, "epoch": 0.6223864630944277, "percentage": 20.74, "elapsed_time": "9:31:23", "remaining_time": "1 day, 12:23:04", "throughput": 27089.04, "total_tokens": 928704704} +{"current_steps": 3210, "total_steps": 15426, "loss": 0.3778, "lr": 4.350458540743126e-05, "epoch": 0.6243314207915978, "percentage": 20.81, "elapsed_time": "9:33:05", "remaining_time": "1 day, 12:20:56", "throughput": 27092.57, "total_tokens": 931584064} +{"current_steps": 3220, "total_steps": 15426, "loss": 0.3773, "lr": 4.3488126970263955e-05, "epoch": 0.6262763784887678, "percentage": 20.87, "elapsed_time": "9:34:45", "remaining_time": "1 day, 12:18:43", "throughput": 27096.03, "total_tokens": 934421568} +{"current_steps": 3230, "total_steps": 15426, "loss": 0.3716, "lr": 
4.347168719839527e-05, "epoch": 0.628221336185938, "percentage": 20.94, "elapsed_time": "9:36:29", "remaining_time": "1 day, 12:16:43", "throughput": 27099.02, "total_tokens": 937333184} +{"current_steps": 3240, "total_steps": 15426, "loss": 0.3791, "lr": 4.345526605657173e-05, "epoch": 0.6301662938831081, "percentage": 21.0, "elapsed_time": "9:38:10", "remaining_time": "1 day, 12:14:33", "throughput": 27102.78, "total_tokens": 940201792} +{"current_steps": 3250, "total_steps": 15426, "loss": 0.3751, "lr": 4.343886350963304e-05, "epoch": 0.6321112515802781, "percentage": 21.07, "elapsed_time": "9:39:53", "remaining_time": "1 day, 12:12:30", "throughput": 27105.69, "total_tokens": 943088384} +{"current_steps": 3260, "total_steps": 15426, "loss": 0.3765, "lr": 4.3422479522511697e-05, "epoch": 0.6340562092774482, "percentage": 21.13, "elapsed_time": "9:41:34", "remaining_time": "1 day, 12:10:23", "throughput": 27109.23, "total_tokens": 945970560} +{"current_steps": 3270, "total_steps": 15426, "loss": 0.3749, "lr": 4.340611406023272e-05, "epoch": 0.6360011669746183, "percentage": 21.2, "elapsed_time": "9:43:16", "remaining_time": "1 day, 12:08:18", "throughput": 27114.28, "total_tokens": 948914624} +{"current_steps": 3280, "total_steps": 15426, "loss": 0.3778, "lr": 4.338976708791336e-05, "epoch": 0.6379461246717884, "percentage": 21.26, "elapsed_time": "9:44:50", "remaining_time": "1 day, 12:05:41", "throughput": 27121.46, "total_tokens": 951704256} +{"current_steps": 3290, "total_steps": 15426, "loss": 0.376, "lr": 4.337343857076272e-05, "epoch": 0.6398910823689585, "percentage": 21.33, "elapsed_time": "9:46:35", "remaining_time": "1 day, 12:03:48", "throughput": 27124.38, "total_tokens": 954665856} +{"current_steps": 3300, "total_steps": 15426, "loss": 0.3687, "lr": 4.33571284740815e-05, "epoch": 0.6418360400661286, "percentage": 21.39, "elapsed_time": "9:48:18", "remaining_time": "1 day, 12:01:46", "throughput": 27128.38, "total_tokens": 957593536} 
+{"current_steps": 3310, "total_steps": 15426, "loss": 0.3751, "lr": 4.3340836763261675e-05, "epoch": 0.6437809977632987, "percentage": 21.46, "elapsed_time": "9:49:57", "remaining_time": "1 day, 11:59:31", "throughput": 27132.09, "total_tokens": 960419712} +{"current_steps": 3320, "total_steps": 15426, "loss": 0.3751, "lr": 4.332456340378618e-05, "epoch": 0.6457259554604687, "percentage": 21.52, "elapsed_time": "9:51:39", "remaining_time": "1 day, 11:57:26", "throughput": 27136.05, "total_tokens": 963326400} +{"current_steps": 3330, "total_steps": 15426, "loss": 0.3772, "lr": 4.3308308361228586e-05, "epoch": 0.6476709131576388, "percentage": 21.59, "elapsed_time": "9:53:23", "remaining_time": "1 day, 11:55:27", "throughput": 27139.29, "total_tokens": 966254208} +{"current_steps": 3340, "total_steps": 15426, "loss": 0.3771, "lr": 4.329207160125282e-05, "epoch": 0.6496158708548089, "percentage": 21.65, "elapsed_time": "9:55:07", "remaining_time": "1 day, 11:53:30", "throughput": 27142.47, "total_tokens": 969193152} +{"current_steps": 3350, "total_steps": 15426, "loss": 0.378, "lr": 4.327585308961287e-05, "epoch": 0.6515608285519789, "percentage": 21.72, "elapsed_time": "9:56:51", "remaining_time": "1 day, 11:51:32", "throughput": 27145.01, "total_tokens": 972105792} +{"current_steps": 3360, "total_steps": 15426, "loss": 0.3705, "lr": 4.325965279215243e-05, "epoch": 0.6535057862491491, "percentage": 21.78, "elapsed_time": "9:58:34", "remaining_time": "1 day, 11:49:32", "throughput": 27148.12, "total_tokens": 975020928} +{"current_steps": 3370, "total_steps": 15426, "loss": 0.3752, "lr": 4.3243470674804686e-05, "epoch": 0.6554507439463192, "percentage": 21.85, "elapsed_time": "10:00:16", "remaining_time": "1 day, 11:47:27", "throughput": 27153.9, "total_tokens": 977994304} +{"current_steps": 3380, "total_steps": 15426, "loss": 0.3712, "lr": 4.3227306703591904e-05, "epoch": 0.6573957016434893, "percentage": 21.91, "elapsed_time": "10:01:58", "remaining_time": "1 day, 
11:45:21", "throughput": 27158.15, "total_tokens": 980903808} +{"current_steps": 3390, "total_steps": 15426, "loss": 0.3783, "lr": 4.32111608446252e-05, "epoch": 0.6593406593406593, "percentage": 21.98, "elapsed_time": "10:03:41", "remaining_time": "1 day, 11:43:22", "throughput": 27161.46, "total_tokens": 983830976} +{"current_steps": 3400, "total_steps": 15426, "loss": 0.3788, "lr": 4.319503306410426e-05, "epoch": 0.6612856170378294, "percentage": 22.04, "elapsed_time": "10:05:23", "remaining_time": "1 day, 11:41:17", "throughput": 27165.91, "total_tokens": 986754816} +{"current_steps": 3410, "total_steps": 15426, "loss": 0.3722, "lr": 4.317892332831699e-05, "epoch": 0.6632305747349995, "percentage": 22.11, "elapsed_time": "10:07:02", "remaining_time": "1 day, 11:39:03", "throughput": 27170.92, "total_tokens": 989626304} +{"current_steps": 3420, "total_steps": 15426, "loss": 0.3739, "lr": 4.316283160363922e-05, "epoch": 0.6651755324321696, "percentage": 22.17, "elapsed_time": "10:08:41", "remaining_time": "1 day, 11:36:51", "throughput": 27175.91, "total_tokens": 992515456} +{"current_steps": 3430, "total_steps": 15426, "loss": 0.3747, "lr": 4.314675785653447e-05, "epoch": 0.6671204901293397, "percentage": 22.24, "elapsed_time": "10:10:20", "remaining_time": "1 day, 11:34:34", "throughput": 27181.17, "total_tokens": 995381440} +{"current_steps": 3440, "total_steps": 15426, "loss": 0.3738, "lr": 4.3130702053553606e-05, "epoch": 0.6690654478265098, "percentage": 22.3, "elapsed_time": "10:12:01", "remaining_time": "1 day, 11:32:29", "throughput": 27185.05, "total_tokens": 998278656} +{"current_steps": 3450, "total_steps": 15426, "loss": 0.3736, "lr": 4.3114664161334546e-05, "epoch": 0.6710104055236799, "percentage": 22.36, "elapsed_time": "10:13:42", "remaining_time": "1 day, 11:30:23", "throughput": 27190.01, "total_tokens": 1001216512} +{"current_steps": 3460, "total_steps": 15426, "loss": 0.3726, "lr": 4.3098644146601984e-05, "epoch": 0.6729553632208499, 
"percentage": 22.43, "elapsed_time": "10:15:23", "remaining_time": "1 day, 11:28:13", "throughput": 27194.56, "total_tokens": 1004105088} +{"current_steps": 3470, "total_steps": 15426, "loss": 0.3733, "lr": 4.30826419761671e-05, "epoch": 0.67490032091802, "percentage": 22.49, "elapsed_time": "10:17:06", "remaining_time": "1 day, 11:26:17", "throughput": 27196.89, "total_tokens": 1007015680} +{"current_steps": 3480, "total_steps": 15426, "loss": 0.3737, "lr": 4.30666576169273e-05, "epoch": 0.6768452786151902, "percentage": 22.56, "elapsed_time": "10:18:52", "remaining_time": "1 day, 11:24:27", "throughput": 27199.42, "total_tokens": 1009987520} +{"current_steps": 3490, "total_steps": 15426, "loss": 0.3749, "lr": 4.305069103586585e-05, "epoch": 0.6787902363123602, "percentage": 22.62, "elapsed_time": "10:20:27", "remaining_time": "1 day, 11:22:00", "throughput": 27206.62, "total_tokens": 1012838528} +{"current_steps": 3500, "total_steps": 15426, "loss": 0.3707, "lr": 4.303474220005164e-05, "epoch": 0.6807351940095303, "percentage": 22.69, "elapsed_time": "10:22:09", "remaining_time": "1 day, 11:19:58", "throughput": 27209.98, "total_tokens": 1015741120} +{"current_steps": 3510, "total_steps": 15426, "loss": 0.3671, "lr": 4.3018811076638944e-05, "epoch": 0.6826801517067004, "percentage": 22.75, "elapsed_time": "10:23:52", "remaining_time": "1 day, 11:17:57", "throughput": 27213.15, "total_tokens": 1018648320} +{"current_steps": 3520, "total_steps": 15426, "loss": 0.373, "lr": 4.300289763286704e-05, "epoch": 0.6846251094038704, "percentage": 22.82, "elapsed_time": "10:25:35", "remaining_time": "1 day, 11:16:00", "throughput": 27215.68, "total_tokens": 1021559296} +{"current_steps": 3530, "total_steps": 15426, "loss": 0.3692, "lr": 4.298700183606e-05, "epoch": 0.6865700671010405, "percentage": 22.88, "elapsed_time": "10:27:18", "remaining_time": "1 day, 11:14:01", "throughput": 27219.06, "total_tokens": 1024491968} +{"current_steps": 3540, "total_steps": 15426, "loss": 
0.3713, "lr": 4.297112365362637e-05, "epoch": 0.6885150247982106, "percentage": 22.95, "elapsed_time": "10:29:00", "remaining_time": "1 day, 11:11:57", "throughput": 27222.76, "total_tokens": 1027392704} +{"current_steps": 3550, "total_steps": 15426, "loss": 0.374, "lr": 4.295526305305891e-05, "epoch": 0.6904599824953808, "percentage": 23.01, "elapsed_time": "10:30:36", "remaining_time": "1 day, 11:09:37", "throughput": 27228.66, "total_tokens": 1030249216} +{"current_steps": 3560, "total_steps": 15426, "loss": 0.3738, "lr": 4.293942000193429e-05, "epoch": 0.6924049401925508, "percentage": 23.08, "elapsed_time": "10:32:21", "remaining_time": "1 day, 11:07:43", "throughput": 27230.33, "total_tokens": 1033150208} +{"current_steps": 3570, "total_steps": 15426, "loss": 0.3752, "lr": 4.2923594467912866e-05, "epoch": 0.6943498978897209, "percentage": 23.14, "elapsed_time": "10:34:02", "remaining_time": "1 day, 11:05:38", "throughput": 27234.06, "total_tokens": 1036043520} +{"current_steps": 3580, "total_steps": 15426, "loss": 0.3766, "lr": 4.290778641873832e-05, "epoch": 0.696294855586891, "percentage": 23.21, "elapsed_time": "10:35:45", "remaining_time": "1 day, 11:03:40", "throughput": 27236.02, "total_tokens": 1038928064} +{"current_steps": 3590, "total_steps": 15426, "loss": 0.3753, "lr": 4.2891995822237455e-05, "epoch": 0.698239813284061, "percentage": 23.27, "elapsed_time": "10:37:27", "remaining_time": "1 day, 11:01:37", "throughput": 27239.64, "total_tokens": 1041834368} +{"current_steps": 3600, "total_steps": 15426, "loss": 0.3769, "lr": 4.28762226463199e-05, "epoch": 0.7001847709812311, "percentage": 23.34, "elapsed_time": "10:39:07", "remaining_time": "1 day, 10:59:31", "throughput": 27243.01, "total_tokens": 1044701760} +{"current_steps": 3610, "total_steps": 15426, "loss": 0.377, "lr": 4.286046685897781e-05, "epoch": 0.7021297286784013, "percentage": 23.4, "elapsed_time": "10:40:51", "remaining_time": "1 day, 10:57:36", "throughput": 27245.13, 
"total_tokens": 1047613824} +{"current_steps": 3620, "total_steps": 15426, "loss": 0.3737, "lr": 4.284472842828562e-05, "epoch": 0.7040746863755714, "percentage": 23.47, "elapsed_time": "10:42:31", "remaining_time": "1 day, 10:55:27", "throughput": 27249.99, "total_tokens": 1050514368} +{"current_steps": 3630, "total_steps": 15426, "loss": 0.3727, "lr": 4.282900732239977e-05, "epoch": 0.7060196440727414, "percentage": 23.53, "elapsed_time": "10:44:10", "remaining_time": "1 day, 10:53:19", "throughput": 27254.21, "total_tokens": 1053398528} +{"current_steps": 3640, "total_steps": 15426, "loss": 0.3741, "lr": 4.281330350955845e-05, "epoch": 0.7079646017699115, "percentage": 23.6, "elapsed_time": "10:45:50", "remaining_time": "1 day, 10:51:09", "throughput": 27258.29, "total_tokens": 1056259200} +{"current_steps": 3650, "total_steps": 15426, "loss": 0.3734, "lr": 4.279761695808125e-05, "epoch": 0.7099095594670816, "percentage": 23.66, "elapsed_time": "10:47:29", "remaining_time": "1 day, 10:49:00", "throughput": 27263.8, "total_tokens": 1059183744} +{"current_steps": 3660, "total_steps": 15426, "loss": 0.375, "lr": 4.278194763636904e-05, "epoch": 0.7118545171642516, "percentage": 23.73, "elapsed_time": "10:49:08", "remaining_time": "1 day, 10:46:51", "throughput": 27267.25, "total_tokens": 1062031040} +{"current_steps": 3670, "total_steps": 15426, "loss": 0.3713, "lr": 4.276629551290354e-05, "epoch": 0.7137994748614218, "percentage": 23.79, "elapsed_time": "10:50:59", "remaining_time": "1 day, 10:45:19", "throughput": 27264.83, "total_tokens": 1064958912} +{"current_steps": 3680, "total_steps": 15426, "loss": 0.3701, "lr": 4.2750660556247175e-05, "epoch": 0.7157444325585919, "percentage": 23.86, "elapsed_time": "10:52:41", "remaining_time": "1 day, 10:43:17", "throughput": 27267.98, "total_tokens": 1067851648} +{"current_steps": 3690, "total_steps": 15426, "loss": 0.3727, "lr": 4.273504273504274e-05, "epoch": 0.717689390255762, "percentage": 23.92, "elapsed_time": 
"10:54:24", "remaining_time": "1 day, 10:41:20", "throughput": 27270.61, "total_tokens": 1070770048} +{"current_steps": 3700, "total_steps": 15426, "loss": 0.3723, "lr": 4.271944201801317e-05, "epoch": 0.719634347952932, "percentage": 23.99, "elapsed_time": "10:56:05", "remaining_time": "1 day, 10:39:18", "throughput": 27273.33, "total_tokens": 1073640384} +{"current_steps": 3710, "total_steps": 15426, "loss": 0.3694, "lr": 4.270385837396127e-05, "epoch": 0.7215793056501021, "percentage": 24.05, "elapsed_time": "10:57:52", "remaining_time": "1 day, 10:37:31", "throughput": 27273.51, "total_tokens": 1076545792} +{"current_steps": 3720, "total_steps": 15426, "loss": 0.3676, "lr": 4.268829177176945e-05, "epoch": 0.7235242633472722, "percentage": 24.12, "elapsed_time": "10:59:33", "remaining_time": "1 day, 10:35:28", "throughput": 27276.63, "total_tokens": 1079427584} +{"current_steps": 3730, "total_steps": 15426, "loss": 0.3705, "lr": 4.2672742180399455e-05, "epoch": 0.7254692210444423, "percentage": 24.18, "elapsed_time": "11:01:14", "remaining_time": "1 day, 10:33:24", "throughput": 27280.18, "total_tokens": 1082318912} +{"current_steps": 3740, "total_steps": 15426, "loss": 0.3688, "lr": 4.265720956889213e-05, "epoch": 0.7274141787416124, "percentage": 24.24, "elapsed_time": "11:02:56", "remaining_time": "1 day, 10:31:26", "throughput": 27283.92, "total_tokens": 1085267072} +{"current_steps": 3750, "total_steps": 15426, "loss": 0.3716, "lr": 4.2641693906367113e-05, "epoch": 0.7293591364387825, "percentage": 24.31, "elapsed_time": "11:04:33", "remaining_time": "1 day, 10:29:09", "throughput": 27289.21, "total_tokens": 1088114112} +{"current_steps": 3760, "total_steps": 15426, "loss": 0.3651, "lr": 4.2626195162022646e-05, "epoch": 0.7313040941359525, "percentage": 24.37, "elapsed_time": "11:06:17", "remaining_time": "1 day, 10:27:17", "throughput": 27291.74, "total_tokens": 1091060224} +{"current_steps": 3770, "total_steps": 15426, "loss": 0.3734, "lr": 
4.2610713305135255e-05, "epoch": 0.7332490518331226, "percentage": 24.44, "elapsed_time": "11:08:01", "remaining_time": "1 day, 10:25:23", "throughput": 27293.87, "total_tokens": 1093979392} +{"current_steps": 3780, "total_steps": 15426, "loss": 0.3708, "lr": 4.2595248305059546e-05, "epoch": 0.7351940095302927, "percentage": 24.5, "elapsed_time": "11:09:46", "remaining_time": "1 day, 10:23:31", "throughput": 27295.62, "total_tokens": 1096907072} +{"current_steps": 3790, "total_steps": 15426, "loss": 0.3697, "lr": 4.2579800131227916e-05, "epoch": 0.7371389672274627, "percentage": 24.57, "elapsed_time": "11:11:27", "remaining_time": "1 day, 10:21:29", "throughput": 27298.37, "total_tokens": 1099780288} +{"current_steps": 3800, "total_steps": 15426, "loss": 0.369, "lr": 4.256436875315028e-05, "epoch": 0.7390839249246329, "percentage": 24.63, "elapsed_time": "11:13:10", "remaining_time": "1 day, 10:19:33", "throughput": 27300.14, "total_tokens": 1102661312} +{"current_steps": 3810, "total_steps": 15426, "loss": 0.3658, "lr": 4.2548954140413895e-05, "epoch": 0.741028882621803, "percentage": 24.7, "elapsed_time": "11:14:55", "remaining_time": "1 day, 10:17:43", "throughput": 27302.83, "total_tokens": 1105642176} +{"current_steps": 3820, "total_steps": 15426, "loss": 0.3721, "lr": 4.253355626268302e-05, "epoch": 0.7429738403189731, "percentage": 24.76, "elapsed_time": "11:16:37", "remaining_time": "1 day, 10:15:43", "throughput": 27305.82, "total_tokens": 1108538944} +{"current_steps": 3830, "total_steps": 15426, "loss": 0.3702, "lr": 4.2518175089698716e-05, "epoch": 0.7449187980161431, "percentage": 24.83, "elapsed_time": "11:18:15", "remaining_time": "1 day, 10:13:33", "throughput": 27310.44, "total_tokens": 1111421248} +{"current_steps": 3840, "total_steps": 15426, "loss": 0.3688, "lr": 4.25028105912786e-05, "epoch": 0.7468637557133132, "percentage": 24.89, "elapsed_time": "11:20:00", "remaining_time": "1 day, 10:11:42", "throughput": 27311.86, "total_tokens": 
1114338688} +{"current_steps": 3850, "total_steps": 15426, "loss": 0.3643, "lr": 4.2487462737316565e-05, "epoch": 0.7488087134104833, "percentage": 24.96, "elapsed_time": "11:21:43", "remaining_time": "1 day, 10:09:46", "throughput": 27314.59, "total_tokens": 1117260736} +{"current_steps": 3860, "total_steps": 15426, "loss": 0.3659, "lr": 4.2472131497782555e-05, "epoch": 0.7507536711076535, "percentage": 25.02, "elapsed_time": "11:23:25", "remaining_time": "1 day, 10:07:47", "throughput": 27316.9, "total_tokens": 1120138112} +{"current_steps": 3870, "total_steps": 15426, "loss": 0.3714, "lr": 4.245681684272231e-05, "epoch": 0.7526986288048235, "percentage": 25.09, "elapsed_time": "11:25:06", "remaining_time": "1 day, 10:05:46", "throughput": 27319.15, "total_tokens": 1122995840} +{"current_steps": 3880, "total_steps": 15426, "loss": 0.3732, "lr": 4.244151874225712e-05, "epoch": 0.7546435865019936, "percentage": 25.15, "elapsed_time": "11:26:46", "remaining_time": "1 day, 10:03:41", "throughput": 27323.24, "total_tokens": 1125895744} +{"current_steps": 3890, "total_steps": 15426, "loss": 0.3686, "lr": 4.2426237166583596e-05, "epoch": 0.7565885441991637, "percentage": 25.22, "elapsed_time": "11:28:28", "remaining_time": "1 day, 10:01:43", "throughput": 27326.54, "total_tokens": 1128824064} +{"current_steps": 3900, "total_steps": 15426, "loss": 0.3644, "lr": 4.241097208597339e-05, "epoch": 0.7585335018963337, "percentage": 25.28, "elapsed_time": "11:30:09", "remaining_time": "1 day, 9:59:41", "throughput": 27329.72, "total_tokens": 1131712256} +{"current_steps": 3910, "total_steps": 15426, "loss": 0.3698, "lr": 4.2395723470773005e-05, "epoch": 0.7604784595935038, "percentage": 25.35, "elapsed_time": "11:31:48", "remaining_time": "1 day, 9:57:34", "throughput": 27332.99, "total_tokens": 1134560064} +{"current_steps": 3920, "total_steps": 15426, "loss": 0.3721, "lr": 4.238049129140347e-05, "epoch": 0.762423417290674, "percentage": 25.41, "elapsed_time": "11:33:27", 
"remaining_time": "1 day, 9:55:27", "throughput": 27336.29, "total_tokens": 1137405888} +{"current_steps": 3930, "total_steps": 15426, "loss": 0.368, "lr": 4.236527551836022e-05, "epoch": 0.764368374987844, "percentage": 25.48, "elapsed_time": "11:35:09", "remaining_time": "1 day, 9:53:28", "throughput": 27339.87, "total_tokens": 1140337152} +{"current_steps": 3940, "total_steps": 15426, "loss": 0.3718, "lr": 4.235007612221274e-05, "epoch": 0.7663133326850141, "percentage": 25.54, "elapsed_time": "11:36:53", "remaining_time": "1 day, 9:51:35", "throughput": 27342.15, "total_tokens": 1143262656} +{"current_steps": 3950, "total_steps": 15426, "loss": 0.367, "lr": 4.2334893073604386e-05, "epoch": 0.7682582903821842, "percentage": 25.61, "elapsed_time": "11:38:33", "remaining_time": "1 day, 9:49:30", "throughput": 27345.6, "total_tokens": 1146139264} +{"current_steps": 3960, "total_steps": 15426, "loss": 0.3702, "lr": 4.231972634325214e-05, "epoch": 0.7702032480793543, "percentage": 25.67, "elapsed_time": "11:40:11", "remaining_time": "1 day, 9:47:23", "throughput": 27350.13, "total_tokens": 1149031872} +{"current_steps": 3970, "total_steps": 15426, "loss": 0.3647, "lr": 4.230457590194635e-05, "epoch": 0.7721482057765243, "percentage": 25.74, "elapsed_time": "11:41:52", "remaining_time": "1 day, 9:45:22", "throughput": 27353.37, "total_tokens": 1151928256} +{"current_steps": 3980, "total_steps": 15426, "loss": 0.3718, "lr": 4.228944172055053e-05, "epoch": 0.7740931634736945, "percentage": 25.8, "elapsed_time": "11:43:35", "remaining_time": "1 day, 9:43:26", "throughput": 27355.61, "total_tokens": 1154833024} +{"current_steps": 3990, "total_steps": 15426, "loss": 0.3687, "lr": 4.22743237700011e-05, "epoch": 0.7760381211708646, "percentage": 25.87, "elapsed_time": "11:45:19", "remaining_time": "1 day, 9:41:35", "throughput": 27357.82, "total_tokens": 1157772800} +{"current_steps": 4000, "total_steps": 15426, "loss": 0.3658, "lr": 4.225922202130716e-05, "epoch": 
0.7779830788680346, "percentage": 25.93, "elapsed_time": "11:47:00", "remaining_time": "1 day, 9:39:35", "throughput": 27360.96, "total_tokens": 1160678528} +{"current_steps": 4010, "total_steps": 15426, "loss": 0.369, "lr": 4.224413644555024e-05, "epoch": 0.7799280365652047, "percentage": 26.0, "elapsed_time": "11:48:40", "remaining_time": "1 day, 9:37:30", "throughput": 27364.45, "total_tokens": 1163550080} +{"current_steps": 4020, "total_steps": 15426, "loss": 0.3717, "lr": 4.222906701388411e-05, "epoch": 0.7818729942623748, "percentage": 26.06, "elapsed_time": "11:50:22", "remaining_time": "1 day, 9:35:32", "throughput": 27367.21, "total_tokens": 1166451968} +{"current_steps": 4030, "total_steps": 15426, "loss": 0.3677, "lr": 4.2214013697534466e-05, "epoch": 0.7838179519595448, "percentage": 26.12, "elapsed_time": "11:52:01", "remaining_time": "1 day, 9:33:28", "throughput": 27370.84, "total_tokens": 1169328448} +{"current_steps": 4040, "total_steps": 15426, "loss": 0.3618, "lr": 4.219897646779882e-05, "epoch": 0.7857629096567149, "percentage": 26.19, "elapsed_time": "11:53:46", "remaining_time": "1 day, 9:31:39", "throughput": 27372.43, "total_tokens": 1172271232} +{"current_steps": 4050, "total_steps": 15426, "loss": 0.3666, "lr": 4.2183955296046145e-05, "epoch": 0.7877078673538851, "percentage": 26.25, "elapsed_time": "11:55:27", "remaining_time": "1 day, 9:29:38", "throughput": 27375.78, "total_tokens": 1175169920} +{"current_steps": 4060, "total_steps": 15426, "loss": 0.3662, "lr": 4.2168950153716746e-05, "epoch": 0.7896528250510552, "percentage": 26.32, "elapsed_time": "11:57:09", "remaining_time": "1 day, 9:27:40", "throughput": 27379.13, "total_tokens": 1178101888} +{"current_steps": 4070, "total_steps": 15426, "loss": 0.3659, "lr": 4.215396101232197e-05, "epoch": 0.7915977827482252, "percentage": 26.38, "elapsed_time": "11:58:53", "remaining_time": "1 day, 9:25:49", "throughput": 27381.08, "total_tokens": 1181034112} +{"current_steps": 4080, 
"total_steps": 15426, "loss": 0.365, "lr": 4.213898784344398e-05, "epoch": 0.7935427404453953, "percentage": 26.45, "elapsed_time": "12:00:35", "remaining_time": "1 day, 9:23:53", "throughput": 27383.93, "total_tokens": 1183962048} +{"current_steps": 4090, "total_steps": 15426, "loss": 0.3649, "lr": 4.21240306187356e-05, "epoch": 0.7954876981425654, "percentage": 26.51, "elapsed_time": "12:02:17", "remaining_time": "1 day, 9:21:55", "throughput": 27386.23, "total_tokens": 1186849664} +{"current_steps": 4100, "total_steps": 15426, "loss": 0.368, "lr": 4.2109089309919967e-05, "epoch": 0.7974326558397354, "percentage": 26.58, "elapsed_time": "12:03:55", "remaining_time": "1 day, 9:19:46", "throughput": 27390.75, "total_tokens": 1189721600} +{"current_steps": 4110, "total_steps": 15426, "loss": 0.3656, "lr": 4.2094163888790445e-05, "epoch": 0.7993776135369056, "percentage": 26.64, "elapsed_time": "12:05:34", "remaining_time": "1 day, 9:17:42", "throughput": 27394.21, "total_tokens": 1192590656} +{"current_steps": 4120, "total_steps": 15426, "loss": 0.3662, "lr": 4.2079254327210294e-05, "epoch": 0.8013225712340757, "percentage": 26.71, "elapsed_time": "12:07:17", "remaining_time": "1 day, 9:15:49", "throughput": 27396.7, "total_tokens": 1195531136} +{"current_steps": 4130, "total_steps": 15426, "loss": 0.364, "lr": 4.206436059711249e-05, "epoch": 0.8032675289312458, "percentage": 26.77, "elapsed_time": "12:08:56", "remaining_time": "1 day, 9:13:44", "throughput": 27400.25, "total_tokens": 1198398912} +{"current_steps": 4140, "total_steps": 15426, "loss": 0.3688, "lr": 4.20494826704995e-05, "epoch": 0.8052124866284158, "percentage": 26.84, "elapsed_time": "12:10:37", "remaining_time": "1 day, 9:11:43", "throughput": 27402.62, "total_tokens": 1201250240} +{"current_steps": 4150, "total_steps": 15426, "loss": 0.3663, "lr": 4.203462051944307e-05, "epoch": 0.8071574443255859, "percentage": 26.9, "elapsed_time": "12:12:13", "remaining_time": "1 day, 9:09:33", "throughput": 
27406.64, "total_tokens": 1204079296} +{"current_steps": 4160, "total_steps": 15426, "loss": 0.3701, "lr": 4.201977411608398e-05, "epoch": 0.809102402022756, "percentage": 26.97, "elapsed_time": "12:13:54", "remaining_time": "1 day, 9:07:32", "throughput": 27409.19, "total_tokens": 1206943616} +{"current_steps": 4170, "total_steps": 15426, "loss": 0.366, "lr": 4.200494343263185e-05, "epoch": 0.8110473597199261, "percentage": 27.03, "elapsed_time": "12:15:33", "remaining_time": "1 day, 9:05:28", "throughput": 27412.72, "total_tokens": 1209813888} +{"current_steps": 4180, "total_steps": 15426, "loss": 0.3675, "lr": 4.1990128441364914e-05, "epoch": 0.8129923174170962, "percentage": 27.1, "elapsed_time": "12:17:15", "remaining_time": "1 day, 9:03:32", "throughput": 27415.43, "total_tokens": 1212730816} +{"current_steps": 4190, "total_steps": 15426, "loss": 0.367, "lr": 4.197532911462977e-05, "epoch": 0.8149372751142663, "percentage": 27.16, "elapsed_time": "12:18:53", "remaining_time": "1 day, 9:01:25", "throughput": 27419.68, "total_tokens": 1215607360} +{"current_steps": 4200, "total_steps": 15426, "loss": 0.3655, "lr": 4.196054542484125e-05, "epoch": 0.8168822328114363, "percentage": 27.23, "elapsed_time": "12:20:40", "remaining_time": "1 day, 8:59:42", "throughput": 27420.37, "total_tokens": 1218565440} +{"current_steps": 4210, "total_steps": 15426, "loss": 0.3609, "lr": 4.1945777344482084e-05, "epoch": 0.8188271905086064, "percentage": 27.29, "elapsed_time": "12:22:22", "remaining_time": "1 day, 8:57:47", "throughput": 27422.58, "total_tokens": 1221474176} +{"current_steps": 4220, "total_steps": 15426, "loss": 0.3611, "lr": 4.19310248461028e-05, "epoch": 0.8207721482057765, "percentage": 27.36, "elapsed_time": "12:24:04", "remaining_time": "1 day, 8:55:51", "throughput": 27425.35, "total_tokens": 1224390016} +{"current_steps": 4230, "total_steps": 15426, "loss": 0.3651, "lr": 4.1916287902321405e-05, "epoch": 0.8227171059029467, "percentage": 27.42, "elapsed_time": 
"12:25:42", "remaining_time": "1 day, 8:53:46", "throughput": 27429.06, "total_tokens": 1227257472} +{"current_steps": 4240, "total_steps": 15426, "loss": 0.3642, "lr": 4.190156648582328e-05, "epoch": 0.8246620636001167, "percentage": 27.49, "elapsed_time": "12:27:22", "remaining_time": "1 day, 8:51:42", "throughput": 27433.15, "total_tokens": 1230161152} +{"current_steps": 4250, "total_steps": 15426, "loss": 0.3656, "lr": 4.188686056936087e-05, "epoch": 0.8266070212972868, "percentage": 27.55, "elapsed_time": "12:29:07", "remaining_time": "1 day, 8:49:56", "throughput": 27434.67, "total_tokens": 1233120768} +{"current_steps": 4260, "total_steps": 15426, "loss": 0.3624, "lr": 4.187217012575352e-05, "epoch": 0.8285519789944569, "percentage": 27.62, "elapsed_time": "12:30:46", "remaining_time": "1 day, 8:47:51", "throughput": 27438.79, "total_tokens": 1236009856} +{"current_steps": 4270, "total_steps": 15426, "loss": 0.3669, "lr": 4.185749512788727e-05, "epoch": 0.8304969366916269, "percentage": 27.68, "elapsed_time": "12:32:25", "remaining_time": "1 day, 8:45:49", "throughput": 27442.55, "total_tokens": 1238909696} +{"current_steps": 4280, "total_steps": 15426, "loss": 0.3676, "lr": 4.184283554871462e-05, "epoch": 0.832441894388797, "percentage": 27.75, "elapsed_time": "12:34:07", "remaining_time": "1 day, 8:43:54", "throughput": 27444.2, "total_tokens": 1241784960} +{"current_steps": 4290, "total_steps": 15426, "loss": 0.3663, "lr": 4.1828191361254344e-05, "epoch": 0.8343868520859671, "percentage": 27.81, "elapsed_time": "12:35:50", "remaining_time": "1 day, 8:42:01", "throughput": 27445.86, "total_tokens": 1244688704} +{"current_steps": 4300, "total_steps": 15426, "loss": 0.3669, "lr": 4.181356253859127e-05, "epoch": 0.8363318097831373, "percentage": 27.88, "elapsed_time": "12:37:30", "remaining_time": "1 day, 8:40:00", "throughput": 27448.45, "total_tokens": 1247545920} +{"current_steps": 4310, "total_steps": 15426, "loss": 0.362, "lr": 4.179894905387606e-05, 
"epoch": 0.8382767674803073, "percentage": 27.94, "elapsed_time": "12:39:12", "remaining_time": "1 day, 8:38:04", "throughput": 27450.73, "total_tokens": 1250443840} +{"current_steps": 4320, "total_steps": 15426, "loss": 0.364, "lr": 4.178435088032502e-05, "epoch": 0.8402217251774774, "percentage": 28.0, "elapsed_time": "12:40:54", "remaining_time": "1 day, 8:36:09", "throughput": 27452.87, "total_tokens": 1253336832} +{"current_steps": 4330, "total_steps": 15426, "loss": 0.364, "lr": 4.176976799121989e-05, "epoch": 0.8421666828746475, "percentage": 28.07, "elapsed_time": "12:42:31", "remaining_time": "1 day, 8:34:03", "throughput": 27456.79, "total_tokens": 1256200576} +{"current_steps": 4340, "total_steps": 15426, "loss": 0.3619, "lr": 4.1755200359907657e-05, "epoch": 0.8441116405718175, "percentage": 28.13, "elapsed_time": "12:44:11", "remaining_time": "1 day, 8:32:02", "throughput": 27460.48, "total_tokens": 1259103424} +{"current_steps": 4350, "total_steps": 15426, "loss": 0.3599, "lr": 4.174064795980028e-05, "epoch": 0.8460565982689876, "percentage": 28.2, "elapsed_time": "12:45:54", "remaining_time": "1 day, 8:30:08", "throughput": 27462.58, "total_tokens": 1262018112} +{"current_steps": 4360, "total_steps": 15426, "loss": 0.3603, "lr": 4.17261107643746e-05, "epoch": 0.8480015559661578, "percentage": 28.26, "elapsed_time": "12:47:33", "remaining_time": "1 day, 8:28:07", "throughput": 27466.08, "total_tokens": 1264908992} +{"current_steps": 4370, "total_steps": 15426, "loss": 0.3636, "lr": 4.171158874717204e-05, "epoch": 0.8499465136633279, "percentage": 28.33, "elapsed_time": "12:49:15", "remaining_time": "1 day, 8:26:12", "throughput": 27468.16, "total_tokens": 1267805504} +{"current_steps": 4380, "total_steps": 15426, "loss": 0.3607, "lr": 4.169708188179844e-05, "epoch": 0.8518914713604979, "percentage": 28.39, "elapsed_time": "12:50:57", "remaining_time": "1 day, 8:24:16", "throughput": 27470.64, "total_tokens": 1270711552} +{"current_steps": 4390, 
"total_steps": 15426, "loss": 0.3635, "lr": 4.1682590141923846e-05, "epoch": 0.853836429057668, "percentage": 28.46, "elapsed_time": "12:52:35", "remaining_time": "1 day, 8:22:12", "throughput": 27473.85, "total_tokens": 1273563712} +{"current_steps": 4400, "total_steps": 15426, "loss": 0.3608, "lr": 4.1668113501282335e-05, "epoch": 0.8557813867548381, "percentage": 28.52, "elapsed_time": "12:54:19", "remaining_time": "1 day, 8:20:24", "throughput": 27474.83, "total_tokens": 1276477824} +{"current_steps": 4410, "total_steps": 15426, "loss": 0.3584, "lr": 4.165365193367178e-05, "epoch": 0.8577263444520081, "percentage": 28.59, "elapsed_time": "12:56:02", "remaining_time": "1 day, 8:18:30", "throughput": 27477.28, "total_tokens": 1279402624} +{"current_steps": 4420, "total_steps": 15426, "loss": 0.3637, "lr": 4.163920541295369e-05, "epoch": 0.8596713021491783, "percentage": 28.65, "elapsed_time": "12:57:43", "remaining_time": "1 day, 8:16:33", "throughput": 27480.08, "total_tokens": 1282307712} +{"current_steps": 4430, "total_steps": 15426, "loss": 0.3598, "lr": 4.1624773913052946e-05, "epoch": 0.8616162598463484, "percentage": 28.72, "elapsed_time": "12:59:27", "remaining_time": "1 day, 8:14:44", "throughput": 27481.04, "total_tokens": 1285217792} +{"current_steps": 4440, "total_steps": 15426, "loss": 0.3642, "lr": 4.161035740795769e-05, "epoch": 0.8635612175435184, "percentage": 28.78, "elapsed_time": "13:01:08", "remaining_time": "1 day, 8:12:49", "throughput": 27483.83, "total_tokens": 1288139456} +{"current_steps": 4450, "total_steps": 15426, "loss": 0.3621, "lr": 4.1595955871719055e-05, "epoch": 0.8655061752406885, "percentage": 28.85, "elapsed_time": "13:02:53", "remaining_time": "1 day, 8:11:00", "throughput": 27484.66, "total_tokens": 1291040704} +{"current_steps": 4460, "total_steps": 15426, "loss": 0.3609, "lr": 4.158156927845101e-05, "epoch": 0.8674511329378586, "percentage": 28.91, "elapsed_time": "13:04:35", "remaining_time": "1 day, 8:09:06", 
"throughput": 27486.57, "total_tokens": 1293943936} +{"current_steps": 4470, "total_steps": 15426, "loss": 0.364, "lr": 4.156719760233016e-05, "epoch": 0.8693960906350287, "percentage": 28.98, "elapsed_time": "13:06:11", "remaining_time": "1 day, 8:06:57", "throughput": 27490.65, "total_tokens": 1296774016} +{"current_steps": 4480, "total_steps": 15426, "loss": 0.3648, "lr": 4.155284081759552e-05, "epoch": 0.8713410483321987, "percentage": 29.04, "elapsed_time": "13:07:53", "remaining_time": "1 day, 8:05:02", "throughput": 27492.72, "total_tokens": 1299667584} +{"current_steps": 4490, "total_steps": 15426, "loss": 0.3619, "lr": 4.1538498898548356e-05, "epoch": 0.8732860060293689, "percentage": 29.11, "elapsed_time": "13:09:33", "remaining_time": "1 day, 8:03:05", "throughput": 27495.59, "total_tokens": 1302569088} +{"current_steps": 4500, "total_steps": 15426, "loss": 0.366, "lr": 4.1524171819552e-05, "epoch": 0.875230963726539, "percentage": 29.17, "elapsed_time": "13:11:12", "remaining_time": "1 day, 8:01:02", "throughput": 27498.46, "total_tokens": 1305416000} +{"current_steps": 4510, "total_steps": 15426, "loss": 0.36, "lr": 4.15098595550316e-05, "epoch": 0.877175921423709, "percentage": 29.24, "elapsed_time": "13:12:55", "remaining_time": "1 day, 7:59:11", "throughput": 27500.44, "total_tokens": 1308345216} +{"current_steps": 4520, "total_steps": 15426, "loss": 0.3614, "lr": 4.1495562079474e-05, "epoch": 0.8791208791208791, "percentage": 29.3, "elapsed_time": "13:14:40", "remaining_time": "1 day, 7:57:25", "throughput": 27501.26, "total_tokens": 1311275200} +{"current_steps": 4530, "total_steps": 15426, "loss": 0.361, "lr": 4.148127936742749e-05, "epoch": 0.8810658368180492, "percentage": 29.37, "elapsed_time": "13:16:19", "remaining_time": "1 day, 7:55:24", "throughput": 27503.75, "total_tokens": 1314119488} +{"current_steps": 4540, "total_steps": 15426, "loss": 0.3625, "lr": 4.146701139350166e-05, "epoch": 0.8830107945152192, "percentage": 29.43, 
"elapsed_time": "13:18:03", "remaining_time": "1 day, 7:53:34", "throughput": 27505.42, "total_tokens": 1317052160} +{"current_steps": 4550, "total_steps": 15426, "loss": 0.3599, "lr": 4.1452758132367196e-05, "epoch": 0.8849557522123894, "percentage": 29.5, "elapsed_time": "13:19:46", "remaining_time": "1 day, 7:51:43", "throughput": 27507.79, "total_tokens": 1319999040} +{"current_steps": 4560, "total_steps": 15426, "loss": 0.362, "lr": 4.1438519558755656e-05, "epoch": 0.8869007099095595, "percentage": 29.56, "elapsed_time": "13:21:27", "remaining_time": "1 day, 7:49:46", "throughput": 27510.4, "total_tokens": 1322899584} +{"current_steps": 4570, "total_steps": 15426, "loss": 0.3581, "lr": 4.1424295647459336e-05, "epoch": 0.8888456676067296, "percentage": 29.63, "elapsed_time": "13:23:10", "remaining_time": "1 day, 7:47:56", "throughput": 27512.86, "total_tokens": 1325859328} +{"current_steps": 4580, "total_steps": 15426, "loss": 0.3614, "lr": 4.141008637333106e-05, "epoch": 0.8907906253038996, "percentage": 29.69, "elapsed_time": "13:24:49", "remaining_time": "1 day, 7:45:54", "throughput": 27515.9, "total_tokens": 1328716928} +{"current_steps": 4590, "total_steps": 15426, "loss": 0.3627, "lr": 4.1395891711283974e-05, "epoch": 0.8927355830010697, "percentage": 29.75, "elapsed_time": "13:26:24", "remaining_time": "1 day, 7:43:44", "throughput": 27520.5, "total_tokens": 1331558208} +{"current_steps": 4600, "total_steps": 15426, "loss": 0.3588, "lr": 4.1381711636291395e-05, "epoch": 0.8946805406982398, "percentage": 29.82, "elapsed_time": "13:28:03", "remaining_time": "1 day, 7:41:44", "throughput": 27523.77, "total_tokens": 1334449344} +{"current_steps": 4610, "total_steps": 15426, "loss": 0.3619, "lr": 4.1367546123386604e-05, "epoch": 0.89662549839541, "percentage": 29.88, "elapsed_time": "13:29:44", "remaining_time": "1 day, 7:39:48", "throughput": 27525.78, "total_tokens": 1337319360} +{"current_steps": 4620, "total_steps": 15426, "loss": 0.3665, "lr": 
4.1353395147662673e-05, "epoch": 0.89857045609258, "percentage": 29.95, "elapsed_time": "13:31:24", "remaining_time": "1 day, 7:37:50", "throughput": 27527.66, "total_tokens": 1340161536} +{"current_steps": 4630, "total_steps": 15426, "loss": 0.357, "lr": 4.133925868427225e-05, "epoch": 0.9005154137897501, "percentage": 30.01, "elapsed_time": "13:33:02", "remaining_time": "1 day, 7:35:48", "throughput": 27531.02, "total_tokens": 1343026432} +{"current_steps": 4640, "total_steps": 15426, "loss": 0.368, "lr": 4.132513670842744e-05, "epoch": 0.9024603714869202, "percentage": 30.08, "elapsed_time": "13:34:42", "remaining_time": "1 day, 7:33:51", "throughput": 27533.23, "total_tokens": 1345905664} +{"current_steps": 4650, "total_steps": 15426, "loss": 0.3589, "lr": 4.1311029195399534e-05, "epoch": 0.9044053291840902, "percentage": 30.14, "elapsed_time": "13:36:26", "remaining_time": "1 day, 7:32:02", "throughput": 27534.44, "total_tokens": 1348815680} +{"current_steps": 4660, "total_steps": 15426, "loss": 0.3588, "lr": 4.129693612051892e-05, "epoch": 0.9063502868812603, "percentage": 30.21, "elapsed_time": "13:38:07", "remaining_time": "1 day, 7:30:07", "throughput": 27536.94, "total_tokens": 1351725504} +{"current_steps": 4670, "total_steps": 15426, "loss": 0.364, "lr": 4.1282857459174826e-05, "epoch": 0.9082952445784305, "percentage": 30.27, "elapsed_time": "13:39:46", "remaining_time": "1 day, 7:28:06", "throughput": 27539.53, "total_tokens": 1354573504} +{"current_steps": 4680, "total_steps": 15426, "loss": 0.363, "lr": 4.1268793186815184e-05, "epoch": 0.9102402022756005, "percentage": 30.34, "elapsed_time": "13:41:29", "remaining_time": "1 day, 7:26:17", "throughput": 27541.11, "total_tokens": 1357496384} +{"current_steps": 4690, "total_steps": 15426, "loss": 0.3657, "lr": 4.1254743278946456e-05, "epoch": 0.9121851599727706, "percentage": 30.4, "elapsed_time": "13:43:11", "remaining_time": "1 day, 7:24:23", "throughput": 27542.37, "total_tokens": 1360364160} 
+{"current_steps": 4700, "total_steps": 15426, "loss": 0.3591, "lr": 4.1240707711133394e-05, "epoch": 0.9141301176699407, "percentage": 30.47, "elapsed_time": "13:44:55", "remaining_time": "1 day, 7:22:34", "throughput": 27543.83, "total_tokens": 1363285504} +{"current_steps": 4710, "total_steps": 15426, "loss": 0.3611, "lr": 4.122668645899893e-05, "epoch": 0.9160750753671107, "percentage": 30.53, "elapsed_time": "13:46:38", "remaining_time": "1 day, 7:20:43", "throughput": 27545.67, "total_tokens": 1366212608} +{"current_steps": 4720, "total_steps": 15426, "loss": 0.3617, "lr": 4.1212679498223975e-05, "epoch": 0.9180200330642808, "percentage": 30.6, "elapsed_time": "13:48:15", "remaining_time": "1 day, 7:18:39", "throughput": 27548.45, "total_tokens": 1369021888} +{"current_steps": 4730, "total_steps": 15426, "loss": 0.3589, "lr": 4.1198686804547215e-05, "epoch": 0.9199649907614509, "percentage": 30.66, "elapsed_time": "13:49:57", "remaining_time": "1 day, 7:16:48", "throughput": 27549.92, "total_tokens": 1371928256} +{"current_steps": 4740, "total_steps": 15426, "loss": 0.3604, "lr": 4.118470835376499e-05, "epoch": 0.9219099484586211, "percentage": 30.73, "elapsed_time": "13:51:40", "remaining_time": "1 day, 7:14:56", "throughput": 27552.59, "total_tokens": 1374877952} +{"current_steps": 4750, "total_steps": 15426, "loss": 0.3624, "lr": 4.117074412173107e-05, "epoch": 0.9238549061557911, "percentage": 30.79, "elapsed_time": "13:53:20", "remaining_time": "1 day, 7:13:00", "throughput": 27555.68, "total_tokens": 1377803968} +{"current_steps": 4760, "total_steps": 15426, "loss": 0.3602, "lr": 4.115679408435648e-05, "epoch": 0.9257998638529612, "percentage": 30.86, "elapsed_time": "13:54:55", "remaining_time": "1 day, 7:10:52", "throughput": 27559.69, "total_tokens": 1380623872} +{"current_steps": 4770, "total_steps": 15426, "loss": 0.3624, "lr": 4.114285821760937e-05, "epoch": 0.9277448215501313, "percentage": 30.92, "elapsed_time": "13:56:39", "remaining_time": "1 
day, 7:09:04", "throughput": 27560.1, "total_tokens": 1383511808} +{"current_steps": 4780, "total_steps": 15426, "loss": 0.3611, "lr": 4.11289364975148e-05, "epoch": 0.9296897792473013, "percentage": 30.99, "elapsed_time": "13:58:19", "remaining_time": "1 day, 7:07:06", "throughput": 27562.83, "total_tokens": 1386387648} +{"current_steps": 4790, "total_steps": 15426, "loss": 0.3595, "lr": 4.111502890015456e-05, "epoch": 0.9316347369444714, "percentage": 31.05, "elapsed_time": "13:59:57", "remaining_time": "1 day, 7:05:05", "throughput": 27566.53, "total_tokens": 1389277824} +{"current_steps": 4800, "total_steps": 15426, "loss": 0.3611, "lr": 4.1101135401667056e-05, "epoch": 0.9335796946416416, "percentage": 31.12, "elapsed_time": "14:01:42", "remaining_time": "1 day, 7:03:19", "throughput": 27566.77, "total_tokens": 1392186496} +{"current_steps": 4810, "total_steps": 15426, "loss": 0.3597, "lr": 4.108725597824708e-05, "epoch": 0.9355246523388117, "percentage": 31.18, "elapsed_time": "14:03:21", "remaining_time": "1 day, 7:01:20", "throughput": 27569.62, "total_tokens": 1395059008} +{"current_steps": 4820, "total_steps": 15426, "loss": 0.3625, "lr": 4.107339060614564e-05, "epoch": 0.9374696100359817, "percentage": 31.25, "elapsed_time": "14:05:02", "remaining_time": "1 day, 6:59:26", "throughput": 27571.59, "total_tokens": 1397952256} +{"current_steps": 4830, "total_steps": 15426, "loss": 0.3625, "lr": 4.1059539261669825e-05, "epoch": 0.9394145677331518, "percentage": 31.31, "elapsed_time": "14:06:48", "remaining_time": "1 day, 6:57:42", "throughput": 27573.07, "total_tokens": 1400934912} +{"current_steps": 4840, "total_steps": 15426, "loss": 0.3621, "lr": 4.104570192118262e-05, "epoch": 0.9413595254303219, "percentage": 31.38, "elapsed_time": "14:08:28", "remaining_time": "1 day, 6:55:47", "throughput": 27575.2, "total_tokens": 1403820416} +{"current_steps": 4850, "total_steps": 15426, "loss": 0.3608, "lr": 4.1031878561102714e-05, "epoch": 0.9433044831274919, 
"percentage": 31.44, "elapsed_time": "14:10:08", "remaining_time": "1 day, 6:53:50", "throughput": 27577.22, "total_tokens": 1406680320} +{"current_steps": 4860, "total_steps": 15426, "loss": 0.3612, "lr": 4.1018069157904385e-05, "epoch": 0.9452494408246621, "percentage": 31.51, "elapsed_time": "14:11:46", "remaining_time": "1 day, 6:51:48", "throughput": 27581.0, "total_tokens": 1409557056} +{"current_steps": 4870, "total_steps": 15426, "loss": 0.3564, "lr": 4.100427368811727e-05, "epoch": 0.9471943985218322, "percentage": 31.57, "elapsed_time": "14:13:29", "remaining_time": "1 day, 6:49:59", "throughput": 27582.83, "total_tokens": 1412501696} +{"current_steps": 4880, "total_steps": 15426, "loss": 0.3579, "lr": 4.099049212832622e-05, "epoch": 0.9491393562190023, "percentage": 31.63, "elapsed_time": "14:15:14", "remaining_time": "1 day, 6:48:14", "throughput": 27583.91, "total_tokens": 1415454784} +{"current_steps": 4890, "total_steps": 15426, "loss": 0.3596, "lr": 4.0976724455171155e-05, "epoch": 0.9510843139161723, "percentage": 31.7, "elapsed_time": "14:16:59", "remaining_time": "1 day, 6:46:29", "throughput": 27584.5, "total_tokens": 1418391360} +{"current_steps": 4900, "total_steps": 15426, "loss": 0.3608, "lr": 4.096297064534688e-05, "epoch": 0.9530292716133424, "percentage": 31.76, "elapsed_time": "14:18:40", "remaining_time": "1 day, 6:44:35", "throughput": 27587.1, "total_tokens": 1421313984} +{"current_steps": 4910, "total_steps": 15426, "loss": 0.3602, "lr": 4.0949230675602904e-05, "epoch": 0.9549742293105125, "percentage": 31.83, "elapsed_time": "14:20:20", "remaining_time": "1 day, 6:42:38", "throughput": 27589.14, "total_tokens": 1424173440} +{"current_steps": 4920, "total_steps": 15426, "loss": 0.3542, "lr": 4.09355045227433e-05, "epoch": 0.9569191870076826, "percentage": 31.89, "elapsed_time": "14:22:05", "remaining_time": "1 day, 6:40:52", "throughput": 27590.9, "total_tokens": 1427150080} +{"current_steps": 4930, "total_steps": 15426, "loss": 
0.3621, "lr": 4.092179216362654e-05, "epoch": 0.9588641447048527, "percentage": 31.96, "elapsed_time": "14:23:46", "remaining_time": "1 day, 6:38:59", "throughput": 27592.15, "total_tokens": 1430007232} +{"current_steps": 4940, "total_steps": 15426, "loss": 0.3607, "lr": 4.090809357516532e-05, "epoch": 0.9608091024020228, "percentage": 32.02, "elapsed_time": "14:25:25", "remaining_time": "1 day, 6:37:01", "throughput": 27594.98, "total_tokens": 1432895424} +{"current_steps": 4950, "total_steps": 15426, "loss": 0.3584, "lr": 4.089440873432638e-05, "epoch": 0.9627540600991928, "percentage": 32.09, "elapsed_time": "14:27:07", "remaining_time": "1 day, 6:35:08", "throughput": 27596.93, "total_tokens": 1435788032} +{"current_steps": 4960, "total_steps": 15426, "loss": 0.362, "lr": 4.088073761813037e-05, "epoch": 0.9646990177963629, "percentage": 32.15, "elapsed_time": "14:28:47", "remaining_time": "1 day, 6:33:12", "throughput": 27599.61, "total_tokens": 1438693952} +{"current_steps": 4970, "total_steps": 15426, "loss": 0.3575, "lr": 4.086708020365172e-05, "epoch": 0.966643975493533, "percentage": 32.22, "elapsed_time": "14:30:31", "remaining_time": "1 day, 6:31:26", "throughput": 27600.72, "total_tokens": 1441638848} +{"current_steps": 4980, "total_steps": 15426, "loss": 0.362, "lr": 4.0853436468018354e-05, "epoch": 0.968588933190703, "percentage": 32.28, "elapsed_time": "14:32:13", "remaining_time": "1 day, 6:29:33", "throughput": 27602.43, "total_tokens": 1444518080} +{"current_steps": 4990, "total_steps": 15426, "loss": 0.3614, "lr": 4.0839806388411686e-05, "epoch": 0.9705338908878732, "percentage": 32.35, "elapsed_time": "14:33:51", "remaining_time": "1 day, 6:27:33", "throughput": 27605.63, "total_tokens": 1447397120} +{"current_steps": 5000, "total_steps": 15426, "loss": 0.3578, "lr": 4.0826189942066346e-05, "epoch": 0.9724788485850433, "percentage": 32.41, "elapsed_time": "14:35:29", "remaining_time": "1 day, 6:25:35", "throughput": 27607.98, "total_tokens": 
1450243776} +{"current_steps": 5000, "total_steps": 15426, "eval_loss": 0.31900840997695923, "epoch": 0.9724788485850433, "percentage": 32.41, "elapsed_time": "14:35:32", "remaining_time": "1 day, 6:25:39", "throughput": 27606.85, "total_tokens": 1450243776} +{"current_steps": 5010, "total_steps": 15426, "loss": 0.3624, "lr": 4.081258710627008e-05, "epoch": 0.9744238062822134, "percentage": 32.48, "elapsed_time": "14:38:25", "remaining_time": "1 day, 6:26:17", "throughput": 27571.37, "total_tokens": 1453165696} +{"current_steps": 5020, "total_steps": 15426, "loss": 0.3602, "lr": 4.0798997858363557e-05, "epoch": 0.9763687639793834, "percentage": 32.54, "elapsed_time": "14:40:06", "remaining_time": "1 day, 6:24:23", "throughput": 27573.73, "total_tokens": 1456076096} +{"current_steps": 5030, "total_steps": 15426, "loss": 0.3567, "lr": 4.078542217574024e-05, "epoch": 0.9783137216765535, "percentage": 32.61, "elapsed_time": "14:41:46", "remaining_time": "1 day, 6:22:26", "throughput": 27575.75, "total_tokens": 1458932224} +{"current_steps": 5040, "total_steps": 15426, "loss": 0.3565, "lr": 4.0771860035846196e-05, "epoch": 0.9802586793737236, "percentage": 32.67, "elapsed_time": "14:43:25", "remaining_time": "1 day, 6:20:28", "throughput": 27578.11, "total_tokens": 1461787648} +{"current_steps": 5050, "total_steps": 15426, "loss": 0.3552, "lr": 4.0758311416179965e-05, "epoch": 0.9822036370708938, "percentage": 32.74, "elapsed_time": "14:45:02", "remaining_time": "1 day, 6:18:26", "throughput": 27581.19, "total_tokens": 1464621760} +{"current_steps": 5060, "total_steps": 15426, "loss": 0.3598, "lr": 4.0744776294292386e-05, "epoch": 0.9841485947680638, "percentage": 32.8, "elapsed_time": "14:46:39", "remaining_time": "1 day, 6:16:25", "throughput": 27583.99, "total_tokens": 1467458880} +{"current_steps": 5070, "total_steps": 15426, "loss": 0.3602, "lr": 4.073125464778646e-05, "epoch": 0.9860935524652339, "percentage": 32.87, "elapsed_time": "14:48:22", "remaining_time": 
"1 day, 6:14:35", "throughput": 27585.27, "total_tokens": 1470364224} +{"current_steps": 5080, "total_steps": 15426, "loss": 0.3603, "lr": 4.071774645431717e-05, "epoch": 0.988038510162404, "percentage": 32.93, "elapsed_time": "14:50:07", "remaining_time": "1 day, 6:12:49", "throughput": 27585.33, "total_tokens": 1473253056} +{"current_steps": 5090, "total_steps": 15426, "loss": 0.3596, "lr": 4.070425169159135e-05, "epoch": 0.989983467859574, "percentage": 33.0, "elapsed_time": "14:51:47", "remaining_time": "1 day, 6:10:55", "throughput": 27587.81, "total_tokens": 1476159616} +{"current_steps": 5100, "total_steps": 15426, "loss": 0.3617, "lr": 4.069077033736751e-05, "epoch": 0.9919284255567441, "percentage": 33.06, "elapsed_time": "14:53:28", "remaining_time": "1 day, 6:09:00", "throughput": 27589.61, "total_tokens": 1479031040} +{"current_steps": 5110, "total_steps": 15426, "loss": 0.3586, "lr": 4.06773023694557e-05, "epoch": 0.9938733832539143, "percentage": 33.13, "elapsed_time": "14:55:06", "remaining_time": "1 day, 6:07:02", "throughput": 27592.79, "total_tokens": 1481921472} +{"current_steps": 5120, "total_steps": 15426, "loss": 0.3649, "lr": 4.066384776571732e-05, "epoch": 0.9958183409510843, "percentage": 33.19, "elapsed_time": "14:56:48", "remaining_time": "1 day, 6:05:09", "throughput": 27594.65, "total_tokens": 1484816000} +{"current_steps": 5130, "total_steps": 15426, "loss": 0.3579, "lr": 4.065040650406504e-05, "epoch": 0.9977632986482544, "percentage": 33.26, "elapsed_time": "14:58:26", "remaining_time": "1 day, 6:03:11", "throughput": 27597.82, "total_tokens": 1487703488} +{"current_steps": 5140, "total_steps": 15426, "loss": 0.3569, "lr": 4.0636978562462576e-05, "epoch": 0.9997082563454245, "percentage": 33.32, "elapsed_time": "15:00:02", "remaining_time": "1 day, 6:01:08", "throughput": 27600.94, "total_tokens": 1490532800} +{"current_steps": 5150, "total_steps": 15426, "loss": 0.3475, "lr": 4.062356391892456e-05, "epoch": 1.001555966157736, 
"percentage": 33.39, "elapsed_time": "15:01:38", "remaining_time": "1 day, 5:59:05", "throughput": 27601.92, "total_tokens": 1493231872} +{"current_steps": 5160, "total_steps": 15426, "loss": 0.341, "lr": 4.0610162551516395e-05, "epoch": 1.0035009238549062, "percentage": 33.45, "elapsed_time": "15:03:19", "remaining_time": "1 day, 5:57:11", "throughput": 27603.66, "total_tokens": 1496093696} +{"current_steps": 5170, "total_steps": 15426, "loss": 0.3436, "lr": 4.059677443835412e-05, "epoch": 1.0054458815520761, "percentage": 33.51, "elapsed_time": "15:05:00", "remaining_time": "1 day, 5:55:19", "throughput": 27605.55, "total_tokens": 1499002304} +{"current_steps": 5180, "total_steps": 15426, "loss": 0.343, "lr": 4.058339955760423e-05, "epoch": 1.0073908392492463, "percentage": 33.58, "elapsed_time": "15:06:45", "remaining_time": "1 day, 5:53:33", "throughput": 27606.31, "total_tokens": 1501935232} +{"current_steps": 5190, "total_steps": 15426, "loss": 0.3446, "lr": 4.0570037887483535e-05, "epoch": 1.0093357969464165, "percentage": 33.64, "elapsed_time": "15:08:25", "remaining_time": "1 day, 5:51:39", "throughput": 27608.38, "total_tokens": 1504815744} +{"current_steps": 5200, "total_steps": 15426, "loss": 0.3451, "lr": 4.0556689406259025e-05, "epoch": 1.0112807546435865, "percentage": 33.71, "elapsed_time": "15:10:07", "remaining_time": "1 day, 5:49:47", "throughput": 27610.58, "total_tokens": 1507745536} +{"current_steps": 5210, "total_steps": 15426, "loss": 0.3441, "lr": 4.054335409224771e-05, "epoch": 1.0132257123407566, "percentage": 33.77, "elapsed_time": "15:11:45", "remaining_time": "1 day, 5:47:49", "throughput": 27613.74, "total_tokens": 1510625920} +{"current_steps": 5220, "total_steps": 15426, "loss": 0.3399, "lr": 4.053003192381646e-05, "epoch": 1.0151706700379266, "percentage": 33.84, "elapsed_time": "15:13:29", "remaining_time": "1 day, 5:46:01", "throughput": 27614.52, "total_tokens": 1513537216} +{"current_steps": 5230, "total_steps": 15426, "loss": 
0.344, "lr": 4.051672287938189e-05, "epoch": 1.0171156277350968, "percentage": 33.9, "elapsed_time": "15:15:13", "remaining_time": "1 day, 5:44:15", "throughput": 27615.87, "total_tokens": 1516490432} +{"current_steps": 5240, "total_steps": 15426, "loss": 0.3389, "lr": 4.050342693741019e-05, "epoch": 1.0190605854322667, "percentage": 33.97, "elapsed_time": "15:16:56", "remaining_time": "1 day, 5:42:26", "throughput": 27617.1, "total_tokens": 1519394624} +{"current_steps": 5250, "total_steps": 15426, "loss": 0.3428, "lr": 4.049014407641699e-05, "epoch": 1.021005543129437, "percentage": 34.03, "elapsed_time": "15:18:37", "remaining_time": "1 day, 5:40:33", "throughput": 27618.15, "total_tokens": 1522246400} +{"current_steps": 5260, "total_steps": 15426, "loss": 0.3475, "lr": 4.047687427496717e-05, "epoch": 1.022950500826607, "percentage": 34.1, "elapsed_time": "15:20:18", "remaining_time": "1 day, 5:38:41", "throughput": 27619.52, "total_tokens": 1525113984} +{"current_steps": 5270, "total_steps": 15426, "loss": 0.3446, "lr": 4.046361751167479e-05, "epoch": 1.024895458523777, "percentage": 34.16, "elapsed_time": "15:21:59", "remaining_time": "1 day, 5:36:48", "throughput": 27620.86, "total_tokens": 1527981632} +{"current_steps": 5280, "total_steps": 15426, "loss": 0.3435, "lr": 4.045037376520292e-05, "epoch": 1.0268404162209472, "percentage": 34.23, "elapsed_time": "15:23:44", "remaining_time": "1 day, 5:35:02", "throughput": 27621.16, "total_tokens": 1530878336} +{"current_steps": 5290, "total_steps": 15426, "loss": 0.3447, "lr": 4.043714301426344e-05, "epoch": 1.0287853739181172, "percentage": 34.29, "elapsed_time": "15:25:30", "remaining_time": "1 day, 5:33:20", "throughput": 27622.28, "total_tokens": 1533887552} +{"current_steps": 5300, "total_steps": 15426, "loss": 0.3408, "lr": 4.042392523761696e-05, "epoch": 1.0307303316152874, "percentage": 34.36, "elapsed_time": "15:27:10", "remaining_time": "1 day, 5:31:24", "throughput": 27624.69, "total_tokens": 
1536763392} +{"current_steps": 5310, "total_steps": 15426, "loss": 0.3414, "lr": 4.041072041407267e-05, "epoch": 1.0326752893124576, "percentage": 34.42, "elapsed_time": "15:28:49", "remaining_time": "1 day, 5:29:29", "throughput": 27626.55, "total_tokens": 1539620416} +{"current_steps": 5320, "total_steps": 15426, "loss": 0.344, "lr": 4.039752852248815e-05, "epoch": 1.0346202470096275, "percentage": 34.49, "elapsed_time": "15:30:33", "remaining_time": "1 day, 5:27:41", "throughput": 27627.2, "total_tokens": 1542510720} +{"current_steps": 5330, "total_steps": 15426, "loss": 0.3443, "lr": 4.0384349541769286e-05, "epoch": 1.0365652047067977, "percentage": 34.55, "elapsed_time": "15:32:16", "remaining_time": "1 day, 5:25:53", "throughput": 27628.04, "total_tokens": 1545402624} +{"current_steps": 5340, "total_steps": 15426, "loss": 0.3443, "lr": 4.037118345087011e-05, "epoch": 1.0385101624039677, "percentage": 34.62, "elapsed_time": "15:33:58", "remaining_time": "1 day, 5:24:04", "throughput": 27629.16, "total_tokens": 1548305408} +{"current_steps": 5350, "total_steps": 15426, "loss": 0.34, "lr": 4.0358030228792636e-05, "epoch": 1.0404551201011378, "percentage": 34.68, "elapsed_time": "15:35:37", "remaining_time": "1 day, 5:22:08", "throughput": 27631.86, "total_tokens": 1551194560} +{"current_steps": 5360, "total_steps": 15426, "loss": 0.3433, "lr": 4.034488985458673e-05, "epoch": 1.0424000777983078, "percentage": 34.75, "elapsed_time": "15:37:20", "remaining_time": "1 day, 5:20:17", "throughput": 27632.97, "total_tokens": 1554079296} +{"current_steps": 5370, "total_steps": 15426, "loss": 0.3437, "lr": 4.033176230735001e-05, "epoch": 1.044345035495478, "percentage": 34.81, "elapsed_time": "15:38:56", "remaining_time": "1 day, 5:18:17", "throughput": 27636.33, "total_tokens": 1556934656} +{"current_steps": 5380, "total_steps": 15426, "loss": 0.3432, "lr": 4.0318647566227626e-05, "epoch": 1.0462899931926481, "percentage": 34.88, "elapsed_time": "15:40:35", 
"remaining_time": "1 day, 5:16:21", "throughput": 27638.03, "total_tokens": 1559774848} +{"current_steps": 5390, "total_steps": 15426, "loss": 0.3419, "lr": 4.0305545610412205e-05, "epoch": 1.048234950889818, "percentage": 34.94, "elapsed_time": "15:42:15", "remaining_time": "1 day, 5:14:27", "throughput": 27640.5, "total_tokens": 1562671040} +{"current_steps": 5400, "total_steps": 15426, "loss": 0.3438, "lr": 4.029245641914365e-05, "epoch": 1.0501799085869883, "percentage": 35.01, "elapsed_time": "15:43:59", "remaining_time": "1 day, 5:12:40", "throughput": 27641.37, "total_tokens": 1565593536} +{"current_steps": 5410, "total_steps": 15426, "loss": 0.3452, "lr": 4.027937997170904e-05, "epoch": 1.0521248662841582, "percentage": 35.07, "elapsed_time": "15:45:38", "remaining_time": "1 day, 5:10:44", "throughput": 27643.81, "total_tokens": 1568461952} +{"current_steps": 5420, "total_steps": 15426, "loss": 0.341, "lr": 4.026631624744247e-05, "epoch": 1.0540698239813284, "percentage": 35.14, "elapsed_time": "15:47:18", "remaining_time": "1 day, 5:08:51", "throughput": 27645.8, "total_tokens": 1571354176} +{"current_steps": 5430, "total_steps": 15426, "loss": 0.3427, "lr": 4.025326522572493e-05, "epoch": 1.0560147816784986, "percentage": 35.2, "elapsed_time": "15:49:00", "remaining_time": "1 day, 5:07:00", "throughput": 27647.7, "total_tokens": 1574267712} +{"current_steps": 5440, "total_steps": 15426, "loss": 0.3422, "lr": 4.024022688598415e-05, "epoch": 1.0579597393756686, "percentage": 35.27, "elapsed_time": "15:50:37", "remaining_time": "1 day, 5:05:02", "throughput": 27650.4, "total_tokens": 1577120960} +{"current_steps": 5450, "total_steps": 15426, "loss": 0.3443, "lr": 4.0227201207694494e-05, "epoch": 1.0599046970728387, "percentage": 35.33, "elapsed_time": "15:52:20", "remaining_time": "1 day, 5:03:14", "throughput": 27651.72, "total_tokens": 1580043840} +{"current_steps": 5460, "total_steps": 15426, "loss": 0.3443, "lr": 4.021418817037677e-05, "epoch": 
1.0618496547700087, "percentage": 35.39, "elapsed_time": "15:54:06", "remaining_time": "1 day, 5:01:30", "throughput": 27652.5, "total_tokens": 1583007360} +{"current_steps": 5470, "total_steps": 15426, "loss": 0.3407, "lr": 4.0201187753598174e-05, "epoch": 1.0637946124671789, "percentage": 35.46, "elapsed_time": "15:55:51", "remaining_time": "1 day, 4:59:45", "throughput": 27653.57, "total_tokens": 1585964416} +{"current_steps": 5480, "total_steps": 15426, "loss": 0.3402, "lr": 4.018819993697208e-05, "epoch": 1.0657395701643488, "percentage": 35.52, "elapsed_time": "15:57:30", "remaining_time": "1 day, 4:57:51", "throughput": 27655.92, "total_tokens": 1588853824} +{"current_steps": 5490, "total_steps": 15426, "loss": 0.3412, "lr": 4.017522470015793e-05, "epoch": 1.067684527861519, "percentage": 35.59, "elapsed_time": "15:59:11", "remaining_time": "1 day, 4:55:58", "throughput": 27658.2, "total_tokens": 1591763584} +{"current_steps": 5500, "total_steps": 15426, "loss": 0.3432, "lr": 4.0162262022861144e-05, "epoch": 1.0696294855586892, "percentage": 35.65, "elapsed_time": "16:00:51", "remaining_time": "1 day, 4:54:05", "throughput": 27659.88, "total_tokens": 1594643584} +{"current_steps": 5510, "total_steps": 15426, "loss": 0.3451, "lr": 4.0149311884832906e-05, "epoch": 1.0715744432558592, "percentage": 35.72, "elapsed_time": "16:02:30", "remaining_time": "1 day, 4:52:10", "throughput": 27661.98, "total_tokens": 1597498688} +{"current_steps": 5520, "total_steps": 15426, "loss": 0.3419, "lr": 4.0136374265870116e-05, "epoch": 1.0735194009530293, "percentage": 35.78, "elapsed_time": "16:04:10", "remaining_time": "1 day, 4:50:16", "throughput": 27664.01, "total_tokens": 1600377856} +{"current_steps": 5530, "total_steps": 15426, "loss": 0.3453, "lr": 4.0123449145815174e-05, "epoch": 1.0754643586501993, "percentage": 35.85, "elapsed_time": "16:05:51", "remaining_time": "1 day, 4:48:25", "throughput": 27665.61, "total_tokens": 1603275840} +{"current_steps": 5540, 
"total_steps": 15426, "loss": 0.3413, "lr": 4.011053650455592e-05, "epoch": 1.0774093163473695, "percentage": 35.91, "elapsed_time": "16:07:34", "remaining_time": "1 day, 4:46:37", "throughput": 27666.97, "total_tokens": 1606197248} +{"current_steps": 5550, "total_steps": 15426, "loss": 0.3391, "lr": 4.0097636322025466e-05, "epoch": 1.0793542740445394, "percentage": 35.98, "elapsed_time": "16:09:15", "remaining_time": "1 day, 4:44:45", "throughput": 27669.02, "total_tokens": 1609104576} +{"current_steps": 5560, "total_steps": 15426, "loss": 0.3406, "lr": 4.008474857820206e-05, "epoch": 1.0812992317417096, "percentage": 36.04, "elapsed_time": "16:10:58", "remaining_time": "1 day, 4:42:57", "throughput": 27670.21, "total_tokens": 1612027712} +{"current_steps": 5570, "total_steps": 15426, "loss": 0.3407, "lr": 4.007187325310899e-05, "epoch": 1.0832441894388798, "percentage": 36.11, "elapsed_time": "16:12:37", "remaining_time": "1 day, 4:41:02", "throughput": 27672.9, "total_tokens": 1614927424} +{"current_steps": 5580, "total_steps": 15426, "loss": 0.3405, "lr": 4.00590103268144e-05, "epoch": 1.0851891471360497, "percentage": 36.17, "elapsed_time": "16:14:15", "remaining_time": "1 day, 4:39:06", "throughput": 27675.38, "total_tokens": 1617786560} +{"current_steps": 5590, "total_steps": 15426, "loss": 0.3384, "lr": 4.004615977943124e-05, "epoch": 1.08713410483322, "percentage": 36.24, "elapsed_time": "16:15:57", "remaining_time": "1 day, 4:37:16", "throughput": 27676.66, "total_tokens": 1620685504} +{"current_steps": 5600, "total_steps": 15426, "loss": 0.3406, "lr": 4.0033321591117025e-05, "epoch": 1.0890790625303899, "percentage": 36.3, "elapsed_time": "16:17:40", "remaining_time": "1 day, 4:35:29", "throughput": 27677.99, "total_tokens": 1623618816} +{"current_steps": 5610, "total_steps": 15426, "loss": 0.3434, "lr": 4.002049574207381e-05, "epoch": 1.09102402022756, "percentage": 36.37, "elapsed_time": "16:19:20", "remaining_time": "1 day, 4:33:35", "throughput": 
27680.05, "total_tokens": 1626496960} +{"current_steps": 5620, "total_steps": 15426, "loss": 0.3388, "lr": 4.000768221254803e-05, "epoch": 1.09296897792473, "percentage": 36.43, "elapsed_time": "16:21:00", "remaining_time": "1 day, 4:31:42", "throughput": 27682.34, "total_tokens": 1629401728} +{"current_steps": 5630, "total_steps": 15426, "loss": 0.3469, "lr": 3.999488098283034e-05, "epoch": 1.0949139356219002, "percentage": 36.5, "elapsed_time": "16:22:39", "remaining_time": "1 day, 4:29:46", "throughput": 27684.6, "total_tokens": 1632256704} +{"current_steps": 5640, "total_steps": 15426, "loss": 0.3453, "lr": 3.9982092033255506e-05, "epoch": 1.0968588933190704, "percentage": 36.56, "elapsed_time": "16:24:16", "remaining_time": "1 day, 4:27:49", "throughput": 27687.48, "total_tokens": 1635128704} +{"current_steps": 5650, "total_steps": 15426, "loss": 0.3445, "lr": 3.996931534420232e-05, "epoch": 1.0988038510162403, "percentage": 36.63, "elapsed_time": "16:25:55", "remaining_time": "1 day, 4:25:55", "throughput": 27689.6, "total_tokens": 1637997696} +{"current_steps": 5660, "total_steps": 15426, "loss": 0.3398, "lr": 3.995655089609339e-05, "epoch": 1.1007488087134105, "percentage": 36.69, "elapsed_time": "16:27:39", "remaining_time": "1 day, 4:24:09", "throughput": 27689.18, "total_tokens": 1640856768} +{"current_steps": 5670, "total_steps": 15426, "loss": 0.3438, "lr": 3.994379866939511e-05, "epoch": 1.1026937664105805, "percentage": 36.76, "elapsed_time": "16:29:19", "remaining_time": "1 day, 4:22:16", "throughput": 27691.43, "total_tokens": 1643753152} +{"current_steps": 5680, "total_steps": 15426, "loss": 0.3475, "lr": 3.993105864461745e-05, "epoch": 1.1046387241077507, "percentage": 36.82, "elapsed_time": "16:31:01", "remaining_time": "1 day, 4:20:27", "throughput": 27692.41, "total_tokens": 1646636544} +{"current_steps": 5690, "total_steps": 15426, "loss": 0.3399, "lr": 3.9918330802313866e-05, "epoch": 1.1065836818049208, "percentage": 36.89, "elapsed_time": 
"16:32:45", "remaining_time": "1 day, 4:18:40", "throughput": 27693.17, "total_tokens": 1649554496} +{"current_steps": 5700, "total_steps": 15426, "loss": 0.3409, "lr": 3.9905615123081206e-05, "epoch": 1.1085286395020908, "percentage": 36.95, "elapsed_time": "16:34:25", "remaining_time": "1 day, 4:16:48", "throughput": 27694.79, "total_tokens": 1652420928} +{"current_steps": 5710, "total_steps": 15426, "loss": 0.3439, "lr": 3.989291158755953e-05, "epoch": 1.110473597199261, "percentage": 37.02, "elapsed_time": "16:36:04", "remaining_time": "1 day, 4:14:54", "throughput": 27697.09, "total_tokens": 1655307520} +{"current_steps": 5720, "total_steps": 15426, "loss": 0.3407, "lr": 3.988022017643201e-05, "epoch": 1.112418554896431, "percentage": 37.08, "elapsed_time": "16:37:46", "remaining_time": "1 day, 4:13:04", "throughput": 27698.39, "total_tokens": 1658208640} +{"current_steps": 5730, "total_steps": 15426, "loss": 0.3408, "lr": 3.9867540870424826e-05, "epoch": 1.1143635125936011, "percentage": 37.15, "elapsed_time": "16:39:30", "remaining_time": "1 day, 4:11:18", "throughput": 27699.89, "total_tokens": 1661168064} +{"current_steps": 5740, "total_steps": 15426, "loss": 0.3414, "lr": 3.985487365030702e-05, "epoch": 1.1163084702907713, "percentage": 37.21, "elapsed_time": "16:41:10", "remaining_time": "1 day, 4:09:26", "throughput": 27701.95, "total_tokens": 1664069504} +{"current_steps": 5750, "total_steps": 15426, "loss": 0.3393, "lr": 3.984221849689036e-05, "epoch": 1.1182534279879413, "percentage": 37.27, "elapsed_time": "16:42:50", "remaining_time": "1 day, 4:07:34", "throughput": 27704.17, "total_tokens": 1666984256} +{"current_steps": 5760, "total_steps": 15426, "loss": 0.3426, "lr": 3.982957539102927e-05, "epoch": 1.1201983856851114, "percentage": 37.34, "elapsed_time": "16:44:28", "remaining_time": "1 day, 4:05:38", "throughput": 27706.87, "total_tokens": 1669850944} +{"current_steps": 5770, "total_steps": 15426, "loss": 0.3378, "lr": 3.981694431362065e-05, 
"epoch": 1.1221433433822814, "percentage": 37.4, "elapsed_time": "16:46:11", "remaining_time": "1 day, 4:03:51", "throughput": 27708.03, "total_tokens": 1672781504} +{"current_steps": 5780, "total_steps": 15426, "loss": 0.341, "lr": 3.9804325245603786e-05, "epoch": 1.1240883010794516, "percentage": 37.47, "elapsed_time": "16:47:50", "remaining_time": "1 day, 4:01:57", "throughput": 27710.15, "total_tokens": 1675658240} +{"current_steps": 5790, "total_steps": 15426, "loss": 0.3435, "lr": 3.9791718167960226e-05, "epoch": 1.1260332587766215, "percentage": 37.53, "elapsed_time": "16:49:34", "remaining_time": "1 day, 4:00:10", "throughput": 27710.79, "total_tokens": 1678565504} +{"current_steps": 5800, "total_steps": 15426, "loss": 0.3435, "lr": 3.9779123061713665e-05, "epoch": 1.1279782164737917, "percentage": 37.6, "elapsed_time": "16:51:18", "remaining_time": "1 day, 3:58:24", "throughput": 27711.26, "total_tokens": 1681466496} +{"current_steps": 5810, "total_steps": 15426, "loss": 0.3458, "lr": 3.976653990792979e-05, "epoch": 1.1299231741709619, "percentage": 37.66, "elapsed_time": "16:53:02", "remaining_time": "1 day, 3:56:40", "throughput": 27712.19, "total_tokens": 1684422848} +{"current_steps": 5820, "total_steps": 15426, "loss": 0.3394, "lr": 3.9753968687716206e-05, "epoch": 1.1318681318681318, "percentage": 37.73, "elapsed_time": "16:54:43", "remaining_time": "1 day, 3:54:49", "throughput": 27713.68, "total_tokens": 1687315968} +{"current_steps": 5830, "total_steps": 15426, "loss": 0.341, "lr": 3.974140938222232e-05, "epoch": 1.133813089565302, "percentage": 37.79, "elapsed_time": "16:56:27", "remaining_time": "1 day, 3:53:02", "throughput": 27714.36, "total_tokens": 1690219584} +{"current_steps": 5840, "total_steps": 15426, "loss": 0.3419, "lr": 3.972886197263915e-05, "epoch": 1.135758047262472, "percentage": 37.86, "elapsed_time": "16:58:10", "remaining_time": "1 day, 3:51:16", "throughput": 27715.21, "total_tokens": 1693131712} +{"current_steps": 5850, 
"total_steps": 15426, "loss": 0.343, "lr": 3.97163264401993e-05, "epoch": 1.1377030049596422, "percentage": 37.92, "elapsed_time": "16:59:55", "remaining_time": "1 day, 3:49:31", "throughput": 27715.63, "total_tokens": 1696058432} +{"current_steps": 5860, "total_steps": 15426, "loss": 0.3423, "lr": 3.970380276617677e-05, "epoch": 1.1396479626568121, "percentage": 37.99, "elapsed_time": "17:01:35", "remaining_time": "1 day, 3:47:40", "throughput": 27717.14, "total_tokens": 1698942400} +{"current_steps": 5870, "total_steps": 15426, "loss": 0.3416, "lr": 3.96912909318869e-05, "epoch": 1.1415929203539823, "percentage": 38.05, "elapsed_time": "17:03:13", "remaining_time": "1 day, 3:45:44", "throughput": 27719.21, "total_tokens": 1701767168} +{"current_steps": 5880, "total_steps": 15426, "loss": 0.3402, "lr": 3.96787909186862e-05, "epoch": 1.1435378780511525, "percentage": 38.12, "elapsed_time": "17:04:51", "remaining_time": "1 day, 3:43:49", "throughput": 27721.09, "total_tokens": 1704613184} +{"current_steps": 5890, "total_steps": 15426, "loss": 0.3422, "lr": 3.9666302707972244e-05, "epoch": 1.1454828357483224, "percentage": 38.18, "elapsed_time": "17:06:31", "remaining_time": "1 day, 3:41:57", "throughput": 27723.2, "total_tokens": 1707513536} +{"current_steps": 5900, "total_steps": 15426, "loss": 0.3382, "lr": 3.965382628118358e-05, "epoch": 1.1474277934454926, "percentage": 38.25, "elapsed_time": "17:08:12", "remaining_time": "1 day, 3:40:07", "throughput": 27724.79, "total_tokens": 1710416832} +{"current_steps": 5910, "total_steps": 15426, "loss": 0.341, "lr": 3.964136161979959e-05, "epoch": 1.1493727511426626, "percentage": 38.31, "elapsed_time": "17:09:51", "remaining_time": "1 day, 3:38:14", "throughput": 27727.12, "total_tokens": 1713312512} +{"current_steps": 5920, "total_steps": 15426, "loss": 0.3405, "lr": 3.9628908705340406e-05, "epoch": 1.1513177088398328, "percentage": 38.38, "elapsed_time": "17:11:32", "remaining_time": "1 day, 3:36:23", "throughput": 
27728.92, "total_tokens": 1716212800} +{"current_steps": 5930, "total_steps": 15426, "loss": 0.3399, "lr": 3.961646751936673e-05, "epoch": 1.1532626665370027, "percentage": 38.44, "elapsed_time": "17:13:15", "remaining_time": "1 day, 3:34:36", "throughput": 27730.25, "total_tokens": 1719157632} +{"current_steps": 5940, "total_steps": 15426, "loss": 0.3431, "lr": 3.960403804347979e-05, "epoch": 1.155207624234173, "percentage": 38.51, "elapsed_time": "17:14:56", "remaining_time": "1 day, 3:32:46", "throughput": 27732.22, "total_tokens": 1722077376} +{"current_steps": 5950, "total_steps": 15426, "loss": 0.3413, "lr": 3.959162025932119e-05, "epoch": 1.157152581931343, "percentage": 38.57, "elapsed_time": "17:16:36", "remaining_time": "1 day, 3:30:53", "throughput": 27733.77, "total_tokens": 1724931456} +{"current_steps": 5960, "total_steps": 15426, "loss": 0.3366, "lr": 3.95792141485728e-05, "epoch": 1.159097539628513, "percentage": 38.64, "elapsed_time": "17:18:17", "remaining_time": "1 day, 3:29:04", "throughput": 27735.42, "total_tokens": 1727851648} +{"current_steps": 5970, "total_steps": 15426, "loss": 0.3377, "lr": 3.956681969295664e-05, "epoch": 1.1610424973256832, "percentage": 38.7, "elapsed_time": "17:20:02", "remaining_time": "1 day, 3:27:20", "throughput": 27736.02, "total_tokens": 1730795584} +{"current_steps": 5980, "total_steps": 15426, "loss": 0.345, "lr": 3.955443687423479e-05, "epoch": 1.1629874550228532, "percentage": 38.77, "elapsed_time": "17:21:43", "remaining_time": "1 day, 3:25:30", "throughput": 27737.42, "total_tokens": 1733682304} +{"current_steps": 5990, "total_steps": 15426, "loss": 0.3408, "lr": 3.954206567420924e-05, "epoch": 1.1649324127200233, "percentage": 38.83, "elapsed_time": "17:23:23", "remaining_time": "1 day, 3:23:38", "throughput": 27738.6, "total_tokens": 1736534208} +{"current_steps": 6000, "total_steps": 15426, "loss": 0.3434, "lr": 3.952970607472179e-05, "epoch": 1.1668773704171933, "percentage": 38.9, "elapsed_time": 
"17:25:02", "remaining_time": "1 day, 3:21:45", "throughput": 27739.98, "total_tokens": 1739364736} +{"current_steps": 6010, "total_steps": 15426, "loss": 0.3434, "lr": 3.951735805765399e-05, "epoch": 1.1688223281143635, "percentage": 38.96, "elapsed_time": "17:26:45", "remaining_time": "1 day, 3:19:59", "throughput": 27740.91, "total_tokens": 1742286272} +{"current_steps": 6020, "total_steps": 15426, "loss": 0.3436, "lr": 3.950502160492692e-05, "epoch": 1.1707672858115337, "percentage": 39.03, "elapsed_time": "17:28:26", "remaining_time": "1 day, 3:18:08", "throughput": 27742.65, "total_tokens": 1745185792} +{"current_steps": 6030, "total_steps": 15426, "loss": 0.3401, "lr": 3.9492696698501205e-05, "epoch": 1.1727122435087036, "percentage": 39.09, "elapsed_time": "17:30:06", "remaining_time": "1 day, 3:16:17", "throughput": 27743.98, "total_tokens": 1748061056} +{"current_steps": 6040, "total_steps": 15426, "loss": 0.3392, "lr": 3.9480383320376784e-05, "epoch": 1.1746572012058738, "percentage": 39.15, "elapsed_time": "17:31:48", "remaining_time": "1 day, 3:14:29", "throughput": 27744.85, "total_tokens": 1750948928} +{"current_steps": 6050, "total_steps": 15426, "loss": 0.3424, "lr": 3.94680814525929e-05, "epoch": 1.176602158903044, "percentage": 39.22, "elapsed_time": "17:33:27", "remaining_time": "1 day, 3:12:36", "throughput": 27746.74, "total_tokens": 1753803840} +{"current_steps": 6060, "total_steps": 15426, "loss": 0.3413, "lr": 3.945579107722792e-05, "epoch": 1.178547116600214, "percentage": 39.28, "elapsed_time": "17:35:10", "remaining_time": "1 day, 3:10:49", "throughput": 27747.36, "total_tokens": 1756699072} +{"current_steps": 6070, "total_steps": 15426, "loss": 0.3451, "lr": 3.9443512176399276e-05, "epoch": 1.1804920742973841, "percentage": 39.35, "elapsed_time": "17:36:50", "remaining_time": "1 day, 3:08:57", "throughput": 27748.63, "total_tokens": 1759549504} +{"current_steps": 6080, "total_steps": 15426, "loss": 0.3416, "lr": 3.9431244732263307e-05, 
"epoch": 1.182437031994554, "percentage": 39.41, "elapsed_time": "17:38:32", "remaining_time": "1 day, 3:07:10", "throughput": 27749.4, "total_tokens": 1762444480} +{"current_steps": 6090, "total_steps": 15426, "loss": 0.3383, "lr": 3.941898872701519e-05, "epoch": 1.1843819896917243, "percentage": 39.48, "elapsed_time": "17:40:15", "remaining_time": "1 day, 3:05:23", "throughput": 27750.62, "total_tokens": 1765379584} +{"current_steps": 6100, "total_steps": 15426, "loss": 0.3446, "lr": 3.940674414288882e-05, "epoch": 1.1863269473888942, "percentage": 39.54, "elapsed_time": "17:41:58", "remaining_time": "1 day, 3:03:35", "throughput": 27751.86, "total_tokens": 1768301120} +{"current_steps": 6110, "total_steps": 15426, "loss": 0.3397, "lr": 3.939451096215668e-05, "epoch": 1.1882719050860644, "percentage": 39.61, "elapsed_time": "17:43:41", "remaining_time": "1 day, 3:01:49", "throughput": 27753.27, "total_tokens": 1771264256} +{"current_steps": 6120, "total_steps": 15426, "loss": 0.3349, "lr": 3.938228916712978e-05, "epoch": 1.1902168627832346, "percentage": 39.67, "elapsed_time": "17:45:27", "remaining_time": "1 day, 3:00:06", "throughput": 27753.93, "total_tokens": 1774225984} +{"current_steps": 6130, "total_steps": 15426, "loss": 0.3463, "lr": 3.937007874015748e-05, "epoch": 1.1921618204804045, "percentage": 39.74, "elapsed_time": "17:47:08", "remaining_time": "1 day, 2:58:17", "throughput": 27754.68, "total_tokens": 1777082368} +{"current_steps": 6140, "total_steps": 15426, "loss": 0.3435, "lr": 3.935787966362748e-05, "epoch": 1.1941067781775747, "percentage": 39.8, "elapsed_time": "17:48:46", "remaining_time": "1 day, 2:56:22", "throughput": 27757.37, "total_tokens": 1779971328} +{"current_steps": 6150, "total_steps": 15426, "loss": 0.3411, "lr": 3.9345691919965595e-05, "epoch": 1.1960517358747447, "percentage": 39.87, "elapsed_time": "17:50:24", "remaining_time": "1 day, 2:54:29", "throughput": 27758.94, "total_tokens": 1782804160} +{"current_steps": 6160, 
"total_steps": 15426, "loss": 0.3426, "lr": 3.9333515491635764e-05, "epoch": 1.1979966935719149, "percentage": 39.93, "elapsed_time": "17:52:03", "remaining_time": "1 day, 2:52:37", "throughput": 27760.71, "total_tokens": 1785676864} +{"current_steps": 6170, "total_steps": 15426, "loss": 0.3387, "lr": 3.932135036113987e-05, "epoch": 1.1999416512690848, "percentage": 40.0, "elapsed_time": "17:53:47", "remaining_time": "1 day, 2:50:51", "throughput": 27761.73, "total_tokens": 1788620096} +{"current_steps": 6180, "total_steps": 15426, "loss": 0.34, "lr": 3.930919651101764e-05, "epoch": 1.201886608966255, "percentage": 40.06, "elapsed_time": "17:55:26", "remaining_time": "1 day, 2:48:59", "throughput": 27764.04, "total_tokens": 1791524992} +{"current_steps": 6190, "total_steps": 15426, "loss": 0.3379, "lr": 3.9297053923846576e-05, "epoch": 1.2038315666634252, "percentage": 40.13, "elapsed_time": "17:57:06", "remaining_time": "1 day, 2:47:08", "throughput": 27765.75, "total_tokens": 1794403584} +{"current_steps": 6200, "total_steps": 15426, "loss": 0.3405, "lr": 3.928492258224183e-05, "epoch": 1.2057765243605951, "percentage": 40.19, "elapsed_time": "17:58:51", "remaining_time": "1 day, 2:45:24", "throughput": 27766.19, "total_tokens": 1797341504} +{"current_steps": 6210, "total_steps": 15426, "loss": 0.3437, "lr": 3.927280246885609e-05, "epoch": 1.2077214820577653, "percentage": 40.26, "elapsed_time": "18:00:31", "remaining_time": "1 day, 2:43:33", "throughput": 27767.57, "total_tokens": 1800217792} +{"current_steps": 6220, "total_steps": 15426, "loss": 0.341, "lr": 3.9260693566379486e-05, "epoch": 1.2096664397549353, "percentage": 40.32, "elapsed_time": "18:02:15", "remaining_time": "1 day, 2:41:48", "throughput": 27767.96, "total_tokens": 1803117632} +{"current_steps": 6230, "total_steps": 15426, "loss": 0.3401, "lr": 3.924859585753948e-05, "epoch": 1.2116113974521054, "percentage": 40.39, "elapsed_time": "18:03:59", "remaining_time": "1 day, 2:40:03", "throughput": 
27768.64, "total_tokens": 1806062912} +{"current_steps": 6240, "total_steps": 15426, "loss": 0.343, "lr": 3.923650932510079e-05, "epoch": 1.2135563551492754, "percentage": 40.45, "elapsed_time": "18:05:37", "remaining_time": "1 day, 2:38:10", "throughput": 27770.87, "total_tokens": 1808934144} +{"current_steps": 6250, "total_steps": 15426, "loss": 0.3425, "lr": 3.9224433951865215e-05, "epoch": 1.2155013128464456, "percentage": 40.52, "elapsed_time": "18:07:15", "remaining_time": "1 day, 2:36:16", "throughput": 27773.25, "total_tokens": 1811799232} +{"current_steps": 6260, "total_steps": 15426, "loss": 0.3392, "lr": 3.921236972067165e-05, "epoch": 1.2174462705436158, "percentage": 40.58, "elapsed_time": "18:08:56", "remaining_time": "1 day, 2:34:27", "throughput": 27774.52, "total_tokens": 1814702784} +{"current_steps": 6270, "total_steps": 15426, "loss": 0.3401, "lr": 3.920031661439585e-05, "epoch": 1.2193912282407857, "percentage": 40.65, "elapsed_time": "18:10:38", "remaining_time": "1 day, 2:32:38", "throughput": 27775.95, "total_tokens": 1817610624} +{"current_steps": 6280, "total_steps": 15426, "loss": 0.3402, "lr": 3.918827461595045e-05, "epoch": 1.221336185937956, "percentage": 40.71, "elapsed_time": "18:12:18", "remaining_time": "1 day, 2:30:48", "throughput": 27777.16, "total_tokens": 1820483072} +{"current_steps": 6290, "total_steps": 15426, "loss": 0.3398, "lr": 3.9176243708284746e-05, "epoch": 1.2232811436351259, "percentage": 40.78, "elapsed_time": "18:14:02", "remaining_time": "1 day, 2:29:03", "throughput": 27778.12, "total_tokens": 1823421952} +{"current_steps": 6300, "total_steps": 15426, "loss": 0.3375, "lr": 3.9164223874384715e-05, "epoch": 1.225226101332296, "percentage": 40.84, "elapsed_time": "18:15:40", "remaining_time": "1 day, 2:27:09", "throughput": 27780.43, "total_tokens": 1826299392} +{"current_steps": 6310, "total_steps": 15426, "loss": 0.3386, "lr": 3.91522150972728e-05, "epoch": 1.227171059029466, "percentage": 40.9, "elapsed_time": 
"18:17:26", "remaining_time": "1 day, 2:25:28", "throughput": 27780.92, "total_tokens": 1829285184} +{"current_steps": 6320, "total_steps": 15426, "loss": 0.3393, "lr": 3.9140217360007896e-05, "epoch": 1.2291160167266362, "percentage": 40.97, "elapsed_time": "18:19:10", "remaining_time": "1 day, 2:23:43", "throughput": 27781.53, "total_tokens": 1832211136} +{"current_steps": 6330, "total_steps": 15426, "loss": 0.3374, "lr": 3.912823064568521e-05, "epoch": 1.2310609744238064, "percentage": 41.03, "elapsed_time": "18:20:55", "remaining_time": "1 day, 2:21:59", "throughput": 27781.77, "total_tokens": 1835137792} +{"current_steps": 6340, "total_steps": 15426, "loss": 0.3443, "lr": 3.9116254937436155e-05, "epoch": 1.2330059321209763, "percentage": 41.1, "elapsed_time": "18:22:39", "remaining_time": "1 day, 2:20:14", "throughput": 27782.14, "total_tokens": 1838041984} +{"current_steps": 6350, "total_steps": 15426, "loss": 0.34, "lr": 3.910429021842825e-05, "epoch": 1.2349508898181465, "percentage": 41.16, "elapsed_time": "18:24:19", "remaining_time": "1 day, 2:18:24", "throughput": 27783.5, "total_tokens": 1840932544} +{"current_steps": 6360, "total_steps": 15426, "loss": 0.3381, "lr": 3.9092336471865084e-05, "epoch": 1.2368958475153164, "percentage": 41.23, "elapsed_time": "18:26:01", "remaining_time": "1 day, 2:16:36", "throughput": 27784.39, "total_tokens": 1843821440} +{"current_steps": 6370, "total_steps": 15426, "loss": 0.3425, "lr": 3.908039368098611e-05, "epoch": 1.2388408052124866, "percentage": 41.29, "elapsed_time": "18:27:45", "remaining_time": "1 day, 2:14:51", "throughput": 27784.87, "total_tokens": 1846726528} +{"current_steps": 6380, "total_steps": 15426, "loss": 0.3384, "lr": 3.9068461829066633e-05, "epoch": 1.2407857629096568, "percentage": 41.36, "elapsed_time": "18:29:26", "remaining_time": "1 day, 2:13:02", "throughput": 27785.99, "total_tokens": 1849613440} +{"current_steps": 6390, "total_steps": 15426, "loss": 0.338, "lr": 3.9056540899417656e-05, 
"epoch": 1.2427307206068268, "percentage": 41.42, "elapsed_time": "18:31:06", "remaining_time": "1 day, 2:11:12", "throughput": 27787.35, "total_tokens": 1852490816} +{"current_steps": 6400, "total_steps": 15426, "loss": 0.343, "lr": 3.904463087538585e-05, "epoch": 1.244675678303997, "percentage": 41.49, "elapsed_time": "18:32:48", "remaining_time": "1 day, 2:09:24", "throughput": 27788.43, "total_tokens": 1855388544} +{"current_steps": 6410, "total_steps": 15426, "loss": 0.3379, "lr": 3.903273174035336e-05, "epoch": 1.246620636001167, "percentage": 41.55, "elapsed_time": "18:34:35", "remaining_time": "1 day, 2:07:43", "throughput": 27788.89, "total_tokens": 1858389952} +{"current_steps": 6420, "total_steps": 15426, "loss": 0.3392, "lr": 3.902084347773779e-05, "epoch": 1.248565593698337, "percentage": 41.62, "elapsed_time": "18:36:16", "remaining_time": "1 day, 2:05:54", "throughput": 27790.26, "total_tokens": 1861293184} +{"current_steps": 6430, "total_steps": 15426, "loss": 0.339, "lr": 3.900896607099207e-05, "epoch": 1.2505105513955073, "percentage": 41.68, "elapsed_time": "18:37:58", "remaining_time": "1 day, 2:04:06", "throughput": 27792.25, "total_tokens": 1864250432} +{"current_steps": 6440, "total_steps": 15426, "loss": 0.341, "lr": 3.899709950360437e-05, "epoch": 1.2524555090926772, "percentage": 41.75, "elapsed_time": "18:39:38", "remaining_time": "1 day, 2:02:16", "throughput": 27793.91, "total_tokens": 1867143424} +{"current_steps": 6450, "total_steps": 15426, "loss": 0.3355, "lr": 3.8985243759097997e-05, "epoch": 1.2544004667898474, "percentage": 41.81, "elapsed_time": "18:41:15", "remaining_time": "1 day, 2:00:22", "throughput": 27796.72, "total_tokens": 1870041408} +{"current_steps": 6460, "total_steps": 15426, "loss": 0.3346, "lr": 3.897339882103129e-05, "epoch": 1.2563454244870174, "percentage": 41.88, "elapsed_time": "18:42:57", "remaining_time": "1 day, 1:58:34", "throughput": 27798.32, "total_tokens": 1872980736} +{"current_steps": 6470, 
"total_steps": 15426, "loss": 0.3413, "lr": 3.8961564672997544e-05, "epoch": 1.2582903821841875, "percentage": 41.94, "elapsed_time": "18:44:37", "remaining_time": "1 day, 1:56:45", "throughput": 27799.98, "total_tokens": 1875880320} +{"current_steps": 6480, "total_steps": 15426, "loss": 0.3441, "lr": 3.8949741298624924e-05, "epoch": 1.2602353398813575, "percentage": 42.01, "elapsed_time": "18:46:19", "remaining_time": "1 day, 1:54:57", "throughput": 27800.9, "total_tokens": 1878771008} +{"current_steps": 6490, "total_steps": 15426, "loss": 0.3394, "lr": 3.8937928681576305e-05, "epoch": 1.2621802975785277, "percentage": 42.07, "elapsed_time": "18:48:01", "remaining_time": "1 day, 1:53:09", "throughput": 27802.14, "total_tokens": 1881678080} +{"current_steps": 6500, "total_steps": 15426, "loss": 0.3354, "lr": 3.8926126805549276e-05, "epoch": 1.2641252552756979, "percentage": 42.14, "elapsed_time": "18:49:39", "remaining_time": "1 day, 1:51:16", "throughput": 27804.29, "total_tokens": 1884560896} +{"current_steps": 6510, "total_steps": 15426, "loss": 0.338, "lr": 3.891433565427596e-05, "epoch": 1.2660702129728678, "percentage": 42.2, "elapsed_time": "18:51:18", "remaining_time": "1 day, 1:49:25", "throughput": 27805.97, "total_tokens": 1887428800} +{"current_steps": 6520, "total_steps": 15426, "loss": 0.3401, "lr": 3.8902555211522964e-05, "epoch": 1.268015170670038, "percentage": 42.27, "elapsed_time": "18:53:00", "remaining_time": "1 day, 1:47:37", "throughput": 27806.67, "total_tokens": 1890303168} +{"current_steps": 6530, "total_steps": 15426, "loss": 0.3424, "lr": 3.889078546109127e-05, "epoch": 1.269960128367208, "percentage": 42.33, "elapsed_time": "18:54:40", "remaining_time": "1 day, 1:45:48", "throughput": 27807.52, "total_tokens": 1893151424} +{"current_steps": 6540, "total_steps": 15426, "loss": 0.3389, "lr": 3.887902638681616e-05, "epoch": 1.2719050860643781, "percentage": 42.4, "elapsed_time": "18:56:20", "remaining_time": "1 day, 1:43:58", "throughput": 
27809.16, "total_tokens": 1896054464} +{"current_steps": 6550, "total_steps": 15426, "loss": 0.3391, "lr": 3.886727797256707e-05, "epoch": 1.273850043761548, "percentage": 42.46, "elapsed_time": "18:58:00", "remaining_time": "1 day, 1:42:08", "throughput": 27810.72, "total_tokens": 1898937984} +{"current_steps": 6560, "total_steps": 15426, "loss": 0.3416, "lr": 3.88555402022476e-05, "epoch": 1.2757950014587183, "percentage": 42.53, "elapsed_time": "18:59:42", "remaining_time": "1 day, 1:40:20", "throughput": 27811.67, "total_tokens": 1901823488} +{"current_steps": 6570, "total_steps": 15426, "loss": 0.3412, "lr": 3.884381305979528e-05, "epoch": 1.2777399591558884, "percentage": 42.59, "elapsed_time": "19:01:24", "remaining_time": "1 day, 1:38:33", "throughput": 27812.39, "total_tokens": 1904718336} +{"current_steps": 6580, "total_steps": 15426, "loss": 0.3402, "lr": 3.883209652918163e-05, "epoch": 1.2796849168530584, "percentage": 42.66, "elapsed_time": "19:03:05", "remaining_time": "1 day, 1:36:45", "throughput": 27813.57, "total_tokens": 1907620032} +{"current_steps": 6590, "total_steps": 15426, "loss": 0.3401, "lr": 3.8820390594411935e-05, "epoch": 1.2816298745502286, "percentage": 42.72, "elapsed_time": "19:04:40", "remaining_time": "1 day, 1:34:48", "throughput": 27816.35, "total_tokens": 1910447552} +{"current_steps": 6600, "total_steps": 15426, "loss": 0.34, "lr": 3.880869523952524e-05, "epoch": 1.2835748322473985, "percentage": 42.78, "elapsed_time": "19:06:20", "remaining_time": "1 day, 1:32:58", "throughput": 27818.16, "total_tokens": 1913345280} +{"current_steps": 6610, "total_steps": 15426, "loss": 0.3371, "lr": 3.879701044859422e-05, "epoch": 1.2855197899445687, "percentage": 42.85, "elapsed_time": "19:08:01", "remaining_time": "1 day, 1:31:09", "throughput": 27819.95, "total_tokens": 1916278208} +{"current_steps": 6620, "total_steps": 15426, "loss": 0.3365, "lr": 3.87853362057251e-05, "epoch": 1.2874647476417387, "percentage": 42.91, "elapsed_time": 
"19:09:38", "remaining_time": "1 day, 1:29:15", "throughput": 27822.48, "total_tokens": 1919139904} +{"current_steps": 6630, "total_steps": 15426, "loss": 0.3322, "lr": 3.8773672495057576e-05, "epoch": 1.2894097053389089, "percentage": 42.98, "elapsed_time": "19:11:17", "remaining_time": "1 day, 1:27:25", "throughput": 27824.43, "total_tokens": 1922053440} +{"current_steps": 6640, "total_steps": 15426, "loss": 0.3393, "lr": 3.8762019300764674e-05, "epoch": 1.291354663036079, "percentage": 43.04, "elapsed_time": "19:13:01", "remaining_time": "1 day, 1:25:40", "throughput": 27825.05, "total_tokens": 1924974336} +{"current_steps": 6650, "total_steps": 15426, "loss": 0.3358, "lr": 3.875037660705273e-05, "epoch": 1.293299620733249, "percentage": 43.11, "elapsed_time": "19:14:41", "remaining_time": "1 day, 1:23:50", "throughput": 27826.48, "total_tokens": 1927849472} +{"current_steps": 6660, "total_steps": 15426, "loss": 0.34, "lr": 3.873874439816127e-05, "epoch": 1.2952445784304192, "percentage": 43.17, "elapsed_time": "19:16:23", "remaining_time": "1 day, 1:22:03", "throughput": 27827.63, "total_tokens": 1930778688} +{"current_steps": 6670, "total_steps": 15426, "loss": 0.341, "lr": 3.872712265836289e-05, "epoch": 1.2971895361275894, "percentage": 43.24, "elapsed_time": "19:18:05", "remaining_time": "1 day, 1:20:17", "throughput": 27828.48, "total_tokens": 1933684672} +{"current_steps": 6680, "total_steps": 15426, "loss": 0.338, "lr": 3.8715511371963225e-05, "epoch": 1.2991344938247593, "percentage": 43.3, "elapsed_time": "19:19:49", "remaining_time": "1 day, 1:18:31", "throughput": 27829.66, "total_tokens": 1936640704} +{"current_steps": 6690, "total_steps": 15426, "loss": 0.341, "lr": 3.87039105233008e-05, "epoch": 1.3010794515219293, "percentage": 43.37, "elapsed_time": "19:21:32", "remaining_time": "1 day, 1:16:46", "throughput": 27830.37, "total_tokens": 1939575360} +{"current_steps": 6700, "total_steps": 15426, "loss": 0.3396, "lr": 3.8692320096746975e-05, 
"epoch": 1.3030244092190995, "percentage": 43.43, "elapsed_time": "19:23:14", "remaining_time": "1 day, 1:14:59", "throughput": 27831.55, "total_tokens": 1942487808} +{"current_steps": 6710, "total_steps": 15426, "loss": 0.3387, "lr": 3.868074007670589e-05, "epoch": 1.3049693669162696, "percentage": 43.5, "elapsed_time": "19:24:50", "remaining_time": "1 day, 1:13:05", "throughput": 27833.26, "total_tokens": 1945292352} +{"current_steps": 6720, "total_steps": 15426, "loss": 0.3413, "lr": 3.866917044761428e-05, "epoch": 1.3069143246134396, "percentage": 43.56, "elapsed_time": "19:26:28", "remaining_time": "1 day, 1:11:13", "throughput": 27835.3, "total_tokens": 1948161472} +{"current_steps": 6730, "total_steps": 15426, "loss": 0.3374, "lr": 3.8657611193941486e-05, "epoch": 1.3088592823106098, "percentage": 43.63, "elapsed_time": "19:28:09", "remaining_time": "1 day, 1:09:24", "throughput": 27836.28, "total_tokens": 1951031872} +{"current_steps": 6740, "total_steps": 15426, "loss": 0.3382, "lr": 3.8646062300189315e-05, "epoch": 1.31080424000778, "percentage": 43.69, "elapsed_time": "19:29:52", "remaining_time": "1 day, 1:07:38", "throughput": 27836.38, "total_tokens": 1953898048} +{"current_steps": 6750, "total_steps": 15426, "loss": 0.3364, "lr": 3.8634523750891984e-05, "epoch": 1.31274919770495, "percentage": 43.76, "elapsed_time": "19:31:36", "remaining_time": "1 day, 1:05:53", "throughput": 27836.98, "total_tokens": 1956829888} +{"current_steps": 6760, "total_steps": 15426, "loss": 0.3438, "lr": 3.862299553061597e-05, "epoch": 1.3146941554021199, "percentage": 43.82, "elapsed_time": "19:33:18", "remaining_time": "1 day, 1:04:07", "throughput": 27837.12, "total_tokens": 1959690624} +{"current_steps": 6770, "total_steps": 15426, "loss": 0.339, "lr": 3.861147762396e-05, "epoch": 1.31663911309929, "percentage": 43.89, "elapsed_time": "19:34:58", "remaining_time": "1 day, 1:02:18", "throughput": 27838.69, "total_tokens": 1962599424} +{"current_steps": 6780, 
"total_steps": 15426, "loss": 0.3341, "lr": 3.859997001555494e-05, "epoch": 1.3185840707964602, "percentage": 43.95, "elapsed_time": "19:36:45", "remaining_time": "1 day, 1:00:37", "throughput": 27838.45, "total_tokens": 1965553024} +{"current_steps": 6790, "total_steps": 15426, "loss": 0.3371, "lr": 3.8588472690063676e-05, "epoch": 1.3205290284936302, "percentage": 44.02, "elapsed_time": "19:38:27", "remaining_time": "1 day, 0:58:51", "throughput": 27839.47, "total_tokens": 1968470912} +{"current_steps": 6800, "total_steps": 15426, "loss": 0.3377, "lr": 3.857698563218106e-05, "epoch": 1.3224739861908004, "percentage": 44.08, "elapsed_time": "19:40:11", "remaining_time": "1 day, 0:57:06", "throughput": 27840.09, "total_tokens": 1971390016} +{"current_steps": 6810, "total_steps": 15426, "loss": 0.3367, "lr": 3.8565508826633836e-05, "epoch": 1.3244189438879705, "percentage": 44.15, "elapsed_time": "19:41:54", "remaining_time": "1 day, 0:55:21", "throughput": 27841.22, "total_tokens": 1974348992} +{"current_steps": 6820, "total_steps": 15426, "loss": 0.3292, "lr": 3.855404225818049e-05, "epoch": 1.3263639015851405, "percentage": 44.21, "elapsed_time": "19:43:37", "remaining_time": "1 day, 0:53:35", "throughput": 27841.76, "total_tokens": 1977262976} +{"current_steps": 6830, "total_steps": 15426, "loss": 0.3399, "lr": 3.8542585911611286e-05, "epoch": 1.3283088592823107, "percentage": 44.28, "elapsed_time": "19:45:18", "remaining_time": "1 day, 0:51:47", "throughput": 27843.18, "total_tokens": 1980166400} +{"current_steps": 6840, "total_steps": 15426, "loss": 0.3389, "lr": 3.853113977174803e-05, "epoch": 1.3302538169794806, "percentage": 44.34, "elapsed_time": "19:47:00", "remaining_time": "1 day, 0:50:00", "throughput": 27845.26, "total_tokens": 1983145088} +{"current_steps": 6850, "total_steps": 15426, "loss": 0.3368, "lr": 3.851970382344411e-05, "epoch": 1.3321987746766508, "percentage": 44.41, "elapsed_time": "19:48:44", "remaining_time": "1 day, 0:48:16", 
"throughput": 27846.06, "total_tokens": 1986117504} +{"current_steps": 6860, "total_steps": 15426, "loss": 0.3385, "lr": 3.850827805158433e-05, "epoch": 1.3341437323738208, "percentage": 44.47, "elapsed_time": "19:50:23", "remaining_time": "1 day, 0:46:25", "throughput": 27848.09, "total_tokens": 1989012480} +{"current_steps": 6870, "total_steps": 15426, "loss": 0.3386, "lr": 3.8496862441084896e-05, "epoch": 1.336088690070991, "percentage": 44.54, "elapsed_time": "19:52:05", "remaining_time": "1 day, 0:44:39", "throughput": 27848.76, "total_tokens": 1991905920} +{"current_steps": 6880, "total_steps": 15426, "loss": 0.3368, "lr": 3.848545697689328e-05, "epoch": 1.3380336477681611, "percentage": 44.6, "elapsed_time": "19:53:41", "remaining_time": "1 day, 0:42:44", "throughput": 27851.8, "total_tokens": 1994781312} +{"current_steps": 6890, "total_steps": 15426, "loss": 0.3423, "lr": 3.8474061643988136e-05, "epoch": 1.339978605465331, "percentage": 44.66, "elapsed_time": "19:55:26", "remaining_time": "1 day, 0:41:01", "throughput": 27851.8, "total_tokens": 1997703232} +{"current_steps": 6900, "total_steps": 15426, "loss": 0.3397, "lr": 3.846267642737925e-05, "epoch": 1.3419235631625013, "percentage": 44.73, "elapsed_time": "19:57:06", "remaining_time": "1 day, 0:39:12", "throughput": 27853.6, "total_tokens": 2000628608} +{"current_steps": 6910, "total_steps": 15426, "loss": 0.34, "lr": 3.8451301312107455e-05, "epoch": 1.3438685208596712, "percentage": 44.79, "elapsed_time": "19:58:45", "remaining_time": "1 day, 0:37:22", "throughput": 27855.46, "total_tokens": 2003525952} +{"current_steps": 6920, "total_steps": 15426, "loss": 0.3337, "lr": 3.843993628324451e-05, "epoch": 1.3458134785568414, "percentage": 44.86, "elapsed_time": "20:00:28", "remaining_time": "1 day, 0:35:36", "throughput": 27856.32, "total_tokens": 2006452416} +{"current_steps": 6930, "total_steps": 15426, "loss": 0.3347, "lr": 3.8428581325893034e-05, "epoch": 1.3477584362540114, "percentage": 44.92, 
"elapsed_time": "20:02:09", "remaining_time": "1 day, 0:33:48", "throughput": 27857.79, "total_tokens": 2009369664} +{"current_steps": 6940, "total_steps": 15426, "loss": 0.3366, "lr": 3.8417236425186484e-05, "epoch": 1.3497033939511816, "percentage": 44.99, "elapsed_time": "20:03:54", "remaining_time": "1 day, 0:32:05", "throughput": 27858.76, "total_tokens": 2012350464} +{"current_steps": 6950, "total_steps": 15426, "loss": 0.3368, "lr": 3.840590156628895e-05, "epoch": 1.3516483516483517, "percentage": 45.05, "elapsed_time": "20:05:35", "remaining_time": "1 day, 0:30:17", "throughput": 27859.66, "total_tokens": 2015229760} +{"current_steps": 6960, "total_steps": 15426, "loss": 0.3355, "lr": 3.8394576734395205e-05, "epoch": 1.3535933093455217, "percentage": 45.12, "elapsed_time": "20:07:13", "remaining_time": "1 day, 0:28:26", "throughput": 27861.43, "total_tokens": 2018099392} +{"current_steps": 6970, "total_steps": 15426, "loss": 0.3342, "lr": 3.838326191473054e-05, "epoch": 1.3555382670426919, "percentage": 45.18, "elapsed_time": "20:08:56", "remaining_time": "1 day, 0:26:41", "throughput": 27862.54, "total_tokens": 2021062016} +{"current_steps": 6980, "total_steps": 15426, "loss": 0.3353, "lr": 3.837195709255069e-05, "epoch": 1.3574832247398618, "percentage": 45.25, "elapsed_time": "20:10:37", "remaining_time": "1 day, 0:24:52", "throughput": 27863.53, "total_tokens": 2023924544} +{"current_steps": 6990, "total_steps": 15426, "loss": 0.3431, "lr": 3.8360662253141796e-05, "epoch": 1.359428182437032, "percentage": 45.31, "elapsed_time": "20:12:17", "remaining_time": "1 day, 0:23:04", "throughput": 27864.99, "total_tokens": 2026820928} +{"current_steps": 7000, "total_steps": 15426, "loss": 0.3358, "lr": 3.834937738182029e-05, "epoch": 1.361373140134202, "percentage": 45.38, "elapsed_time": "20:13:59", "remaining_time": "1 day, 0:21:17", "throughput": 27865.82, "total_tokens": 2029728512} +{"current_steps": 7010, "total_steps": 15426, "loss": 0.3362, "lr": 
3.833810246393281e-05, "epoch": 1.3633180978313721, "percentage": 45.44, "elapsed_time": "20:15:38", "remaining_time": "1 day, 0:19:28", "throughput": 27867.29, "total_tokens": 2032610816} +{"current_steps": 7020, "total_steps": 15426, "loss": 0.3355, "lr": 3.832683748485616e-05, "epoch": 1.3652630555285423, "percentage": 45.51, "elapsed_time": "20:17:27", "remaining_time": "1 day, 0:17:49", "throughput": 27866.68, "total_tokens": 2035597568} +{"current_steps": 7030, "total_steps": 15426, "loss": 0.3398, "lr": 3.8315582429997184e-05, "epoch": 1.3672080132257123, "percentage": 45.57, "elapsed_time": "20:19:06", "remaining_time": "1 day, 0:16:00", "throughput": 27868.01, "total_tokens": 2038456960} +{"current_steps": 7040, "total_steps": 15426, "loss": 0.336, "lr": 3.830433728479272e-05, "epoch": 1.3691529709228825, "percentage": 45.64, "elapsed_time": "20:20:54", "remaining_time": "1 day, 0:14:20", "throughput": 27867.64, "total_tokens": 2041439104} +{"current_steps": 7050, "total_steps": 15426, "loss": 0.3372, "lr": 3.829310203470948e-05, "epoch": 1.3710979286200526, "percentage": 45.7, "elapsed_time": "20:22:38", "remaining_time": "1 day, 0:12:35", "throughput": 27868.4, "total_tokens": 2044378624} +{"current_steps": 7060, "total_steps": 15426, "loss": 0.3339, "lr": 3.828187666524403e-05, "epoch": 1.3730428863172226, "percentage": 45.77, "elapsed_time": "20:24:18", "remaining_time": "1 day, 0:10:46", "throughput": 27869.22, "total_tokens": 2047223488} +{"current_steps": 7070, "total_steps": 15426, "loss": 0.3368, "lr": 3.827066116192266e-05, "epoch": 1.3749878440143926, "percentage": 45.83, "elapsed_time": "20:25:59", "remaining_time": "1 day, 0:08:59", "throughput": 27870.92, "total_tokens": 2050166208} +{"current_steps": 7080, "total_steps": 15426, "loss": 0.3396, "lr": 3.825945551030135e-05, "epoch": 1.3769328017115627, "percentage": 45.9, "elapsed_time": "20:27:39", "remaining_time": "1 day, 0:07:10", "throughput": 27871.5, "total_tokens": 2052988928} 
+{"current_steps": 7090, "total_steps": 15426, "loss": 0.337, "lr": 3.824825969596561e-05, "epoch": 1.378877759408733, "percentage": 45.96, "elapsed_time": "20:29:21", "remaining_time": "1 day, 0:05:24", "throughput": 27872.1, "total_tokens": 2055887872} +{"current_steps": 7100, "total_steps": 15426, "loss": 0.3328, "lr": 3.823707370453054e-05, "epoch": 1.3808227171059029, "percentage": 46.03, "elapsed_time": "20:30:59", "remaining_time": "1 day, 0:03:33", "throughput": 27874.02, "total_tokens": 2058770560} +{"current_steps": 7110, "total_steps": 15426, "loss": 0.3351, "lr": 3.8225897521640614e-05, "epoch": 1.382767674803073, "percentage": 46.09, "elapsed_time": "20:32:36", "remaining_time": "1 day, 0:01:40", "throughput": 27875.87, "total_tokens": 2061593856} +{"current_steps": 7120, "total_steps": 15426, "loss": 0.3361, "lr": 3.8214731132969675e-05, "epoch": 1.3847126325002432, "percentage": 46.16, "elapsed_time": "20:34:18", "remaining_time": "23:59:55", "throughput": 27876.57, "total_tokens": 2064507392} +{"current_steps": 7130, "total_steps": 15426, "loss": 0.3361, "lr": 3.820357452422084e-05, "epoch": 1.3866575901974132, "percentage": 46.22, "elapsed_time": "20:35:59", "remaining_time": "23:58:07", "throughput": 27877.6, "total_tokens": 2067386496} +{"current_steps": 7140, "total_steps": 15426, "loss": 0.3385, "lr": 3.8192427681126445e-05, "epoch": 1.3886025478945834, "percentage": 46.29, "elapsed_time": "20:37:38", "remaining_time": "23:56:17", "throughput": 27879.48, "total_tokens": 2070289920} +{"current_steps": 7150, "total_steps": 15426, "loss": 0.3383, "lr": 3.818129058944793e-05, "epoch": 1.3905475055917533, "percentage": 46.35, "elapsed_time": "20:39:21", "remaining_time": "23:54:31", "throughput": 27880.4, "total_tokens": 2073223744} +{"current_steps": 7160, "total_steps": 15426, "loss": 0.3324, "lr": 3.817016323497578e-05, "epoch": 1.3924924632889235, "percentage": 46.42, "elapsed_time": "20:41:00", "remaining_time": "23:52:42", "throughput": 
27882.14, "total_tokens": 2076126464} +{"current_steps": 7170, "total_steps": 15426, "loss": 0.3388, "lr": 3.8159045603529455e-05, "epoch": 1.3944374209860935, "percentage": 46.48, "elapsed_time": "20:42:40", "remaining_time": "23:50:53", "throughput": 27883.79, "total_tokens": 2079020032} +{"current_steps": 7180, "total_steps": 15426, "loss": 0.3354, "lr": 3.8147937680957334e-05, "epoch": 1.3963823786832636, "percentage": 46.54, "elapsed_time": "20:44:20", "remaining_time": "23:49:05", "throughput": 27884.65, "total_tokens": 2081888512} +{"current_steps": 7190, "total_steps": 15426, "loss": 0.337, "lr": 3.813683945313658e-05, "epoch": 1.3983273363804338, "percentage": 46.61, "elapsed_time": "20:45:59", "remaining_time": "23:47:15", "throughput": 27886.41, "total_tokens": 2084769472} +{"current_steps": 7200, "total_steps": 15426, "loss": 0.3385, "lr": 3.812575090597313e-05, "epoch": 1.4002722940776038, "percentage": 46.67, "elapsed_time": "20:47:39", "remaining_time": "23:45:26", "throughput": 27887.81, "total_tokens": 2087660096} +{"current_steps": 7210, "total_steps": 15426, "loss": 0.3377, "lr": 3.811467202540156e-05, "epoch": 1.402217251774774, "percentage": 46.74, "elapsed_time": "20:49:22", "remaining_time": "23:43:41", "throughput": 27888.22, "total_tokens": 2090570176} +{"current_steps": 7220, "total_steps": 15426, "loss": 0.3399, "lr": 3.810360279738507e-05, "epoch": 1.404162209471944, "percentage": 46.8, "elapsed_time": "20:51:01", "remaining_time": "23:41:52", "throughput": 27889.84, "total_tokens": 2093460864} +{"current_steps": 7230, "total_steps": 15426, "loss": 0.341, "lr": 3.809254320791535e-05, "epoch": 1.406107167169114, "percentage": 46.87, "elapsed_time": "20:52:41", "remaining_time": "23:40:04", "throughput": 27891.01, "total_tokens": 2096336832} +{"current_steps": 7240, "total_steps": 15426, "loss": 0.3347, "lr": 3.808149324301256e-05, "epoch": 1.408052124866284, "percentage": 46.93, "elapsed_time": "20:54:22", "remaining_time": "23:38:15", 
"throughput": 27892.59, "total_tokens": 2099253248} +{"current_steps": 7250, "total_steps": 15426, "loss": 0.3365, "lr": 3.807045288872522e-05, "epoch": 1.4099970825634542, "percentage": 47.0, "elapsed_time": "20:56:01", "remaining_time": "23:36:26", "throughput": 27893.52, "total_tokens": 2102087808} +{"current_steps": 7260, "total_steps": 15426, "loss": 0.3317, "lr": 3.805942213113015e-05, "epoch": 1.4119420402606244, "percentage": 47.06, "elapsed_time": "20:57:48", "remaining_time": "23:34:46", "throughput": 27893.05, "total_tokens": 2105044864} +{"current_steps": 7270, "total_steps": 15426, "loss": 0.3392, "lr": 3.8048400956332385e-05, "epoch": 1.4138869979577944, "percentage": 47.13, "elapsed_time": "20:59:30", "remaining_time": "23:33:00", "throughput": 27893.54, "total_tokens": 2107940544} +{"current_steps": 7280, "total_steps": 15426, "loss": 0.3295, "lr": 3.803738935046512e-05, "epoch": 1.4158319556549646, "percentage": 47.19, "elapsed_time": "21:01:15", "remaining_time": "23:31:17", "throughput": 27893.98, "total_tokens": 2110889664} +{"current_steps": 7290, "total_steps": 15426, "loss": 0.3383, "lr": 3.802638729968962e-05, "epoch": 1.4177769133521345, "percentage": 47.26, "elapsed_time": "21:02:52", "remaining_time": "23:29:25", "throughput": 27895.92, "total_tokens": 2113745408} +{"current_steps": 7300, "total_steps": 15426, "loss": 0.3427, "lr": 3.8015394790195145e-05, "epoch": 1.4197218710493047, "percentage": 47.32, "elapsed_time": "21:04:31", "remaining_time": "23:27:36", "throughput": 27897.82, "total_tokens": 2116652352} +{"current_steps": 7310, "total_steps": 15426, "loss": 0.3356, "lr": 3.800441180819891e-05, "epoch": 1.4216668287464747, "percentage": 47.39, "elapsed_time": "21:06:14", "remaining_time": "23:25:51", "throughput": 27898.86, "total_tokens": 2119605760} +{"current_steps": 7320, "total_steps": 15426, "loss": 0.3397, "lr": 3.7993438339945965e-05, "epoch": 1.4236117864436448, "percentage": 47.45, "elapsed_time": "21:07:53", 
"remaining_time": "23:24:01", "throughput": 27900.05, "total_tokens": 2122444096} +{"current_steps": 7330, "total_steps": 15426, "loss": 0.3361, "lr": 3.798247437170914e-05, "epoch": 1.425556744140815, "percentage": 47.52, "elapsed_time": "21:09:31", "remaining_time": "23:22:11", "throughput": 27902.12, "total_tokens": 2125338624} +{"current_steps": 7340, "total_steps": 15426, "loss": 0.3374, "lr": 3.797151988978901e-05, "epoch": 1.427501701837985, "percentage": 47.58, "elapsed_time": "21:11:10", "remaining_time": "23:20:22", "throughput": 27903.2, "total_tokens": 2128204672} +{"current_steps": 7350, "total_steps": 15426, "loss": 0.3345, "lr": 3.796057488051377e-05, "epoch": 1.4294466595351552, "percentage": 47.65, "elapsed_time": "21:12:52", "remaining_time": "23:18:36", "throughput": 27903.93, "total_tokens": 2131095296} +{"current_steps": 7360, "total_steps": 15426, "loss": 0.3368, "lr": 3.794963933023918e-05, "epoch": 1.4313916172323253, "percentage": 47.71, "elapsed_time": "21:14:28", "remaining_time": "23:16:43", "throughput": 27905.59, "total_tokens": 2133905216} +{"current_steps": 7370, "total_steps": 15426, "loss": 0.3374, "lr": 3.79387132253485e-05, "epoch": 1.4333365749294953, "percentage": 47.78, "elapsed_time": "21:16:10", "remaining_time": "23:14:57", "throughput": 27906.38, "total_tokens": 2136794368} +{"current_steps": 7380, "total_steps": 15426, "loss": 0.3369, "lr": 3.792779655225243e-05, "epoch": 1.4352815326266652, "percentage": 47.84, "elapsed_time": "21:17:55", "remaining_time": "23:13:14", "throughput": 27906.32, "total_tokens": 2139730624} +{"current_steps": 7390, "total_steps": 15426, "loss": 0.3339, "lr": 3.791688929738902e-05, "epoch": 1.4372264903238354, "percentage": 47.91, "elapsed_time": "21:19:34", "remaining_time": "23:11:25", "throughput": 27907.85, "total_tokens": 2142603392} +{"current_steps": 7400, "total_steps": 15426, "loss": 0.3352, "lr": 3.79059914472236e-05, "epoch": 1.4391714480210056, "percentage": 47.97, "elapsed_time": 
"21:21:13", "remaining_time": "23:09:36", "throughput": 27909.13, "total_tokens": 2145474176} +{"current_steps": 7410, "total_steps": 15426, "loss": 0.3345, "lr": 3.7895102988248716e-05, "epoch": 1.4411164057181756, "percentage": 48.04, "elapsed_time": "21:22:55", "remaining_time": "23:07:50", "throughput": 27910.2, "total_tokens": 2148395136} +{"current_steps": 7420, "total_steps": 15426, "loss": 0.3354, "lr": 3.7884223906984064e-05, "epoch": 1.4430613634153457, "percentage": 48.1, "elapsed_time": "21:24:35", "remaining_time": "23:06:03", "throughput": 27911.21, "total_tokens": 2151281536} +{"current_steps": 7430, "total_steps": 15426, "loss": 0.3414, "lr": 3.787335418997641e-05, "epoch": 1.445006321112516, "percentage": 48.17, "elapsed_time": "21:26:13", "remaining_time": "23:04:12", "throughput": 27912.89, "total_tokens": 2154128512} +{"current_steps": 7440, "total_steps": 15426, "loss": 0.3384, "lr": 3.786249382379952e-05, "epoch": 1.4469512788096859, "percentage": 48.23, "elapsed_time": "21:27:50", "remaining_time": "23:02:21", "throughput": 27915.02, "total_tokens": 2157018752} +{"current_steps": 7450, "total_steps": 15426, "loss": 0.3388, "lr": 3.785164279505411e-05, "epoch": 1.4488962365068558, "percentage": 48.3, "elapsed_time": "21:29:30", "remaining_time": "23:00:33", "throughput": 27916.54, "total_tokens": 2159915584} +{"current_steps": 7460, "total_steps": 15426, "loss": 0.3367, "lr": 3.7840801090367744e-05, "epoch": 1.450841194204026, "percentage": 48.36, "elapsed_time": "21:31:14", "remaining_time": "22:58:49", "throughput": 27916.11, "total_tokens": 2162778752} +{"current_steps": 7470, "total_steps": 15426, "loss": 0.3388, "lr": 3.782996869639479e-05, "epoch": 1.4527861519011962, "percentage": 48.42, "elapsed_time": "21:32:59", "remaining_time": "22:57:07", "throughput": 27916.05, "total_tokens": 2165719424} +{"current_steps": 7480, "total_steps": 15426, "loss": 0.3338, "lr": 3.7819145599816354e-05, "epoch": 1.4547311095983662, "percentage": 48.49, 
"elapsed_time": "21:34:44", "remaining_time": "22:55:24", "throughput": 27916.04, "total_tokens": 2168643520} +{"current_steps": 7490, "total_steps": 15426, "loss": 0.3386, "lr": 3.780833178734018e-05, "epoch": 1.4566760672955363, "percentage": 48.55, "elapsed_time": "21:36:22", "remaining_time": "22:53:34", "throughput": 27917.41, "total_tokens": 2171499584} +{"current_steps": 7500, "total_steps": 15426, "loss": 0.341, "lr": 3.77975272457006e-05, "epoch": 1.4586210249927065, "percentage": 48.62, "elapsed_time": "21:38:05", "remaining_time": "22:51:48", "throughput": 27917.56, "total_tokens": 2174362240} +{"current_steps": 7510, "total_steps": 15426, "loss": 0.3365, "lr": 3.778673196165851e-05, "epoch": 1.4605659826898765, "percentage": 48.68, "elapsed_time": "21:39:48", "remaining_time": "22:50:04", "throughput": 27918.2, "total_tokens": 2177290112} +{"current_steps": 7520, "total_steps": 15426, "loss": 0.3388, "lr": 3.7775945922001186e-05, "epoch": 1.4625109403870467, "percentage": 48.75, "elapsed_time": "21:41:30", "remaining_time": "22:48:18", "throughput": 27919.39, "total_tokens": 2180230656} +{"current_steps": 7530, "total_steps": 15426, "loss": 0.3345, "lr": 3.776516911354236e-05, "epoch": 1.4644558980842166, "percentage": 48.81, "elapsed_time": "21:43:13", "remaining_time": "22:46:34", "throughput": 27920.01, "total_tokens": 2183174592} +{"current_steps": 7540, "total_steps": 15426, "loss": 0.3386, "lr": 3.775440152312205e-05, "epoch": 1.4664008557813868, "percentage": 48.88, "elapsed_time": "21:44:52", "remaining_time": "22:44:45", "throughput": 27920.98, "total_tokens": 2186009536} +{"current_steps": 7550, "total_steps": 15426, "loss": 0.3377, "lr": 3.774364313760652e-05, "epoch": 1.4683458134785567, "percentage": 48.94, "elapsed_time": "21:46:32", "remaining_time": "22:42:57", "throughput": 27922.37, "total_tokens": 2188899328} +{"current_steps": 7560, "total_steps": 15426, "loss": 0.3343, "lr": 3.7732893943888224e-05, "epoch": 1.470290771175727, 
"percentage": 49.01, "elapsed_time": "21:48:12", "remaining_time": "22:41:09", "throughput": 27923.64, "total_tokens": 2191806912} +{"current_steps": 7570, "total_steps": 15426, "loss": 0.336, "lr": 3.772215392888574e-05, "epoch": 1.472235728872897, "percentage": 49.07, "elapsed_time": "21:49:54", "remaining_time": "22:39:24", "throughput": 27924.72, "total_tokens": 2194737024} +{"current_steps": 7580, "total_steps": 15426, "loss": 0.3394, "lr": 3.771142307954368e-05, "epoch": 1.474180686570067, "percentage": 49.14, "elapsed_time": "21:51:33", "remaining_time": "22:37:34", "throughput": 27926.82, "total_tokens": 2197651200} +{"current_steps": 7590, "total_steps": 15426, "loss": 0.3328, "lr": 3.770070138283264e-05, "epoch": 1.4761256442672372, "percentage": 49.2, "elapsed_time": "21:53:15", "remaining_time": "22:35:49", "throughput": 27927.39, "total_tokens": 2200558592} +{"current_steps": 7600, "total_steps": 15426, "loss": 0.3342, "lr": 3.768998882574915e-05, "epoch": 1.4780706019644072, "percentage": 49.27, "elapsed_time": "21:54:56", "remaining_time": "22:34:02", "throughput": 27928.57, "total_tokens": 2203471040} +{"current_steps": 7610, "total_steps": 15426, "loss": 0.3371, "lr": 3.767928539531557e-05, "epoch": 1.4800155596615774, "percentage": 49.33, "elapsed_time": "21:56:39", "remaining_time": "22:32:18", "throughput": 27929.54, "total_tokens": 2206431488} +{"current_steps": 7620, "total_steps": 15426, "loss": 0.3353, "lr": 3.7668591078580055e-05, "epoch": 1.4819605173587473, "percentage": 49.4, "elapsed_time": "21:58:20", "remaining_time": "22:30:31", "throughput": 27930.69, "total_tokens": 2209343104} +{"current_steps": 7630, "total_steps": 15426, "loss": 0.3385, "lr": 3.765790586261647e-05, "epoch": 1.4839054750559175, "percentage": 49.46, "elapsed_time": "21:59:57", "remaining_time": "22:28:40", "throughput": 27933.05, "total_tokens": 2212220800} +{"current_steps": 7640, "total_steps": 15426, "loss": 0.3378, "lr": 3.7647229734524326e-05, "epoch": 
1.4858504327530877, "percentage": 49.53, "elapsed_time": "22:01:36", "remaining_time": "22:26:51", "throughput": 27934.63, "total_tokens": 2215117120} +{"current_steps": 7650, "total_steps": 15426, "loss": 0.3313, "lr": 3.7636562681428744e-05, "epoch": 1.4877953904502577, "percentage": 49.59, "elapsed_time": "22:03:20", "remaining_time": "22:25:08", "throughput": 27935.18, "total_tokens": 2218069952} +{"current_steps": 7660, "total_steps": 15426, "loss": 0.3352, "lr": 3.7625904690480346e-05, "epoch": 1.4897403481474278, "percentage": 49.66, "elapsed_time": "22:05:01", "remaining_time": "22:23:21", "throughput": 27936.19, "total_tokens": 2220961152} +{"current_steps": 7670, "total_steps": 15426, "loss": 0.333, "lr": 3.7615255748855224e-05, "epoch": 1.4916853058445978, "percentage": 49.72, "elapsed_time": "22:06:39", "remaining_time": "22:21:31", "throughput": 27937.47, "total_tokens": 2223796096} +{"current_steps": 7680, "total_steps": 15426, "loss": 0.3373, "lr": 3.7604615843754845e-05, "epoch": 1.493630263541768, "percentage": 49.79, "elapsed_time": "22:08:17", "remaining_time": "22:19:42", "throughput": 27939.12, "total_tokens": 2226679360} +{"current_steps": 7690, "total_steps": 15426, "loss": 0.3393, "lr": 3.759398496240601e-05, "epoch": 1.495575221238938, "percentage": 49.85, "elapsed_time": "22:09:58", "remaining_time": "22:17:56", "throughput": 27939.32, "total_tokens": 2229528640} +{"current_steps": 7700, "total_steps": 15426, "loss": 0.3332, "lr": 3.7583363092060815e-05, "epoch": 1.4975201789361081, "percentage": 49.92, "elapsed_time": "22:11:41", "remaining_time": "22:16:11", "throughput": 27939.84, "total_tokens": 2232446272} +{"current_steps": 7710, "total_steps": 15426, "loss": 0.3356, "lr": 3.757275021999649e-05, "epoch": 1.4994651366332783, "percentage": 49.98, "elapsed_time": "22:13:25", "remaining_time": "22:14:27", "throughput": 27940.44, "total_tokens": 2235381056} +{"current_steps": 7720, "total_steps": 15426, "loss": 0.338, "lr": 
3.7562146333515445e-05, "epoch": 1.5014100943304483, "percentage": 50.05, "elapsed_time": "22:15:08", "remaining_time": "22:12:43", "throughput": 27941.11, "total_tokens": 2238327488} +{"current_steps": 7730, "total_steps": 15426, "loss": 0.3339, "lr": 3.7551551419945167e-05, "epoch": 1.5033550520276184, "percentage": 50.11, "elapsed_time": "22:16:52", "remaining_time": "22:10:59", "throughput": 27941.98, "total_tokens": 2241298240} +{"current_steps": 7740, "total_steps": 15426, "loss": 0.331, "lr": 3.7540965466638104e-05, "epoch": 1.5053000097247886, "percentage": 50.18, "elapsed_time": "22:18:35", "remaining_time": "22:09:14", "throughput": 27942.68, "total_tokens": 2244222848} +{"current_steps": 7750, "total_steps": 15426, "loss": 0.3408, "lr": 3.753038846097172e-05, "epoch": 1.5072449674219586, "percentage": 50.24, "elapsed_time": "22:20:12", "remaining_time": "22:07:25", "throughput": 27944.12, "total_tokens": 2247067968} +{"current_steps": 7760, "total_steps": 15426, "loss": 0.3343, "lr": 3.751982039034827e-05, "epoch": 1.5091899251191285, "percentage": 50.3, "elapsed_time": "22:21:54", "remaining_time": "22:05:38", "throughput": 27944.59, "total_tokens": 2249935232} +{"current_steps": 7770, "total_steps": 15426, "loss": 0.3359, "lr": 3.75092612421949e-05, "epoch": 1.5111348828162987, "percentage": 50.37, "elapsed_time": "22:23:33", "remaining_time": "22:03:50", "throughput": 27946.0, "total_tokens": 2252814400} +{"current_steps": 7780, "total_steps": 15426, "loss": 0.339, "lr": 3.7498711003963475e-05, "epoch": 1.5130798405134689, "percentage": 50.43, "elapsed_time": "22:25:13", "remaining_time": "22:02:03", "throughput": 27947.11, "total_tokens": 2255710080} +{"current_steps": 7790, "total_steps": 15426, "loss": 0.337, "lr": 3.748816966313058e-05, "epoch": 1.5150247982106388, "percentage": 50.5, "elapsed_time": "22:26:53", "remaining_time": "22:00:15", "throughput": 27948.26, "total_tokens": 2258592192} +{"current_steps": 7800, "total_steps": 15426, "loss": 
0.3388, "lr": 3.7477637207197374e-05, "epoch": 1.516969755907809, "percentage": 50.56, "elapsed_time": "22:28:31", "remaining_time": "21:58:26", "throughput": 27949.85, "total_tokens": 2261455104} +{"current_steps": 7810, "total_steps": 15426, "loss": 0.3377, "lr": 3.7467113623689666e-05, "epoch": 1.5189147136049792, "percentage": 50.63, "elapsed_time": "22:30:09", "remaining_time": "21:56:37", "throughput": 27951.12, "total_tokens": 2264300800} +{"current_steps": 7820, "total_steps": 15426, "loss": 0.3372, "lr": 3.745659890015768e-05, "epoch": 1.5208596713021492, "percentage": 50.69, "elapsed_time": "22:31:52", "remaining_time": "21:54:53", "throughput": 27951.52, "total_tokens": 2267230848} +{"current_steps": 7830, "total_steps": 15426, "loss": 0.3408, "lr": 3.744609302417615e-05, "epoch": 1.5228046289993191, "percentage": 50.76, "elapsed_time": "22:33:31", "remaining_time": "21:53:04", "throughput": 27952.82, "total_tokens": 2270100672} +{"current_steps": 7840, "total_steps": 15426, "loss": 0.335, "lr": 3.7435595983344175e-05, "epoch": 1.5247495866964893, "percentage": 50.82, "elapsed_time": "22:35:12", "remaining_time": "21:51:18", "throughput": 27953.39, "total_tokens": 2272973248} +{"current_steps": 7850, "total_steps": 15426, "loss": 0.3344, "lr": 3.7425107765285155e-05, "epoch": 1.5266945443936595, "percentage": 50.89, "elapsed_time": "22:36:49", "remaining_time": "21:49:27", "throughput": 27955.63, "total_tokens": 2275846336} +{"current_steps": 7860, "total_steps": 15426, "loss": 0.3354, "lr": 3.741462835764676e-05, "epoch": 1.5286395020908294, "percentage": 50.95, "elapsed_time": "22:38:31", "remaining_time": "21:47:42", "throughput": 27956.63, "total_tokens": 2278794496} +{"current_steps": 7870, "total_steps": 15426, "loss": 0.3366, "lr": 3.740415774810088e-05, "epoch": 1.5305844597879996, "percentage": 51.02, "elapsed_time": "22:40:28", "remaining_time": "21:46:11", "throughput": 27952.56, "total_tokens": 2281723072} +{"current_steps": 7880, 
"total_steps": 15426, "loss": 0.3388, "lr": 3.739369592434351e-05, "epoch": 1.5325294174851698, "percentage": 51.08, "elapsed_time": "22:43:44", "remaining_time": "21:45:56", "throughput": 27920.99, "total_tokens": 2284632768} +{"current_steps": 7890, "total_steps": 15426, "loss": 0.3359, "lr": 3.738324287409473e-05, "epoch": 1.5344743751823398, "percentage": 51.15, "elapsed_time": "22:46:50", "remaining_time": "21:45:31", "throughput": 27893.2, "total_tokens": 2287540544} +{"current_steps": 7900, "total_steps": 15426, "loss": 0.3374, "lr": 3.7372798585098644e-05, "epoch": 1.5364193328795097, "percentage": 51.21, "elapsed_time": "22:51:01", "remaining_time": "21:46:06", "throughput": 27843.39, "total_tokens": 2290427904} +{"current_steps": 7910, "total_steps": 15426, "loss": 0.3321, "lr": 3.736236304512331e-05, "epoch": 1.5383642905766801, "percentage": 51.28, "elapsed_time": "22:54:05", "remaining_time": "21:45:38", "throughput": 27816.25, "total_tokens": 2293310912} +{"current_steps": 7920, "total_steps": 15426, "loss": 0.3341, "lr": 3.735193624196067e-05, "epoch": 1.54030924827385, "percentage": 51.34, "elapsed_time": "22:56:54", "remaining_time": "21:44:56", "throughput": 27794.58, "total_tokens": 2296243520} +{"current_steps": 7930, "total_steps": 15426, "loss": 0.3361, "lr": 3.7341518163426514e-05, "epoch": 1.54225420597102, "percentage": 51.41, "elapsed_time": "22:59:17", "remaining_time": "21:43:48", "throughput": 27781.45, "total_tokens": 2299129728} +{"current_steps": 7940, "total_steps": 15426, "loss": 0.3364, "lr": 3.73311087973604e-05, "epoch": 1.5441991636681902, "percentage": 51.47, "elapsed_time": "23:01:36", "remaining_time": "21:42:36", "throughput": 27769.49, "total_tokens": 2302005120} +{"current_steps": 7950, "total_steps": 15426, "loss": 0.3337, "lr": 3.732070813162561e-05, "epoch": 1.5461441213653604, "percentage": 51.54, "elapsed_time": "23:04:51", "remaining_time": "21:42:17", "throughput": 27739.62, "total_tokens": 2304922880} 
+{"current_steps": 7960, "total_steps": 15426, "loss": 0.3353, "lr": 3.731031615410908e-05, "epoch": 1.5480890790625303, "percentage": 51.6, "elapsed_time": "23:09:02", "remaining_time": "21:42:50", "throughput": 27691.06, "total_tokens": 2307840832} +{"current_steps": 7970, "total_steps": 15426, "loss": 0.3368, "lr": 3.729993285272132e-05, "epoch": 1.5500340367597005, "percentage": 51.67, "elapsed_time": "23:12:01", "remaining_time": "21:42:15", "throughput": 27666.84, "total_tokens": 2310785856} +{"current_steps": 7980, "total_steps": 15426, "loss": 0.3345, "lr": 3.7289558215396414e-05, "epoch": 1.5519789944568707, "percentage": 51.73, "elapsed_time": "23:14:52", "remaining_time": "21:41:32", "throughput": 27644.99, "total_tokens": 2313681920} +{"current_steps": 7990, "total_steps": 15426, "loss": 0.3344, "lr": 3.727919223009191e-05, "epoch": 1.5539239521540407, "percentage": 51.8, "elapsed_time": "23:17:23", "remaining_time": "21:40:30", "throughput": 27630.07, "total_tokens": 2316608448} +{"current_steps": 8000, "total_steps": 15426, "loss": 0.3365, "lr": 3.726883488478877e-05, "epoch": 1.5558689098512106, "percentage": 51.86, "elapsed_time": "23:19:48", "remaining_time": "21:39:22", "throughput": 27617.22, "total_tokens": 2319525952} +{"current_steps": 8010, "total_steps": 15426, "loss": 0.3335, "lr": 3.7258486167491323e-05, "epoch": 1.5578138675483808, "percentage": 51.93, "elapsed_time": "23:22:08", "remaining_time": "21:38:09", "throughput": 27605.47, "total_tokens": 2322413504} +{"current_steps": 8020, "total_steps": 15426, "loss": 0.3363, "lr": 3.724814606622721e-05, "epoch": 1.559758825245551, "percentage": 51.99, "elapsed_time": "23:24:25", "remaining_time": "21:36:53", "throughput": 27595.71, "total_tokens": 2325356928} +{"current_steps": 8030, "total_steps": 15426, "loss": 0.3358, "lr": 3.7237814569047294e-05, "epoch": 1.561703782942721, "percentage": 52.05, "elapsed_time": "23:26:47", "remaining_time": "21:35:43", "throughput": 27583.82, 
"total_tokens": 2328290624} +{"current_steps": 8040, "total_steps": 15426, "loss": 0.3324, "lr": 3.7227491664025656e-05, "epoch": 1.5636487406398911, "percentage": 52.12, "elapsed_time": "23:29:08", "remaining_time": "21:34:30", "throughput": 27572.98, "total_tokens": 2331249600} +{"current_steps": 8050, "total_steps": 15426, "loss": 0.3346, "lr": 3.721717733925948e-05, "epoch": 1.5655936983370613, "percentage": 52.18, "elapsed_time": "23:31:13", "remaining_time": "21:33:03", "throughput": 27566.73, "total_tokens": 2334162752} +{"current_steps": 8060, "total_steps": 15426, "loss": 0.3372, "lr": 3.720687158286904e-05, "epoch": 1.5675386560342313, "percentage": 52.25, "elapsed_time": "23:33:22", "remaining_time": "21:31:40", "throughput": 27558.63, "total_tokens": 2337037696} +{"current_steps": 8070, "total_steps": 15426, "loss": 0.3334, "lr": 3.719657438299762e-05, "epoch": 1.5694836137314012, "percentage": 52.31, "elapsed_time": "23:35:25", "remaining_time": "21:30:11", "throughput": 27553.74, "total_tokens": 2340012928} +{"current_steps": 8080, "total_steps": 15426, "loss": 0.3369, "lr": 3.7186285727811446e-05, "epoch": 1.5714285714285714, "percentage": 52.38, "elapsed_time": "23:37:29", "remaining_time": "21:28:43", "throughput": 27547.86, "total_tokens": 2342920448} +{"current_steps": 8090, "total_steps": 15426, "loss": 0.3312, "lr": 3.717600560549967e-05, "epoch": 1.5733735291257416, "percentage": 52.44, "elapsed_time": "23:39:42", "remaining_time": "21:27:22", "throughput": 27539.64, "total_tokens": 2345883968} +{"current_steps": 8100, "total_steps": 15426, "loss": 0.3358, "lr": 3.716573400427426e-05, "epoch": 1.5753184868229115, "percentage": 52.51, "elapsed_time": "23:41:48", "remaining_time": "21:25:56", "throughput": 27533.37, "total_tokens": 2348829504} +{"current_steps": 8110, "total_steps": 15426, "loss": 0.3329, "lr": 3.7155470912370004e-05, "epoch": 1.5772634445200817, "percentage": 52.57, "elapsed_time": "23:43:59", "remaining_time": "21:24:34", 
"throughput": 27526.03, "total_tokens": 2351804992} +{"current_steps": 8120, "total_steps": 15426, "loss": 0.3347, "lr": 3.714521631804439e-05, "epoch": 1.579208402217252, "percentage": 52.64, "elapsed_time": "23:45:57", "remaining_time": "21:23:00", "throughput": 27521.38, "total_tokens": 2354660608} +{"current_steps": 8130, "total_steps": 15426, "loss": 0.3381, "lr": 3.713497020957759e-05, "epoch": 1.5811533599144219, "percentage": 52.7, "elapsed_time": "23:47:59", "remaining_time": "21:21:30", "throughput": 27515.85, "total_tokens": 2357553920} +{"current_steps": 8140, "total_steps": 15426, "loss": 0.3365, "lr": 3.712473257527238e-05, "epoch": 1.5830983176115918, "percentage": 52.77, "elapsed_time": "23:50:01", "remaining_time": "21:19:59", "throughput": 27510.87, "total_tokens": 2360468672} +{"current_steps": 8150, "total_steps": 15426, "loss": 0.3326, "lr": 3.711450340345412e-05, "epoch": 1.585043275308762, "percentage": 52.83, "elapsed_time": "23:52:12", "remaining_time": "21:18:36", "throughput": 27503.02, "total_tokens": 2363391552} +{"current_steps": 8160, "total_steps": 15426, "loss": 0.3347, "lr": 3.710428268247067e-05, "epoch": 1.5869882330059322, "percentage": 52.9, "elapsed_time": "23:54:18", "remaining_time": "21:17:10", "throughput": 27496.27, "total_tokens": 2366287296} +{"current_steps": 8170, "total_steps": 15426, "loss": 0.3346, "lr": 3.709407040069233e-05, "epoch": 1.5889331907031021, "percentage": 52.96, "elapsed_time": "23:56:46", "remaining_time": "21:16:02", "throughput": 27483.16, "total_tokens": 2369227776} +{"current_steps": 8180, "total_steps": 15426, "loss": 0.3335, "lr": 3.708386654651179e-05, "epoch": 1.5908781484002723, "percentage": 53.03, "elapsed_time": "23:59:18", "remaining_time": "21:14:58", "throughput": 27468.18, "total_tokens": 2372122496} +{"current_steps": 8190, "total_steps": 15426, "loss": 0.3332, "lr": 3.707367110834409e-05, "epoch": 1.5928231060974425, "percentage": 53.09, "elapsed_time": "1 day, 0:01:52", 
"remaining_time": "21:13:54", "throughput": 27453.43, "total_tokens": 2375052544} +{"current_steps": 8200, "total_steps": 15426, "loss": 0.3354, "lr": 3.7063484074626555e-05, "epoch": 1.5947680637946124, "percentage": 53.16, "elapsed_time": "1 day, 0:04:19", "remaining_time": "21:12:46", "throughput": 27439.63, "total_tokens": 2377914560} +{"current_steps": 8210, "total_steps": 15426, "loss": 0.3339, "lr": 3.7053305433818725e-05, "epoch": 1.5967130214917824, "percentage": 53.22, "elapsed_time": "1 day, 0:06:43", "remaining_time": "21:11:34", "throughput": 27427.41, "total_tokens": 2380801984} +{"current_steps": 8220, "total_steps": 15426, "loss": 0.3352, "lr": 3.704313517440232e-05, "epoch": 1.5986579791889528, "percentage": 53.29, "elapsed_time": "1 day, 0:09:05", "remaining_time": "21:10:19", "throughput": 27416.32, "total_tokens": 2383713920} +{"current_steps": 8230, "total_steps": 15426, "loss": 0.3304, "lr": 3.703297328488118e-05, "epoch": 1.6006029368861228, "percentage": 53.35, "elapsed_time": "1 day, 0:11:10", "remaining_time": "21:08:50", "throughput": 27410.26, "total_tokens": 2386618688} +{"current_steps": 8240, "total_steps": 15426, "loss": 0.3349, "lr": 3.70228197537812e-05, "epoch": 1.6025478945832927, "percentage": 53.42, "elapsed_time": "1 day, 0:13:20", "remaining_time": "21:07:26", "throughput": 27402.92, "total_tokens": 2389540096} +{"current_steps": 8250, "total_steps": 15426, "loss": 0.336, "lr": 3.7012674569650305e-05, "epoch": 1.604492852280463, "percentage": 53.48, "elapsed_time": "1 day, 0:15:26", "remaining_time": "21:05:57", "throughput": 27396.81, "total_tokens": 2392454208} +{"current_steps": 8260, "total_steps": 15426, "loss": 0.3352, "lr": 3.700253772105835e-05, "epoch": 1.606437809977633, "percentage": 53.55, "elapsed_time": "1 day, 0:17:24", "remaining_time": "21:04:22", "throughput": 27392.71, "total_tokens": 2395339904} +{"current_steps": 8270, "total_steps": 15426, "loss": 0.3341, "lr": 3.699240919659711e-05, "epoch": 
1.608382767674803, "percentage": 53.61, "elapsed_time": "1 day, 0:19:28", "remaining_time": "21:02:52", "throughput": 27387.0, "total_tokens": 2398230656} +{"current_steps": 8280, "total_steps": 15426, "loss": 0.3391, "lr": 3.698228898488019e-05, "epoch": 1.610327725371973, "percentage": 53.68, "elapsed_time": "1 day, 0:21:29", "remaining_time": "21:01:20", "throughput": 27382.55, "total_tokens": 2401171200} +{"current_steps": 8290, "total_steps": 15426, "loss": 0.3319, "lr": 3.6972177074543e-05, "epoch": 1.6122726830691434, "percentage": 53.74, "elapsed_time": "1 day, 0:23:44", "remaining_time": "20:59:59", "throughput": 27374.17, "total_tokens": 2404128640} +{"current_steps": 8300, "total_steps": 15426, "loss": 0.3356, "lr": 3.69620734542427e-05, "epoch": 1.6142176407663134, "percentage": 53.81, "elapsed_time": "1 day, 0:26:08", "remaining_time": "20:58:45", "throughput": 27361.66, "total_tokens": 2406971072} +{"current_steps": 8310, "total_steps": 15426, "loss": 0.3332, "lr": 3.695197811265811e-05, "epoch": 1.6161625984634833, "percentage": 53.87, "elapsed_time": "1 day, 0:28:47", "remaining_time": "20:57:45", "throughput": 27345.46, "total_tokens": 2409890880} +{"current_steps": 8320, "total_steps": 15426, "loss": 0.334, "lr": 3.6941891038489694e-05, "epoch": 1.6181075561606535, "percentage": 53.93, "elapsed_time": "1 day, 0:30:51", "remaining_time": "20:56:14", "throughput": 27340.15, "total_tokens": 2412796544} +{"current_steps": 8330, "total_steps": 15426, "loss": 0.3364, "lr": 3.693181222045952e-05, "epoch": 1.6200525138578237, "percentage": 54.0, "elapsed_time": "1 day, 0:32:49", "remaining_time": "20:54:38", "throughput": 27336.09, "total_tokens": 2415667200} +{"current_steps": 8340, "total_steps": 15426, "loss": 0.3361, "lr": 3.692174164731113e-05, "epoch": 1.6219974715549936, "percentage": 54.06, "elapsed_time": "1 day, 0:34:57", "remaining_time": "20:53:10", "throughput": 27328.98, "total_tokens": 2418540480} +{"current_steps": 8350, "total_steps": 
15426, "loss": 0.3323, "lr": 3.6911679307809595e-05, "epoch": 1.6239424292521638, "percentage": 54.13, "elapsed_time": "1 day, 0:37:08", "remaining_time": "20:51:46", "throughput": 27321.81, "total_tokens": 2421491392} +{"current_steps": 8360, "total_steps": 15426, "loss": 0.3378, "lr": 3.690162519074137e-05, "epoch": 1.625887386949334, "percentage": 54.19, "elapsed_time": "1 day, 0:39:02", "remaining_time": "20:50:06", "throughput": 27318.74, "total_tokens": 2424345728} +{"current_steps": 8370, "total_steps": 15426, "loss": 0.3376, "lr": 3.689157928491431e-05, "epoch": 1.627832344646504, "percentage": 54.26, "elapsed_time": "1 day, 0:40:57", "remaining_time": "20:48:28", "throughput": 27315.66, "total_tokens": 2427209664} +{"current_steps": 8380, "total_steps": 15426, "loss": 0.3364, "lr": 3.6881541579157566e-05, "epoch": 1.629777302343674, "percentage": 54.32, "elapsed_time": "1 day, 0:42:55", "remaining_time": "20:46:51", "throughput": 27311.64, "total_tokens": 2430074624} +{"current_steps": 8390, "total_steps": 15426, "loss": 0.3373, "lr": 3.687151206232154e-05, "epoch": 1.631722260040844, "percentage": 54.39, "elapsed_time": "1 day, 0:44:58", "remaining_time": "20:45:19", "throughput": 27306.57, "total_tokens": 2432986432} +{"current_steps": 8400, "total_steps": 15426, "loss": 0.3368, "lr": 3.686149072327788e-05, "epoch": 1.6336672177380143, "percentage": 54.45, "elapsed_time": "1 day, 0:47:00", "remaining_time": "20:43:46", "throughput": 27301.41, "total_tokens": 2435842816} +{"current_steps": 8410, "total_steps": 15426, "loss": 0.3387, "lr": 3.685147755091937e-05, "epoch": 1.6356121754351842, "percentage": 54.52, "elapsed_time": "1 day, 0:49:09", "remaining_time": "20:42:18", "throughput": 27295.34, "total_tokens": 2438812736} +{"current_steps": 8420, "total_steps": 15426, "loss": 0.3378, "lr": 3.684147253415992e-05, "epoch": 1.6375571331323544, "percentage": 54.58, "elapsed_time": "1 day, 0:51:21", "remaining_time": "20:40:54", "throughput": 27287.28, 
"total_tokens": 2441700544} +{"current_steps": 8430, "total_steps": 15426, "loss": 0.3333, "lr": 3.683147566193448e-05, "epoch": 1.6395020908295246, "percentage": 54.65, "elapsed_time": "1 day, 0:53:46", "remaining_time": "20:39:40", "throughput": 27275.55, "total_tokens": 2444613696} +{"current_steps": 8440, "total_steps": 15426, "loss": 0.3369, "lr": 3.6821486923199025e-05, "epoch": 1.6414470485266945, "percentage": 54.71, "elapsed_time": "1 day, 0:55:52", "remaining_time": "20:38:10", "throughput": 27269.31, "total_tokens": 2447495488} +{"current_steps": 8450, "total_steps": 15426, "loss": 0.3333, "lr": 3.681150630693046e-05, "epoch": 1.6433920062238645, "percentage": 54.78, "elapsed_time": "1 day, 0:57:47", "remaining_time": "20:36:31", "throughput": 27266.51, "total_tokens": 2450383296} +{"current_steps": 8460, "total_steps": 15426, "loss": 0.3364, "lr": 3.6801533802126615e-05, "epoch": 1.6453369639210347, "percentage": 54.84, "elapsed_time": "1 day, 0:59:48", "remaining_time": "20:34:56", "throughput": 27261.71, "total_tokens": 2453238848} +{"current_steps": 8470, "total_steps": 15426, "loss": 0.3364, "lr": 3.679156939780617e-05, "epoch": 1.6472819216182049, "percentage": 54.91, "elapsed_time": "1 day, 1:01:52", "remaining_time": "20:33:25", "throughput": 27256.2, "total_tokens": 2456129664} +{"current_steps": 8480, "total_steps": 15426, "loss": 0.3385, "lr": 3.6781613083008594e-05, "epoch": 1.6492268793153748, "percentage": 54.97, "elapsed_time": "1 day, 1:03:59", "remaining_time": "20:31:55", "throughput": 27249.6, "total_tokens": 2459001152} +{"current_steps": 8490, "total_steps": 15426, "loss": 0.3293, "lr": 3.677166484679412e-05, "epoch": 1.651171837012545, "percentage": 55.04, "elapsed_time": "1 day, 1:06:12", "remaining_time": "20:30:30", "throughput": 27241.97, "total_tokens": 2461922240} +{"current_steps": 8500, "total_steps": 15426, "loss": 0.3331, "lr": 3.676172467824368e-05, "epoch": 1.6531167947097152, "percentage": 55.1, "elapsed_time": "1 day, 
1:08:08", "remaining_time": "20:28:51", "throughput": 27239.18, "total_tokens": 2464824064} +{"current_steps": 8510, "total_steps": 15426, "loss": 0.3324, "lr": 3.675179256645885e-05, "epoch": 1.6550617524068851, "percentage": 55.17, "elapsed_time": "1 day, 1:10:19", "remaining_time": "20:27:25", "throughput": 27231.48, "total_tokens": 2467712640} +{"current_steps": 8520, "total_steps": 15426, "loss": 0.333, "lr": 3.674186850056181e-05, "epoch": 1.657006710104055, "percentage": 55.23, "elapsed_time": "1 day, 1:12:41", "remaining_time": "20:26:07", "throughput": 27220.28, "total_tokens": 2470545600} +{"current_steps": 8530, "total_steps": 15426, "loss": 0.332, "lr": 3.67319524696953e-05, "epoch": 1.6589516678012253, "percentage": 55.3, "elapsed_time": "1 day, 1:14:47", "remaining_time": "20:24:37", "throughput": 27214.04, "total_tokens": 2473411072} +{"current_steps": 8540, "total_steps": 15426, "loss": 0.3341, "lr": 3.6722044463022536e-05, "epoch": 1.6608966254983955, "percentage": 55.36, "elapsed_time": "1 day, 1:16:57", "remaining_time": "20:23:09", "throughput": 27207.71, "total_tokens": 2476375744} +{"current_steps": 8550, "total_steps": 15426, "loss": 0.3344, "lr": 3.6712144469727214e-05, "epoch": 1.6628415831955654, "percentage": 55.43, "elapsed_time": "1 day, 1:18:59", "remaining_time": "20:21:35", "throughput": 27202.94, "total_tokens": 2479267584} +{"current_steps": 8560, "total_steps": 15426, "loss": 0.338, "lr": 3.67022524790134e-05, "epoch": 1.6647865408927356, "percentage": 55.49, "elapsed_time": "1 day, 1:20:57", "remaining_time": "20:19:57", "throughput": 27199.87, "total_tokens": 2482194624} +{"current_steps": 8570, "total_steps": 15426, "loss": 0.3343, "lr": 3.6692368480105546e-05, "epoch": 1.6667314985899058, "percentage": 55.56, "elapsed_time": "1 day, 1:23:08", "remaining_time": "20:18:31", "throughput": 27192.87, "total_tokens": 2485122496} +{"current_steps": 8580, "total_steps": 15426, "loss": 0.334, "lr": 3.6682492462248374e-05, "epoch": 
1.6686764562870757, "percentage": 55.62, "elapsed_time": "1 day, 1:25:41", "remaining_time": "20:17:21", "throughput": 27178.95, "total_tokens": 2487997568} +{"current_steps": 8590, "total_steps": 15426, "loss": 0.3338, "lr": 3.667262441470689e-05, "epoch": 1.6706214139842457, "percentage": 55.69, "elapsed_time": "1 day, 1:28:02", "remaining_time": "20:16:01", "throughput": 27168.25, "total_tokens": 2490847808} +{"current_steps": 8600, "total_steps": 15426, "loss": 0.3335, "lr": 3.6662764326766255e-05, "epoch": 1.672566371681416, "percentage": 55.75, "elapsed_time": "1 day, 1:30:22", "remaining_time": "20:14:41", "throughput": 27158.75, "total_tokens": 2493789952} +{"current_steps": 8610, "total_steps": 15426, "loss": 0.3353, "lr": 3.665291218773185e-05, "epoch": 1.674511329378586, "percentage": 55.81, "elapsed_time": "1 day, 1:32:47", "remaining_time": "20:13:25", "throughput": 27147.32, "total_tokens": 2496674816} +{"current_steps": 8620, "total_steps": 15426, "loss": 0.3326, "lr": 3.664306798692912e-05, "epoch": 1.676456287075756, "percentage": 55.88, "elapsed_time": "1 day, 1:34:58", "remaining_time": "20:11:57", "throughput": 27139.63, "total_tokens": 2499525248} +{"current_steps": 8630, "total_steps": 15426, "loss": 0.3318, "lr": 3.6633231713703576e-05, "epoch": 1.6784012447729262, "percentage": 55.94, "elapsed_time": "1 day, 1:37:13", "remaining_time": "20:10:32", "throughput": 27131.06, "total_tokens": 2502388416} +{"current_steps": 8640, "total_steps": 15426, "loss": 0.336, "lr": 3.6623403357420745e-05, "epoch": 1.6803462024700964, "percentage": 56.01, "elapsed_time": "1 day, 1:39:38", "remaining_time": "20:09:15", "throughput": 27120.18, "total_tokens": 2505326208} +{"current_steps": 8650, "total_steps": 15426, "loss": 0.3368, "lr": 3.661358290746611e-05, "epoch": 1.6822911601672663, "percentage": 56.07, "elapsed_time": "1 day, 1:41:44", "remaining_time": "20:07:43", "throughput": 27114.85, "total_tokens": 2508233536} +{"current_steps": 8660, 
"total_steps": 15426, "loss": 0.3359, "lr": 3.6603770353245056e-05, "epoch": 1.6842361178644365, "percentage": 56.14, "elapsed_time": "1 day, 1:43:39", "remaining_time": "20:06:03", "throughput": 27112.73, "total_tokens": 2511177152} +{"current_steps": 8670, "total_steps": 15426, "loss": 0.3334, "lr": 3.659396568418286e-05, "epoch": 1.6861810755616067, "percentage": 56.2, "elapsed_time": "1 day, 1:45:28", "remaining_time": "20:04:17", "throughput": 27112.16, "total_tokens": 2514075392} +{"current_steps": 8680, "total_steps": 15426, "loss": 0.3354, "lr": 3.658416888972459e-05, "epoch": 1.6881260332587766, "percentage": 56.27, "elapsed_time": "1 day, 1:47:31", "remaining_time": "20:02:43", "throughput": 27108.13, "total_tokens": 2517030656} +{"current_steps": 8690, "total_steps": 15426, "loss": 0.3328, "lr": 3.6574379959335106e-05, "epoch": 1.6900709909559466, "percentage": 56.33, "elapsed_time": "1 day, 1:49:37", "remaining_time": "20:01:11", "throughput": 27102.9, "total_tokens": 2519967232} +{"current_steps": 8700, "total_steps": 15426, "loss": 0.3308, "lr": 3.6564598882498976e-05, "epoch": 1.6920159486531168, "percentage": 56.4, "elapsed_time": "1 day, 1:51:35", "remaining_time": "19:59:32", "throughput": 27100.26, "total_tokens": 2522921984} +{"current_steps": 8710, "total_steps": 15426, "loss": 0.3355, "lr": 3.655482564872043e-05, "epoch": 1.693960906350287, "percentage": 56.46, "elapsed_time": "1 day, 1:53:42", "remaining_time": "19:58:00", "throughput": 27094.82, "total_tokens": 2525850176} +{"current_steps": 8720, "total_steps": 15426, "loss": 0.3312, "lr": 3.654506024752336e-05, "epoch": 1.695905864047457, "percentage": 56.53, "elapsed_time": "1 day, 1:55:59", "remaining_time": "19:56:36", "throughput": 27086.44, "total_tokens": 2528768960} +{"current_steps": 8730, "total_steps": 15426, "loss": 0.3354, "lr": 3.653530266845121e-05, "epoch": 1.697850821744627, "percentage": 56.59, "elapsed_time": "1 day, 1:58:05", "remaining_time": "19:55:04", "throughput": 
27080.65, "total_tokens": 2531640000} +{"current_steps": 8740, "total_steps": 15426, "loss": 0.3279, "lr": 3.652555290106696e-05, "epoch": 1.6997957794417973, "percentage": 56.66, "elapsed_time": "1 day, 2:00:12", "remaining_time": "19:53:32", "throughput": 27075.12, "total_tokens": 2534579840} +{"current_steps": 8750, "total_steps": 15426, "loss": 0.3333, "lr": 3.6515810934953084e-05, "epoch": 1.7017407371389672, "percentage": 56.72, "elapsed_time": "1 day, 2:02:16", "remaining_time": "19:51:58", "throughput": 27070.07, "total_tokens": 2537457024} +{"current_steps": 8760, "total_steps": 15426, "loss": 0.3325, "lr": 3.650607675971151e-05, "epoch": 1.7036856948361372, "percentage": 56.79, "elapsed_time": "1 day, 2:04:17", "remaining_time": "19:50:21", "throughput": 27066.51, "total_tokens": 2540387840} +{"current_steps": 8770, "total_steps": 15426, "loss": 0.3344, "lr": 3.649635036496351e-05, "epoch": 1.7056306525333074, "percentage": 56.85, "elapsed_time": "1 day, 2:06:16", "remaining_time": "19:48:43", "throughput": 27063.16, "total_tokens": 2543291008} +{"current_steps": 8780, "total_steps": 15426, "loss": 0.3325, "lr": 3.6486631740349746e-05, "epoch": 1.7075756102304775, "percentage": 56.92, "elapsed_time": "1 day, 2:08:25", "remaining_time": "19:47:12", "throughput": 27056.7, "total_tokens": 2546185216} +{"current_steps": 8790, "total_steps": 15426, "loss": 0.3344, "lr": 3.647692087553018e-05, "epoch": 1.7095205679276475, "percentage": 56.98, "elapsed_time": "1 day, 2:10:30", "remaining_time": "19:45:38", "throughput": 27051.58, "total_tokens": 2549072384} +{"current_steps": 8800, "total_steps": 15426, "loss": 0.3345, "lr": 3.6467217760184005e-05, "epoch": 1.7114655256248177, "percentage": 57.05, "elapsed_time": "1 day, 2:12:40", "remaining_time": "19:44:09", "throughput": 27045.01, "total_tokens": 2551992832} +{"current_steps": 8810, "total_steps": 15426, "loss": 0.3359, "lr": 3.6457522384009625e-05, "epoch": 1.7134104833219879, "percentage": 57.11, 
"elapsed_time": "1 day, 2:14:51", "remaining_time": "19:42:39", "throughput": 27038.34, "total_tokens": 2554888640} +{"current_steps": 8820, "total_steps": 15426, "loss": 0.3351, "lr": 3.644783473672462e-05, "epoch": 1.7153554410191578, "percentage": 57.18, "elapsed_time": "1 day, 2:16:52", "remaining_time": "19:41:02", "throughput": 27034.48, "total_tokens": 2557787072} +{"current_steps": 8830, "total_steps": 15426, "loss": 0.3336, "lr": 3.643815480806568e-05, "epoch": 1.7173003987163278, "percentage": 57.24, "elapsed_time": "1 day, 2:18:51", "remaining_time": "19:39:24", "throughput": 27030.91, "total_tokens": 2560685824} +{"current_steps": 8840, "total_steps": 15426, "loss": 0.3324, "lr": 3.6428482587788555e-05, "epoch": 1.719245356413498, "percentage": 57.31, "elapsed_time": "1 day, 2:20:48", "remaining_time": "19:37:44", "throughput": 27028.33, "total_tokens": 2563603072} +{"current_steps": 8850, "total_steps": 15426, "loss": 0.335, "lr": 3.641881806566803e-05, "epoch": 1.7211903141106681, "percentage": 57.37, "elapsed_time": "1 day, 2:22:46", "remaining_time": "19:36:05", "throughput": 27025.45, "total_tokens": 2566522240} +{"current_steps": 8860, "total_steps": 15426, "loss": 0.3293, "lr": 3.640916123149788e-05, "epoch": 1.723135271807838, "percentage": 57.44, "elapsed_time": "1 day, 2:24:57", "remaining_time": "19:34:35", "throughput": 27018.9, "total_tokens": 2569437120} +{"current_steps": 8870, "total_steps": 15426, "loss": 0.3326, "lr": 3.639951207509079e-05, "epoch": 1.7250802295050083, "percentage": 57.5, "elapsed_time": "1 day, 2:26:53", "remaining_time": "19:32:54", "throughput": 27016.0, "total_tokens": 2572290176} +{"current_steps": 8880, "total_steps": 15426, "loss": 0.3322, "lr": 3.6389870586278333e-05, "epoch": 1.7270251872021785, "percentage": 57.57, "elapsed_time": "1 day, 2:28:53", "remaining_time": "19:31:16", "throughput": 27011.99, "total_tokens": 2575148288} +{"current_steps": 8890, "total_steps": 15426, "loss": 0.3334, "lr": 
3.6380236754910965e-05, "epoch": 1.7289701448993484, "percentage": 57.63, "elapsed_time": "1 day, 2:30:58", "remaining_time": "19:29:42", "throughput": 27007.13, "total_tokens": 2578067584} +{"current_steps": 8900, "total_steps": 15426, "loss": 0.335, "lr": 3.6370610570857897e-05, "epoch": 1.7309151025965184, "percentage": 57.69, "elapsed_time": "1 day, 2:33:03", "remaining_time": "19:28:07", "throughput": 27001.98, "total_tokens": 2580948928} +{"current_steps": 8910, "total_steps": 15426, "loss": 0.3389, "lr": 3.6360992024007114e-05, "epoch": 1.7328600602936888, "percentage": 57.76, "elapsed_time": "1 day, 2:35:11", "remaining_time": "19:26:35", "throughput": 26996.14, "total_tokens": 2583849536} +{"current_steps": 8920, "total_steps": 15426, "loss": 0.3348, "lr": 3.6351381104265304e-05, "epoch": 1.7348050179908587, "percentage": 57.82, "elapsed_time": "1 day, 2:37:39", "remaining_time": "19:25:17", "throughput": 26984.46, "total_tokens": 2586727488} +{"current_steps": 8930, "total_steps": 15426, "loss": 0.3368, "lr": 3.634177780155783e-05, "epoch": 1.7367499756880287, "percentage": 57.89, "elapsed_time": "1 day, 2:39:52", "remaining_time": "19:23:48", "throughput": 26977.39, "total_tokens": 2589619712} +{"current_steps": 8940, "total_steps": 15426, "loss": 0.3289, "lr": 3.633218210582867e-05, "epoch": 1.7386949333851989, "percentage": 57.95, "elapsed_time": "1 day, 2:41:52", "remaining_time": "19:22:10", "throughput": 26974.3, "total_tokens": 2592577152} +{"current_steps": 8950, "total_steps": 15426, "loss": 0.3335, "lr": 3.6322594007040376e-05, "epoch": 1.740639891082369, "percentage": 58.02, "elapsed_time": "1 day, 2:43:43", "remaining_time": "19:20:24", "throughput": 26972.99, "total_tokens": 2595423168} +{"current_steps": 8960, "total_steps": 15426, "loss": 0.3293, "lr": 3.631301349517403e-05, "epoch": 1.742584848779539, "percentage": 58.08, "elapsed_time": "1 day, 2:45:52", "remaining_time": "19:18:52", "throughput": 26966.86, "total_tokens": 2598313664} 
+{"current_steps": 8970, "total_steps": 15426, "loss": 0.3317, "lr": 3.6303440560229216e-05, "epoch": 1.744529806476709, "percentage": 58.15, "elapsed_time": "1 day, 2:47:54", "remaining_time": "19:17:15", "throughput": 26963.02, "total_tokens": 2601233216} +{"current_steps": 8980, "total_steps": 15426, "loss": 0.3314, "lr": 3.629387519222395e-05, "epoch": 1.7464747641738794, "percentage": 58.21, "elapsed_time": "1 day, 2:50:02", "remaining_time": "19:15:43", "throughput": 26957.41, "total_tokens": 2604155584} +{"current_steps": 8990, "total_steps": 15426, "loss": 0.3393, "lr": 3.628431738119464e-05, "epoch": 1.7484197218710493, "percentage": 58.28, "elapsed_time": "1 day, 2:51:57", "remaining_time": "19:14:00", "throughput": 26955.08, "total_tokens": 2607031808} +{"current_steps": 9000, "total_steps": 15426, "loss": 0.3333, "lr": 3.62747671171961e-05, "epoch": 1.7503646795682193, "percentage": 58.34, "elapsed_time": "1 day, 2:54:00", "remaining_time": "19:12:24", "throughput": 26950.48, "total_tokens": 2609901888} +{"current_steps": 9010, "total_steps": 15426, "loss": 0.332, "lr": 3.626522439030138e-05, "epoch": 1.7523096372653895, "percentage": 58.41, "elapsed_time": "1 day, 2:56:09", "remaining_time": "19:10:51", "throughput": 26944.78, "total_tokens": 2612821632} +{"current_steps": 9020, "total_steps": 15426, "loss": 0.3312, "lr": 3.6255689190601863e-05, "epoch": 1.7542545949625596, "percentage": 58.47, "elapsed_time": "1 day, 2:58:10", "remaining_time": "19:09:13", "throughput": 26941.24, "total_tokens": 2615728448} +{"current_steps": 9030, "total_steps": 15426, "loss": 0.3323, "lr": 3.624616150820714e-05, "epoch": 1.7561995526597296, "percentage": 58.54, "elapsed_time": "1 day, 3:00:08", "remaining_time": "19:07:32", "throughput": 26938.0, "total_tokens": 2618588800} +{"current_steps": 9040, "total_steps": 15426, "loss": 0.3329, "lr": 3.623664133324499e-05, "epoch": 1.7581445103568998, "percentage": 58.6, "elapsed_time": "1 day, 3:02:07", "remaining_time": 
"19:05:53", "throughput": 26934.48, "total_tokens": 2621474944} +{"current_steps": 9050, "total_steps": 15426, "loss": 0.3314, "lr": 3.622712865586131e-05, "epoch": 1.76008946805407, "percentage": 58.67, "elapsed_time": "1 day, 3:04:02", "remaining_time": "19:04:11", "throughput": 26932.83, "total_tokens": 2624405696} +{"current_steps": 9060, "total_steps": 15426, "loss": 0.3337, "lr": 3.621762346622014e-05, "epoch": 1.76203442575124, "percentage": 58.73, "elapsed_time": "1 day, 3:05:57", "remaining_time": "19:02:28", "throughput": 26930.91, "total_tokens": 2627305664} +{"current_steps": 9070, "total_steps": 15426, "loss": 0.3351, "lr": 3.620812575450352e-05, "epoch": 1.7639793834484099, "percentage": 58.8, "elapsed_time": "1 day, 3:07:48", "remaining_time": "19:00:43", "throughput": 26930.15, "total_tokens": 2630231936} +{"current_steps": 9080, "total_steps": 15426, "loss": 0.3302, "lr": 3.6198635510911556e-05, "epoch": 1.76592434114558, "percentage": 58.86, "elapsed_time": "1 day, 3:09:49", "remaining_time": "18:59:04", "throughput": 26926.59, "total_tokens": 2633127360} +{"current_steps": 9090, "total_steps": 15426, "loss": 0.3336, "lr": 3.618915272566228e-05, "epoch": 1.7678692988427502, "percentage": 58.93, "elapsed_time": "1 day, 3:11:42", "remaining_time": "18:57:20", "throughput": 26925.18, "total_tokens": 2636044352} +{"current_steps": 9100, "total_steps": 15426, "loss": 0.3362, "lr": 3.6179677388991694e-05, "epoch": 1.7698142565399202, "percentage": 58.99, "elapsed_time": "1 day, 3:13:34", "remaining_time": "18:55:36", "throughput": 26924.12, "total_tokens": 2638950912} +{"current_steps": 9110, "total_steps": 15426, "loss": 0.3368, "lr": 3.617020949115366e-05, "epoch": 1.7717592142370904, "percentage": 59.06, "elapsed_time": "1 day, 3:15:29", "remaining_time": "18:53:53", "throughput": 26921.61, "total_tokens": 2641806784} +{"current_steps": 9120, "total_steps": 15426, "loss": 0.337, "lr": 3.6160749022419886e-05, "epoch": 1.7737041719342606, "percentage": 
59.12, "elapsed_time": "1 day, 3:17:22", "remaining_time": "18:52:09", "throughput": 26919.85, "total_tokens": 2644670784} +{"current_steps": 9130, "total_steps": 15426, "loss": 0.3289, "lr": 3.6151295973079887e-05, "epoch": 1.7756491296314305, "percentage": 59.19, "elapsed_time": "1 day, 3:19:21", "remaining_time": "18:50:29", "throughput": 26917.68, "total_tokens": 2647663488} +{"current_steps": 9140, "total_steps": 15426, "loss": 0.3338, "lr": 3.6141850333440934e-05, "epoch": 1.7775940873286005, "percentage": 59.25, "elapsed_time": "1 day, 3:21:25", "remaining_time": "18:48:52", "throughput": 26913.88, "total_tokens": 2650615232} +{"current_steps": 9150, "total_steps": 15426, "loss": 0.3371, "lr": 3.613241209382803e-05, "epoch": 1.7795390450257706, "percentage": 59.32, "elapsed_time": "1 day, 3:23:15", "remaining_time": "18:47:07", "throughput": 26913.52, "total_tokens": 2653564352} +{"current_steps": 9160, "total_steps": 15426, "loss": 0.3332, "lr": 3.6122981244583834e-05, "epoch": 1.7814840027229408, "percentage": 59.38, "elapsed_time": "1 day, 3:25:04", "remaining_time": "18:45:20", "throughput": 26913.15, "total_tokens": 2656456512} +{"current_steps": 9170, "total_steps": 15426, "loss": 0.3315, "lr": 3.6113557776068644e-05, "epoch": 1.7834289604201108, "percentage": 59.45, "elapsed_time": "1 day, 3:26:52", "remaining_time": "18:43:32", "throughput": 26913.87, "total_tokens": 2659416704} +{"current_steps": 9180, "total_steps": 15426, "loss": 0.3353, "lr": 3.6104141678660386e-05, "epoch": 1.785373918117281, "percentage": 59.51, "elapsed_time": "1 day, 3:28:39", "remaining_time": "18:41:44", "throughput": 26913.3, "total_tokens": 2662255040} +{"current_steps": 9190, "total_steps": 15426, "loss": 0.335, "lr": 3.6094732942754487e-05, "epoch": 1.7873188758144511, "percentage": 59.57, "elapsed_time": "1 day, 3:30:31", "remaining_time": "18:39:59", "throughput": 26911.85, "total_tokens": 2665128448} +{"current_steps": 9200, "total_steps": 15426, "loss": 0.3345, 
"lr": 3.60853315587639e-05, "epoch": 1.789263833511621, "percentage": 59.64, "elapsed_time": "1 day, 3:32:18", "remaining_time": "18:38:11", "throughput": 26911.67, "total_tokens": 2667992576} +{"current_steps": 9210, "total_steps": 15426, "loss": 0.3354, "lr": 3.607593751711909e-05, "epoch": 1.791208791208791, "percentage": 59.7, "elapsed_time": "1 day, 3:34:09", "remaining_time": "18:36:25", "throughput": 26911.01, "total_tokens": 2670907584} +{"current_steps": 9220, "total_steps": 15426, "loss": 0.3314, "lr": 3.60665508082679e-05, "epoch": 1.7931537489059612, "percentage": 59.77, "elapsed_time": "1 day, 3:36:15", "remaining_time": "18:34:50", "throughput": 26906.19, "total_tokens": 2673827776} +{"current_steps": 9230, "total_steps": 15426, "loss": 0.3303, "lr": 3.6057171422675585e-05, "epoch": 1.7950987066031314, "percentage": 59.83, "elapsed_time": "1 day, 3:38:11", "remaining_time": "18:33:07", "throughput": 26904.39, "total_tokens": 2676755136} +{"current_steps": 9240, "total_steps": 15426, "loss": 0.3348, "lr": 3.604779935082474e-05, "epoch": 1.7970436643003014, "percentage": 59.9, "elapsed_time": "1 day, 3:40:05", "remaining_time": "18:31:23", "throughput": 26902.71, "total_tokens": 2679659968} +{"current_steps": 9250, "total_steps": 15426, "loss": 0.3303, "lr": 3.603843458321526e-05, "epoch": 1.7989886219974716, "percentage": 59.96, "elapsed_time": "1 day, 3:41:52", "remaining_time": "18:29:35", "throughput": 26902.87, "total_tokens": 2682549952} +{"current_steps": 9260, "total_steps": 15426, "loss": 0.3324, "lr": 3.6029077110364355e-05, "epoch": 1.8009335796946417, "percentage": 60.03, "elapsed_time": "1 day, 3:43:44", "remaining_time": "18:27:50", "throughput": 26902.11, "total_tokens": 2685487552} +{"current_steps": 9270, "total_steps": 15426, "loss": 0.3324, "lr": 3.60197269228064e-05, "epoch": 1.8028785373918117, "percentage": 60.09, "elapsed_time": "1 day, 3:45:43", "remaining_time": "18:26:10", "throughput": 26899.13, "total_tokens": 2688389568} 
+{"current_steps": 9280, "total_steps": 15426, "loss": 0.3322, "lr": 3.601038401109299e-05, "epoch": 1.8048234950889817, "percentage": 60.16, "elapsed_time": "1 day, 3:47:32", "remaining_time": "18:24:23", "throughput": 26898.4, "total_tokens": 2691244352} +{"current_steps": 9290, "total_steps": 15426, "loss": 0.3345, "lr": 3.6001048365792846e-05, "epoch": 1.806768452786152, "percentage": 60.22, "elapsed_time": "1 day, 3:49:24", "remaining_time": "18:22:38", "throughput": 26897.32, "total_tokens": 2694163264} +{"current_steps": 9300, "total_steps": 15426, "loss": 0.3351, "lr": 3.599171997749182e-05, "epoch": 1.808713410483322, "percentage": 60.29, "elapsed_time": "1 day, 3:51:10", "remaining_time": "18:20:49", "throughput": 26897.34, "total_tokens": 2697010560} +{"current_steps": 9310, "total_steps": 15426, "loss": 0.3322, "lr": 3.598239883679281e-05, "epoch": 1.810658368180492, "percentage": 60.35, "elapsed_time": "1 day, 3:53:03", "remaining_time": "18:19:04", "throughput": 26896.04, "total_tokens": 2699907072} +{"current_steps": 9320, "total_steps": 15426, "loss": 0.3295, "lr": 3.597308493431576e-05, "epoch": 1.8126033258776622, "percentage": 60.42, "elapsed_time": "1 day, 3:54:49", "remaining_time": "18:17:15", "throughput": 26896.14, "total_tokens": 2702784320} +{"current_steps": 9330, "total_steps": 15426, "loss": 0.3341, "lr": 3.596377826069758e-05, "epoch": 1.8145482835748323, "percentage": 60.48, "elapsed_time": "1 day, 3:56:38", "remaining_time": "18:15:28", "throughput": 26895.9, "total_tokens": 2705681216} +{"current_steps": 9340, "total_steps": 15426, "loss": 0.3354, "lr": 3.5954478806592155e-05, "epoch": 1.8164932412720023, "percentage": 60.55, "elapsed_time": "1 day, 3:58:24", "remaining_time": "18:13:39", "throughput": 26896.48, "total_tokens": 2708595840} +{"current_steps": 9350, "total_steps": 15426, "loss": 0.333, "lr": 3.594518656267024e-05, "epoch": 1.8184381989691725, "percentage": 60.61, "elapsed_time": "1 day, 4:00:10", "remaining_time": 
"18:11:50", "throughput": 26896.34, "total_tokens": 2711436224} +{"current_steps": 9360, "total_steps": 15426, "loss": 0.3289, "lr": 3.5935901519619496e-05, "epoch": 1.8203831566663426, "percentage": 60.68, "elapsed_time": "1 day, 4:02:03", "remaining_time": "18:10:06", "throughput": 26894.98, "total_tokens": 2714336576} +{"current_steps": 9370, "total_steps": 15426, "loss": 0.3317, "lr": 3.5926623668144385e-05, "epoch": 1.8223281143635126, "percentage": 60.74, "elapsed_time": "1 day, 4:03:50", "remaining_time": "18:08:17", "throughput": 26895.3, "total_tokens": 2717249152} +{"current_steps": 9380, "total_steps": 15426, "loss": 0.3285, "lr": 3.5917352998966194e-05, "epoch": 1.8242730720606826, "percentage": 60.81, "elapsed_time": "1 day, 4:05:41", "remaining_time": "18:06:31", "throughput": 26894.84, "total_tokens": 2720172416} +{"current_steps": 9390, "total_steps": 15426, "loss": 0.3336, "lr": 3.5908089502822914e-05, "epoch": 1.8262180297578527, "percentage": 60.87, "elapsed_time": "1 day, 4:07:33", "remaining_time": "18:04:47", "throughput": 26893.26, "total_tokens": 2723042496} +{"current_steps": 9400, "total_steps": 15426, "loss": 0.3351, "lr": 3.589883317046929e-05, "epoch": 1.828162987455023, "percentage": 60.94, "elapsed_time": "1 day, 4:09:19", "remaining_time": "18:02:57", "throughput": 26893.69, "total_tokens": 2725929024} +{"current_steps": 9410, "total_steps": 15426, "loss": 0.3341, "lr": 3.5889583992676715e-05, "epoch": 1.8301079451521929, "percentage": 61.0, "elapsed_time": "1 day, 4:11:05", "remaining_time": "18:01:08", "throughput": 26894.01, "total_tokens": 2728812800} +{"current_steps": 9420, "total_steps": 15426, "loss": 0.3292, "lr": 3.5880341960233244e-05, "epoch": 1.832052902849363, "percentage": 61.07, "elapsed_time": "1 day, 4:12:47", "remaining_time": "17:59:17", "throughput": 26895.19, "total_tokens": 2731682816} +{"current_steps": 9430, "total_steps": 15426, "loss": 0.3307, "lr": 3.58711070639435e-05, "epoch": 1.8339978605465332, 
"percentage": 61.13, "elapsed_time": "1 day, 4:14:38", "remaining_time": "17:57:31", "throughput": 26894.65, "total_tokens": 2734600320} +{"current_steps": 9440, "total_steps": 15426, "loss": 0.3313, "lr": 3.586187929462869e-05, "epoch": 1.8359428182437032, "percentage": 61.2, "elapsed_time": "1 day, 4:16:29", "remaining_time": "17:55:45", "throughput": 26894.08, "total_tokens": 2737527168} +{"current_steps": 9450, "total_steps": 15426, "loss": 0.3334, "lr": 3.585265864312651e-05, "epoch": 1.8378877759408732, "percentage": 61.26, "elapsed_time": "1 day, 4:18:13", "remaining_time": "17:53:55", "throughput": 26894.85, "total_tokens": 2740423936} +{"current_steps": 9460, "total_steps": 15426, "loss": 0.3316, "lr": 3.584344510029118e-05, "epoch": 1.8398327336380433, "percentage": 61.33, "elapsed_time": "1 day, 4:20:02", "remaining_time": "17:52:08", "throughput": 26894.57, "total_tokens": 2743326464} +{"current_steps": 9470, "total_steps": 15426, "loss": 0.3356, "lr": 3.583423865699333e-05, "epoch": 1.8417776913352135, "percentage": 61.39, "elapsed_time": "1 day, 4:21:46", "remaining_time": "17:50:18", "throughput": 26895.46, "total_tokens": 2746209408} +{"current_steps": 9480, "total_steps": 15426, "loss": 0.3358, "lr": 3.5825039304119994e-05, "epoch": 1.8437226490323835, "percentage": 61.45, "elapsed_time": "1 day, 4:23:33", "remaining_time": "17:48:29", "throughput": 26895.81, "total_tokens": 2749110848} +{"current_steps": 9490, "total_steps": 15426, "loss": 0.3348, "lr": 3.581584703257461e-05, "epoch": 1.8456676067295537, "percentage": 61.52, "elapsed_time": "1 day, 4:25:20", "remaining_time": "17:46:41", "throughput": 26895.63, "total_tokens": 2751980672} +{"current_steps": 9500, "total_steps": 15426, "loss": 0.3334, "lr": 3.580666183327689e-05, "epoch": 1.8476125644267238, "percentage": 61.58, "elapsed_time": "1 day, 4:27:08", "remaining_time": "17:44:53", "throughput": 26896.0, "total_tokens": 2754920640} +{"current_steps": 9510, "total_steps": 15426, "loss": 
0.3307, "lr": 3.5797483697162906e-05, "epoch": 1.8495575221238938, "percentage": 61.65, "elapsed_time": "1 day, 4:28:52", "remaining_time": "17:43:03", "throughput": 26897.59, "total_tokens": 2757869632} +{"current_steps": 9520, "total_steps": 15426, "loss": 0.3348, "lr": 3.5788312615184936e-05, "epoch": 1.8515024798210638, "percentage": 61.71, "elapsed_time": "1 day, 4:30:39", "remaining_time": "17:41:15", "throughput": 26898.15, "total_tokens": 2760804416} +{"current_steps": 9530, "total_steps": 15426, "loss": 0.3328, "lr": 3.5779148578311476e-05, "epoch": 1.853447437518234, "percentage": 61.78, "elapsed_time": "1 day, 4:32:28", "remaining_time": "17:39:28", "throughput": 26897.82, "total_tokens": 2763704896} +{"current_steps": 9540, "total_steps": 15426, "loss": 0.3329, "lr": 3.5769991577527236e-05, "epoch": 1.8553923952154041, "percentage": 61.84, "elapsed_time": "1 day, 4:34:13", "remaining_time": "17:37:38", "throughput": 26898.04, "total_tokens": 2766567808} +{"current_steps": 9550, "total_steps": 15426, "loss": 0.3371, "lr": 3.5760841603833034e-05, "epoch": 1.857337352912574, "percentage": 61.91, "elapsed_time": "1 day, 4:35:58", "remaining_time": "17:35:49", "throughput": 26898.33, "total_tokens": 2769410752} +{"current_steps": 9560, "total_steps": 15426, "loss": 0.3338, "lr": 3.5751698648245814e-05, "epoch": 1.8592823106097442, "percentage": 61.97, "elapsed_time": "1 day, 4:37:40", "remaining_time": "17:33:58", "throughput": 26899.13, "total_tokens": 2772249472} +{"current_steps": 9570, "total_steps": 15426, "loss": 0.3335, "lr": 3.574256270179857e-05, "epoch": 1.8612272683069144, "percentage": 62.04, "elapsed_time": "1 day, 4:39:29", "remaining_time": "17:32:10", "throughput": 26899.29, "total_tokens": 2775189056} +{"current_steps": 9580, "total_steps": 15426, "loss": 0.3329, "lr": 3.573343375554037e-05, "epoch": 1.8631722260040844, "percentage": 62.1, "elapsed_time": "1 day, 4:41:10", "remaining_time": "17:30:18", "throughput": 26901.23, "total_tokens": 
2778104000} +{"current_steps": 9590, "total_steps": 15426, "loss": 0.3322, "lr": 3.572431180053621e-05, "epoch": 1.8651171837012543, "percentage": 62.17, "elapsed_time": "1 day, 4:42:53", "remaining_time": "17:28:28", "throughput": 26902.64, "total_tokens": 2781029376} +{"current_steps": 9600, "total_steps": 15426, "loss": 0.3303, "lr": 3.571519682786711e-05, "epoch": 1.8670621413984247, "percentage": 62.23, "elapsed_time": "1 day, 4:44:37", "remaining_time": "17:26:37", "throughput": 26903.71, "total_tokens": 2783924416} +{"current_steps": 9610, "total_steps": 15426, "loss": 0.3303, "lr": 3.570608882862996e-05, "epoch": 1.8690070990955947, "percentage": 62.3, "elapsed_time": "1 day, 4:46:25", "remaining_time": "17:24:50", "throughput": 26903.4, "total_tokens": 2786810368} +{"current_steps": 9620, "total_steps": 15426, "loss": 0.332, "lr": 3.569698779393757e-05, "epoch": 1.8709520567927647, "percentage": 62.36, "elapsed_time": "1 day, 4:48:08", "remaining_time": "17:22:59", "throughput": 26904.19, "total_tokens": 2789646144} +{"current_steps": 9630, "total_steps": 15426, "loss": 0.3325, "lr": 3.568789371491859e-05, "epoch": 1.8728970144899348, "percentage": 62.43, "elapsed_time": "1 day, 4:49:54", "remaining_time": "17:21:10", "throughput": 26904.79, "total_tokens": 2792577088} +{"current_steps": 9640, "total_steps": 15426, "loss": 0.3298, "lr": 3.567880658271748e-05, "epoch": 1.874841972187105, "percentage": 62.49, "elapsed_time": "1 day, 4:51:40", "remaining_time": "17:19:22", "throughput": 26905.5, "total_tokens": 2795505792} +{"current_steps": 9650, "total_steps": 15426, "loss": 0.3315, "lr": 3.566972638849445e-05, "epoch": 1.876786929884275, "percentage": 62.56, "elapsed_time": "1 day, 4:53:29", "remaining_time": "17:17:34", "throughput": 26905.15, "total_tokens": 2798388672} +{"current_steps": 9660, "total_steps": 15426, "loss": 0.3328, "lr": 3.566065312342551e-05, "epoch": 1.878731887581445, "percentage": 62.62, "elapsed_time": "1 day, 4:55:12", 
"remaining_time": "17:15:43", "throughput": 26906.3, "total_tokens": 2801271808} +{"current_steps": 9670, "total_steps": 15426, "loss": 0.3319, "lr": 3.565158677870231e-05, "epoch": 1.8806768452786153, "percentage": 62.69, "elapsed_time": "1 day, 4:56:58", "remaining_time": "17:13:55", "throughput": 26906.64, "total_tokens": 2804168128} +{"current_steps": 9680, "total_steps": 15426, "loss": 0.3322, "lr": 3.564252734553221e-05, "epoch": 1.8826218029757853, "percentage": 62.75, "elapsed_time": "1 day, 4:58:45", "remaining_time": "17:12:06", "throughput": 26907.27, "total_tokens": 2807101312} +{"current_steps": 9690, "total_steps": 15426, "loss": 0.3329, "lr": 3.563347481513818e-05, "epoch": 1.8845667606729553, "percentage": 62.82, "elapsed_time": "1 day, 5:00:29", "remaining_time": "17:10:16", "throughput": 26908.09, "total_tokens": 2809990464} +{"current_steps": 9700, "total_steps": 15426, "loss": 0.3361, "lr": 3.56244291787588e-05, "epoch": 1.8865117183701254, "percentage": 62.88, "elapsed_time": "1 day, 5:02:18", "remaining_time": "17:08:30", "throughput": 26907.95, "total_tokens": 2812926528} +{"current_steps": 9710, "total_steps": 15426, "loss": 0.3328, "lr": 3.5615390427648216e-05, "epoch": 1.8884566760672956, "percentage": 62.95, "elapsed_time": "1 day, 5:04:03", "remaining_time": "17:06:40", "throughput": 26908.99, "total_tokens": 2815850752} +{"current_steps": 9720, "total_steps": 15426, "loss": 0.3351, "lr": 3.5606358553076075e-05, "epoch": 1.8904016337644656, "percentage": 63.01, "elapsed_time": "1 day, 5:05:44", "remaining_time": "17:04:49", "throughput": 26909.85, "total_tokens": 2818670784} +{"current_steps": 9730, "total_steps": 15426, "loss": 0.3334, "lr": 3.5597333546327526e-05, "epoch": 1.8923465914616358, "percentage": 63.08, "elapsed_time": "1 day, 5:07:29", "remaining_time": "17:02:59", "throughput": 26910.49, "total_tokens": 2821552832} +{"current_steps": 9740, "total_steps": 15426, "loss": 0.3298, "lr": 3.5588315398703186e-05, "epoch": 
1.894291549158806, "percentage": 63.14, "elapsed_time": "1 day, 5:09:15", "remaining_time": "17:01:10", "throughput": 26910.86, "total_tokens": 2824432704} +{"current_steps": 9750, "total_steps": 15426, "loss": 0.3354, "lr": 3.557930410151907e-05, "epoch": 1.896236506855976, "percentage": 63.2, "elapsed_time": "1 day, 5:10:58", "remaining_time": "16:59:20", "throughput": 26912.32, "total_tokens": 2827372160} +{"current_steps": 9760, "total_steps": 15426, "loss": 0.3305, "lr": 3.5570299646106606e-05, "epoch": 1.8981814645531458, "percentage": 63.27, "elapsed_time": "1 day, 5:12:44", "remaining_time": "16:57:31", "throughput": 26913.45, "total_tokens": 2830326208} +{"current_steps": 9770, "total_steps": 15426, "loss": 0.3296, "lr": 3.556130202381253e-05, "epoch": 1.900126422250316, "percentage": 63.33, "elapsed_time": "1 day, 5:14:29", "remaining_time": "16:55:42", "throughput": 26914.05, "total_tokens": 2833236992} +{"current_steps": 9780, "total_steps": 15426, "loss": 0.3313, "lr": 3.555231122599892e-05, "epoch": 1.9020713799474862, "percentage": 63.4, "elapsed_time": "1 day, 5:16:15", "remaining_time": "16:53:53", "throughput": 26914.24, "total_tokens": 2836112128} +{"current_steps": 9790, "total_steps": 15426, "loss": 0.3319, "lr": 3.554332724404313e-05, "epoch": 1.9040163376446562, "percentage": 63.46, "elapsed_time": "1 day, 5:18:06", "remaining_time": "16:52:07", "throughput": 26913.68, "total_tokens": 2839018880} +{"current_steps": 9800, "total_steps": 15426, "loss": 0.3337, "lr": 3.553435006933777e-05, "epoch": 1.9059612953418263, "percentage": 63.53, "elapsed_time": "1 day, 5:19:57", "remaining_time": "16:50:21", "throughput": 26912.67, "total_tokens": 2841898496} +{"current_steps": 9810, "total_steps": 15426, "loss": 0.3336, "lr": 3.5525379693290626e-05, "epoch": 1.9079062530389965, "percentage": 63.59, "elapsed_time": "1 day, 5:21:46", "remaining_time": "16:48:34", "throughput": 26912.65, "total_tokens": 2844850944} +{"current_steps": 9820, "total_steps": 
15426, "loss": 0.3309, "lr": 3.551641610732469e-05, "epoch": 1.9098512107361665, "percentage": 63.66, "elapsed_time": "1 day, 5:23:30", "remaining_time": "16:46:44", "throughput": 26913.37, "total_tokens": 2847707840} +{"current_steps": 9830, "total_steps": 15426, "loss": 0.3346, "lr": 3.55074593028781e-05, "epoch": 1.9117961684333364, "percentage": 63.72, "elapsed_time": "1 day, 5:25:17", "remaining_time": "16:44:56", "throughput": 26913.68, "total_tokens": 2850624832} +{"current_steps": 9840, "total_steps": 15426, "loss": 0.334, "lr": 3.5498509271404065e-05, "epoch": 1.9137411261305066, "percentage": 63.79, "elapsed_time": "1 day, 5:26:59", "remaining_time": "16:43:05", "throughput": 26915.17, "total_tokens": 2853542720} +{"current_steps": 9850, "total_steps": 15426, "loss": 0.3313, "lr": 3.5489566004370893e-05, "epoch": 1.9156860838276768, "percentage": 63.85, "elapsed_time": "1 day, 5:28:44", "remaining_time": "16:41:15", "throughput": 26916.15, "total_tokens": 2856452352} +{"current_steps": 9860, "total_steps": 15426, "loss": 0.3282, "lr": 3.548062949326194e-05, "epoch": 1.9176310415248468, "percentage": 63.92, "elapsed_time": "1 day, 5:30:26", "remaining_time": "16:39:25", "throughput": 26917.06, "total_tokens": 2859313344} +{"current_steps": 9870, "total_steps": 15426, "loss": 0.3322, "lr": 3.547169972957554e-05, "epoch": 1.919575999222017, "percentage": 63.98, "elapsed_time": "1 day, 5:32:09", "remaining_time": "16:37:34", "throughput": 26918.33, "total_tokens": 2862207680} +{"current_steps": 9880, "total_steps": 15426, "loss": 0.3352, "lr": 3.5462776704825e-05, "epoch": 1.9215209569191871, "percentage": 64.05, "elapsed_time": "1 day, 5:33:59", "remaining_time": "16:35:48", "throughput": 26917.52, "total_tokens": 2865078592} +{"current_steps": 9890, "total_steps": 15426, "loss": 0.3333, "lr": 3.5453860410538594e-05, "epoch": 1.923465914616357, "percentage": 64.11, "elapsed_time": "1 day, 5:35:48", "remaining_time": "16:34:01", "throughput": 26917.38, 
"total_tokens": 2868012352} +{"current_steps": 9900, "total_steps": 15426, "loss": 0.3323, "lr": 3.5444950838259455e-05, "epoch": 1.925410872313527, "percentage": 64.18, "elapsed_time": "1 day, 5:37:40", "remaining_time": "16:32:15", "throughput": 26916.76, "total_tokens": 2870943680} +{"current_steps": 9910, "total_steps": 15426, "loss": 0.3316, "lr": 3.543604797954563e-05, "epoch": 1.9273558300106972, "percentage": 64.24, "elapsed_time": "1 day, 5:39:25", "remaining_time": "16:30:26", "throughput": 26917.58, "total_tokens": 2873869568} +{"current_steps": 9920, "total_steps": 15426, "loss": 0.3349, "lr": 3.542715182596996e-05, "epoch": 1.9293007877078674, "percentage": 64.31, "elapsed_time": "1 day, 5:41:04", "remaining_time": "16:28:34", "throughput": 26919.16, "total_tokens": 2876713536} +{"current_steps": 9930, "total_steps": 15426, "loss": 0.3319, "lr": 3.5418262369120115e-05, "epoch": 1.9312457454050374, "percentage": 64.37, "elapsed_time": "1 day, 5:42:52", "remaining_time": "16:26:46", "throughput": 26919.26, "total_tokens": 2879617536} +{"current_steps": 9940, "total_steps": 15426, "loss": 0.3346, "lr": 3.5409379600598526e-05, "epoch": 1.9331907031022075, "percentage": 64.44, "elapsed_time": "1 day, 5:44:37", "remaining_time": "16:24:57", "throughput": 26919.96, "total_tokens": 2882516416} +{"current_steps": 9950, "total_steps": 15426, "loss": 0.33, "lr": 3.540050351202235e-05, "epoch": 1.9351356607993777, "percentage": 64.5, "elapsed_time": "1 day, 5:46:25", "remaining_time": "16:23:09", "throughput": 26920.06, "total_tokens": 2885450624} +{"current_steps": 9960, "total_steps": 15426, "loss": 0.3345, "lr": 3.539163409502347e-05, "epoch": 1.9370806184965477, "percentage": 64.57, "elapsed_time": "1 day, 5:48:10", "remaining_time": "16:21:20", "throughput": 26920.45, "total_tokens": 2888302976} +{"current_steps": 9970, "total_steps": 15426, "loss": 0.334, "lr": 3.5382771341248416e-05, "epoch": 1.9390255761937176, "percentage": 64.63, "elapsed_time": "1 day, 
5:49:56", "remaining_time": "16:19:31", "throughput": 26921.12, "total_tokens": 2891233664} +{"current_steps": 9980, "total_steps": 15426, "loss": 0.3341, "lr": 3.537391524235835e-05, "epoch": 1.940970533890888, "percentage": 64.7, "elapsed_time": "1 day, 5:51:41", "remaining_time": "16:17:42", "throughput": 26922.52, "total_tokens": 2894212096} +{"current_steps": 9990, "total_steps": 15426, "loss": 0.3325, "lr": 3.5365065790029055e-05, "epoch": 1.942915491588058, "percentage": 64.76, "elapsed_time": "1 day, 5:53:25", "remaining_time": "16:15:52", "throughput": 26923.4, "total_tokens": 2897104576} +{"current_steps": 10000, "total_steps": 15426, "loss": 0.331, "lr": 3.535622297595087e-05, "epoch": 1.944860449285228, "percentage": 64.83, "elapsed_time": "1 day, 5:55:09", "remaining_time": "16:14:03", "throughput": 26924.57, "total_tokens": 2900038400} +{"current_steps": 10000, "total_steps": 15426, "eval_loss": 0.3123312294483185, "epoch": 1.944860449285228, "percentage": 64.83, "elapsed_time": "1 day, 5:55:11", "remaining_time": "16:14:04", "throughput": 26924.03, "total_tokens": 2900038400} +{"current_steps": 10010, "total_steps": 15426, "loss": 0.3323, "lr": 3.534738679182869e-05, "epoch": 1.9468054069823981, "percentage": 64.89, "elapsed_time": "1 day, 5:59:23", "remaining_time": "16:13:34", "throughput": 26888.44, "total_tokens": 2902971520} +{"current_steps": 10020, "total_steps": 15426, "loss": 0.334, "lr": 3.533855722938188e-05, "epoch": 1.9487503646795683, "percentage": 64.96, "elapsed_time": "1 day, 6:01:09", "remaining_time": "16:11:45", "throughput": 26888.92, "total_tokens": 2905858944} +{"current_steps": 10030, "total_steps": 15426, "loss": 0.3349, "lr": 3.5329734280344325e-05, "epoch": 1.9506953223767383, "percentage": 65.02, "elapsed_time": "1 day, 6:02:54", "remaining_time": "16:09:56", "throughput": 26889.53, "total_tokens": 2908749248} +{"current_steps": 10040, "total_steps": 15426, "loss": 0.3351, "lr": 3.5320917936464294e-05, "epoch": 
1.9526402800739084, "percentage": 65.08, "elapsed_time": "1 day, 6:04:39", "remaining_time": "16:08:07", "throughput": 26890.01, "total_tokens": 2911649472} +{"current_steps": 10050, "total_steps": 15426, "loss": 0.3334, "lr": 3.5312108189504505e-05, "epoch": 1.9545852377710786, "percentage": 65.15, "elapsed_time": "1 day, 6:06:32", "remaining_time": "16:06:21", "throughput": 26889.08, "total_tokens": 2914570944} +{"current_steps": 10060, "total_steps": 15426, "loss": 0.3315, "lr": 3.530330503124204e-05, "epoch": 1.9565301954682486, "percentage": 65.21, "elapsed_time": "1 day, 6:08:13", "remaining_time": "16:04:30", "throughput": 26890.45, "total_tokens": 2917426176} +{"current_steps": 10070, "total_steps": 15426, "loss": 0.3355, "lr": 3.5294508453468325e-05, "epoch": 1.9584751531654185, "percentage": 65.28, "elapsed_time": "1 day, 6:09:48", "remaining_time": "16:02:35", "throughput": 26892.93, "total_tokens": 2920270784} +{"current_steps": 10080, "total_steps": 15426, "loss": 0.3283, "lr": 3.528571844798908e-05, "epoch": 1.9604201108625887, "percentage": 65.34, "elapsed_time": "1 day, 6:11:33", "remaining_time": "16:00:46", "throughput": 26894.08, "total_tokens": 2923217856} +{"current_steps": 10090, "total_steps": 15426, "loss": 0.331, "lr": 3.527693500662431e-05, "epoch": 1.962365068559759, "percentage": 65.41, "elapsed_time": "1 day, 6:13:17", "remaining_time": "15:58:56", "throughput": 26894.81, "total_tokens": 2926080768} +{"current_steps": 10100, "total_steps": 15426, "loss": 0.3335, "lr": 3.5268158121208294e-05, "epoch": 1.9643100262569289, "percentage": 65.47, "elapsed_time": "1 day, 6:14:57", "remaining_time": "15:57:04", "throughput": 26896.2, "total_tokens": 2928936576} +{"current_steps": 10110, "total_steps": 15426, "loss": 0.3286, "lr": 3.525938778358949e-05, "epoch": 1.966254983954099, "percentage": 65.54, "elapsed_time": "1 day, 6:16:43", "remaining_time": "15:55:15", "throughput": 26896.95, "total_tokens": 2931858176} +{"current_steps": 10120, 
"total_steps": 15426, "loss": 0.3303, "lr": 3.5250623985630537e-05, "epoch": 1.9681999416512692, "percentage": 65.6, "elapsed_time": "1 day, 6:18:27", "remaining_time": "15:53:25", "throughput": 26897.89, "total_tokens": 2934754560} +{"current_steps": 10130, "total_steps": 15426, "loss": 0.3367, "lr": 3.524186671920826e-05, "epoch": 1.9701448993484392, "percentage": 65.67, "elapsed_time": "1 day, 6:20:09", "remaining_time": "15:51:35", "throughput": 26899.21, "total_tokens": 2937658368} +{"current_steps": 10140, "total_steps": 15426, "loss": 0.3323, "lr": 3.523311597621358e-05, "epoch": 1.9720898570456091, "percentage": 65.73, "elapsed_time": "1 day, 6:21:52", "remaining_time": "15:49:44", "throughput": 26900.74, "total_tokens": 2940575616} +{"current_steps": 10150, "total_steps": 15426, "loss": 0.3275, "lr": 3.5224371748551505e-05, "epoch": 1.9740348147427793, "percentage": 65.8, "elapsed_time": "1 day, 6:23:35", "remaining_time": "15:47:54", "throughput": 26901.97, "total_tokens": 2943499200} +{"current_steps": 10160, "total_steps": 15426, "loss": 0.33, "lr": 3.521563402814109e-05, "epoch": 1.9759797724399495, "percentage": 65.86, "elapsed_time": "1 day, 6:25:20", "remaining_time": "15:46:05", "throughput": 26902.39, "total_tokens": 2946375808} +{"current_steps": 10170, "total_steps": 15426, "loss": 0.332, "lr": 3.5206902806915436e-05, "epoch": 1.9779247301371194, "percentage": 65.93, "elapsed_time": "1 day, 6:27:02", "remaining_time": "15:44:14", "throughput": 26904.06, "total_tokens": 2949298816} +{"current_steps": 10180, "total_steps": 15426, "loss": 0.3289, "lr": 3.5198178076821644e-05, "epoch": 1.9798696878342896, "percentage": 65.99, "elapsed_time": "1 day, 6:28:44", "remaining_time": "15:42:23", "throughput": 26905.53, "total_tokens": 2952202112} +{"current_steps": 10190, "total_steps": 15426, "loss": 0.3336, "lr": 3.5189459829820743e-05, "epoch": 1.9818146455314598, "percentage": 66.06, "elapsed_time": "1 day, 6:30:34", "remaining_time": "15:40:37", 
"throughput": 26904.83, "total_tokens": 2955086400} +{"current_steps": 10200, "total_steps": 15426, "loss": 0.336, "lr": 3.5180748057887714e-05, "epoch": 1.9837596032286298, "percentage": 66.12, "elapsed_time": "1 day, 6:32:13", "remaining_time": "15:38:44", "throughput": 26906.42, "total_tokens": 2957918912} +{"current_steps": 10210, "total_steps": 15426, "loss": 0.3322, "lr": 3.517204275301144e-05, "epoch": 1.9857045609257997, "percentage": 66.19, "elapsed_time": "1 day, 6:33:54", "remaining_time": "15:36:53", "throughput": 26907.58, "total_tokens": 2960756416} +{"current_steps": 10220, "total_steps": 15426, "loss": 0.3311, "lr": 3.5163343907194676e-05, "epoch": 1.98764951862297, "percentage": 66.25, "elapsed_time": "1 day, 6:35:37", "remaining_time": "15:35:03", "throughput": 26908.6, "total_tokens": 2963655872} +{"current_steps": 10230, "total_steps": 15426, "loss": 0.3351, "lr": 3.5154651512453995e-05, "epoch": 1.98959447632014, "percentage": 66.32, "elapsed_time": "1 day, 6:37:20", "remaining_time": "15:33:12", "throughput": 26909.58, "total_tokens": 2966518272} +{"current_steps": 10240, "total_steps": 15426, "loss": 0.3324, "lr": 3.514596556081981e-05, "epoch": 1.99153943401731, "percentage": 66.38, "elapsed_time": "1 day, 6:38:58", "remaining_time": "15:31:20", "throughput": 26911.52, "total_tokens": 2969377664} +{"current_steps": 10250, "total_steps": 15426, "loss": 0.3336, "lr": 3.513728604433628e-05, "epoch": 1.9934843917144802, "percentage": 66.45, "elapsed_time": "1 day, 6:40:40", "remaining_time": "15:29:29", "throughput": 26912.73, "total_tokens": 2972249024} +{"current_steps": 10260, "total_steps": 15426, "loss": 0.3292, "lr": 3.5128612955061334e-05, "epoch": 1.9954293494116504, "percentage": 66.51, "elapsed_time": "1 day, 6:42:20", "remaining_time": "15:27:38", "throughput": 26913.93, "total_tokens": 2975073984} +{"current_steps": 10270, "total_steps": 15426, "loss": 0.3358, "lr": 3.5119946285066595e-05, "epoch": 1.9973743071088204, "percentage": 
66.58, "elapsed_time": "1 day, 6:44:11", "remaining_time": "15:25:52", "throughput": 26912.97, "total_tokens": 2977960512} +{"current_steps": 10280, "total_steps": 15426, "loss": 0.3307, "lr": 3.511128602643739e-05, "epoch": 1.9993192648059903, "percentage": 66.64, "elapsed_time": "1 day, 6:45:52", "remaining_time": "15:24:01", "throughput": 26914.23, "total_tokens": 2980832064} +{"current_steps": 10290, "total_steps": 15426, "loss": 0.3262, "lr": 3.510263217127269e-05, "epoch": 2.001166974618302, "percentage": 66.71, "elapsed_time": "1 day, 6:47:30", "remaining_time": "15:22:08", "throughput": 26914.91, "total_tokens": 2983533248} +{"current_steps": 10300, "total_steps": 15426, "loss": 0.3199, "lr": 3.50939847116851e-05, "epoch": 2.003111932315472, "percentage": 66.77, "elapsed_time": "1 day, 6:49:12", "remaining_time": "15:20:17", "throughput": 26916.58, "total_tokens": 2986453952} +{"current_steps": 10310, "total_steps": 15426, "loss": 0.3177, "lr": 3.508534363980081e-05, "epoch": 2.0050568900126424, "percentage": 66.84, "elapsed_time": "1 day, 6:50:55", "remaining_time": "15:18:27", "throughput": 26917.89, "total_tokens": 2989373824} +{"current_steps": 10320, "total_steps": 15426, "loss": 0.32, "lr": 3.507670894775958e-05, "epoch": 2.0070018477098124, "percentage": 66.9, "elapsed_time": "1 day, 6:52:34", "remaining_time": "15:16:35", "throughput": 26919.46, "total_tokens": 2992216896} +{"current_steps": 10330, "total_steps": 15426, "loss": 0.3204, "lr": 3.506808062771471e-05, "epoch": 2.0089468054069823, "percentage": 66.96, "elapsed_time": "1 day, 6:54:13", "remaining_time": "15:14:43", "throughput": 26921.04, "total_tokens": 2995047232} +{"current_steps": 10340, "total_steps": 15426, "loss": 0.3168, "lr": 3.505945867183298e-05, "epoch": 2.0108917631041523, "percentage": 67.03, "elapsed_time": "1 day, 6:55:54", "remaining_time": "15:12:52", "throughput": 26922.36, "total_tokens": 2997929088} +{"current_steps": 10350, "total_steps": 15426, "loss": 0.3196, "lr": 
3.505084307229468e-05, "epoch": 2.0128367208013227, "percentage": 67.09, "elapsed_time": "1 day, 6:57:42", "remaining_time": "15:11:04", "throughput": 26923.18, "total_tokens": 3000912704} +{"current_steps": 10360, "total_steps": 15426, "loss": 0.3225, "lr": 3.5042233821293525e-05, "epoch": 2.0147816784984927, "percentage": 67.16, "elapsed_time": "1 day, 6:59:22", "remaining_time": "15:09:13", "throughput": 26924.75, "total_tokens": 3003801344} +{"current_steps": 10370, "total_steps": 15426, "loss": 0.3181, "lr": 3.503363091103664e-05, "epoch": 2.0167266361956626, "percentage": 67.22, "elapsed_time": "1 day, 7:01:05", "remaining_time": "15:07:23", "throughput": 26926.36, "total_tokens": 3006749760} +{"current_steps": 10380, "total_steps": 15426, "loss": 0.3242, "lr": 3.5025034333744545e-05, "epoch": 2.018671593892833, "percentage": 67.29, "elapsed_time": "1 day, 7:02:50", "remaining_time": "15:05:34", "throughput": 26927.23, "total_tokens": 3009680832} +{"current_steps": 10390, "total_steps": 15426, "loss": 0.3225, "lr": 3.501644408165112e-05, "epoch": 2.020616551590003, "percentage": 67.35, "elapsed_time": "1 day, 7:04:34", "remaining_time": "15:03:45", "throughput": 26928.55, "total_tokens": 3012616128} +{"current_steps": 10400, "total_steps": 15426, "loss": 0.3188, "lr": 3.500786014700357e-05, "epoch": 2.022561509287173, "percentage": 67.42, "elapsed_time": "1 day, 7:06:17", "remaining_time": "15:01:55", "throughput": 26929.57, "total_tokens": 3015504000} +{"current_steps": 10410, "total_steps": 15426, "loss": 0.3208, "lr": 3.499928252206237e-05, "epoch": 2.024506466984343, "percentage": 67.48, "elapsed_time": "1 day, 7:08:04", "remaining_time": "15:00:07", "throughput": 26930.13, "total_tokens": 3018443328} +{"current_steps": 10420, "total_steps": 15426, "loss": 0.3199, "lr": 3.499071119910131e-05, "epoch": 2.0264514246815133, "percentage": 67.55, "elapsed_time": "1 day, 7:09:47", "remaining_time": "14:58:17", "throughput": 26931.49, "total_tokens": 3021385728} 
+{"current_steps": 10430, "total_steps": 15426, "loss": 0.3223, "lr": 3.498214617040739e-05, "epoch": 2.0283963823786832, "percentage": 67.61, "elapsed_time": "1 day, 7:11:29", "remaining_time": "14:56:26", "throughput": 26932.85, "total_tokens": 3024276032} +{"current_steps": 10440, "total_steps": 15426, "loss": 0.3132, "lr": 3.49735874282808e-05, "epoch": 2.030341340075853, "percentage": 67.68, "elapsed_time": "1 day, 7:13:13", "remaining_time": "14:54:37", "throughput": 26934.2, "total_tokens": 3027242048} +{"current_steps": 10450, "total_steps": 15426, "loss": 0.3235, "lr": 3.4965034965034965e-05, "epoch": 2.0322862977730236, "percentage": 67.74, "elapsed_time": "1 day, 7:14:55", "remaining_time": "14:52:47", "throughput": 26935.75, "total_tokens": 3030145216} +{"current_steps": 10460, "total_steps": 15426, "loss": 0.3174, "lr": 3.495648877299642e-05, "epoch": 2.0342312554701936, "percentage": 67.81, "elapsed_time": "1 day, 7:16:34", "remaining_time": "14:50:55", "throughput": 26937.65, "total_tokens": 3033019712} +{"current_steps": 10470, "total_steps": 15426, "loss": 0.3241, "lr": 3.494794884450483e-05, "epoch": 2.0361762131673635, "percentage": 67.87, "elapsed_time": "1 day, 7:18:15", "remaining_time": "14:49:04", "throughput": 26938.83, "total_tokens": 3035884544} +{"current_steps": 10480, "total_steps": 15426, "loss": 0.3181, "lr": 3.4939415171912954e-05, "epoch": 2.0381211708645335, "percentage": 67.94, "elapsed_time": "1 day, 7:19:56", "remaining_time": "14:47:14", "throughput": 26940.65, "total_tokens": 3038823296} +{"current_steps": 10490, "total_steps": 15426, "loss": 0.3253, "lr": 3.4930887747586616e-05, "epoch": 2.040066128561704, "percentage": 68.0, "elapsed_time": "1 day, 7:21:39", "remaining_time": "14:45:24", "throughput": 26941.64, "total_tokens": 3041693632} +{"current_steps": 10500, "total_steps": 15426, "loss": 0.3208, "lr": 3.492236656390469e-05, "epoch": 2.042011086258874, "percentage": 68.07, "elapsed_time": "1 day, 7:23:20", 
"remaining_time": "14:43:33", "throughput": 26943.3, "total_tokens": 3044607680} +{"current_steps": 10510, "total_steps": 15426, "loss": 0.3248, "lr": 3.4913851613259034e-05, "epoch": 2.043956043956044, "percentage": 68.13, "elapsed_time": "1 day, 7:25:01", "remaining_time": "14:41:42", "throughput": 26944.59, "total_tokens": 3047465856} +{"current_steps": 10520, "total_steps": 15426, "loss": 0.3214, "lr": 3.490534288805452e-05, "epoch": 2.045901001653214, "percentage": 68.2, "elapsed_time": "1 day, 7:26:43", "remaining_time": "14:39:52", "throughput": 26946.24, "total_tokens": 3050397888} +{"current_steps": 10530, "total_steps": 15426, "loss": 0.3193, "lr": 3.489684038070891e-05, "epoch": 2.047845959350384, "percentage": 68.26, "elapsed_time": "1 day, 7:28:24", "remaining_time": "14:38:01", "throughput": 26947.22, "total_tokens": 3053252672} +{"current_steps": 10540, "total_steps": 15426, "loss": 0.3245, "lr": 3.488834408365296e-05, "epoch": 2.049790917047554, "percentage": 68.33, "elapsed_time": "1 day, 7:30:03", "remaining_time": "14:36:10", "throughput": 26949.19, "total_tokens": 3056127552} +{"current_steps": 10550, "total_steps": 15426, "loss": 0.3203, "lr": 3.487985398933027e-05, "epoch": 2.0517358747447245, "percentage": 68.39, "elapsed_time": "1 day, 7:31:47", "remaining_time": "14:34:20", "throughput": 26950.15, "total_tokens": 3059047040} +{"current_steps": 10560, "total_steps": 15426, "loss": 0.318, "lr": 3.4871370090197324e-05, "epoch": 2.0536808324418945, "percentage": 68.46, "elapsed_time": "1 day, 7:33:35", "remaining_time": "14:32:33", "throughput": 26950.43, "total_tokens": 3061982400} +{"current_steps": 10570, "total_steps": 15426, "loss": 0.3175, "lr": 3.486289237872343e-05, "epoch": 2.0556257901390644, "percentage": 68.52, "elapsed_time": "1 day, 7:35:19", "remaining_time": "14:30:44", "throughput": 26951.5, "total_tokens": 3064914496} +{"current_steps": 10580, "total_steps": 15426, "loss": 0.3247, "lr": 3.485442084739075e-05, "epoch": 
2.0575707478362344, "percentage": 68.59, "elapsed_time": "1 day, 7:36:59", "remaining_time": "14:28:53", "throughput": 26953.2, "total_tokens": 3067802368} +{"current_steps": 10590, "total_steps": 15426, "loss": 0.3196, "lr": 3.484595548869416e-05, "epoch": 2.059515705533405, "percentage": 68.65, "elapsed_time": "1 day, 7:38:43", "remaining_time": "14:27:03", "throughput": 26954.15, "total_tokens": 3070711808} +{"current_steps": 10600, "total_steps": 15426, "loss": 0.3235, "lr": 3.4837496295141335e-05, "epoch": 2.0614606632305748, "percentage": 68.72, "elapsed_time": "1 day, 7:40:25", "remaining_time": "14:25:14", "throughput": 26955.49, "total_tokens": 3073620800} +{"current_steps": 10610, "total_steps": 15426, "loss": 0.3247, "lr": 3.482904325925266e-05, "epoch": 2.0634056209277447, "percentage": 68.78, "elapsed_time": "1 day, 7:42:06", "remaining_time": "14:23:23", "throughput": 26957.13, "total_tokens": 3076534464} +{"current_steps": 10620, "total_steps": 15426, "loss": 0.3223, "lr": 3.482059637356124e-05, "epoch": 2.065350578624915, "percentage": 68.84, "elapsed_time": "1 day, 7:43:51", "remaining_time": "14:21:34", "throughput": 26958.16, "total_tokens": 3079467520} +{"current_steps": 10630, "total_steps": 15426, "loss": 0.3199, "lr": 3.481215563061281e-05, "epoch": 2.067295536322085, "percentage": 68.91, "elapsed_time": "1 day, 7:45:32", "remaining_time": "14:19:43", "throughput": 26959.89, "total_tokens": 3082381312} +{"current_steps": 10640, "total_steps": 15426, "loss": 0.3183, "lr": 3.4803721022965785e-05, "epoch": 2.069240494019255, "percentage": 68.97, "elapsed_time": "1 day, 7:47:13", "remaining_time": "14:17:53", "throughput": 26961.16, "total_tokens": 3085271360} +{"current_steps": 10650, "total_steps": 15426, "loss": 0.3174, "lr": 3.479529254319117e-05, "epoch": 2.071185451716425, "percentage": 69.04, "elapsed_time": "1 day, 7:48:57", "remaining_time": "14:16:04", "throughput": 26962.23, "total_tokens": 3088185472} +{"current_steps": 10660, 
"total_steps": 15426, "loss": 0.3207, "lr": 3.478687018387257e-05, "epoch": 2.0731304094135954, "percentage": 69.1, "elapsed_time": "1 day, 7:50:38", "remaining_time": "14:14:13", "throughput": 26963.37, "total_tokens": 3091041600} +{"current_steps": 10670, "total_steps": 15426, "loss": 0.3178, "lr": 3.477845393760616e-05, "epoch": 2.0750753671107653, "percentage": 69.17, "elapsed_time": "1 day, 7:52:22", "remaining_time": "14:12:24", "throughput": 26964.11, "total_tokens": 3093935104} +{"current_steps": 10680, "total_steps": 15426, "loss": 0.3212, "lr": 3.4770043797000614e-05, "epoch": 2.0770203248079353, "percentage": 69.23, "elapsed_time": "1 day, 7:54:09", "remaining_time": "14:10:36", "throughput": 26964.52, "total_tokens": 3096856128} +{"current_steps": 10690, "total_steps": 15426, "loss": 0.3222, "lr": 3.4761639754677146e-05, "epoch": 2.0789652825051057, "percentage": 69.3, "elapsed_time": "1 day, 7:55:49", "remaining_time": "14:08:46", "throughput": 26966.4, "total_tokens": 3099772800} +{"current_steps": 10700, "total_steps": 15426, "loss": 0.3216, "lr": 3.4753241803269435e-05, "epoch": 2.0809102402022757, "percentage": 69.36, "elapsed_time": "1 day, 7:57:29", "remaining_time": "14:06:55", "throughput": 26968.12, "total_tokens": 3102673408} +{"current_steps": 10710, "total_steps": 15426, "loss": 0.3217, "lr": 3.474484993542361e-05, "epoch": 2.0828551978994456, "percentage": 69.43, "elapsed_time": "1 day, 7:59:13", "remaining_time": "14:05:06", "throughput": 26968.83, "total_tokens": 3105552832} +{"current_steps": 10720, "total_steps": 15426, "loss": 0.3222, "lr": 3.473646414379822e-05, "epoch": 2.0848001555966156, "percentage": 69.49, "elapsed_time": "1 day, 8:00:53", "remaining_time": "14:03:15", "throughput": 26970.06, "total_tokens": 3108394240} +{"current_steps": 10730, "total_steps": 15426, "loss": 0.3252, "lr": 3.472808442106422e-05, "epoch": 2.086745113293786, "percentage": 69.56, "elapsed_time": "1 day, 8:02:36", "remaining_time": "14:01:25", 
"throughput": 26971.05, "total_tokens": 3111273984} +{"current_steps": 10740, "total_steps": 15426, "loss": 0.3193, "lr": 3.4719710759904936e-05, "epoch": 2.088690070990956, "percentage": 69.62, "elapsed_time": "1 day, 8:04:20", "remaining_time": "13:59:36", "throughput": 26971.71, "total_tokens": 3114172608} +{"current_steps": 10750, "total_steps": 15426, "loss": 0.3182, "lr": 3.471134315301603e-05, "epoch": 2.090635028688126, "percentage": 69.69, "elapsed_time": "1 day, 8:06:05", "remaining_time": "13:57:48", "throughput": 26972.46, "total_tokens": 3117087744} +{"current_steps": 10760, "total_steps": 15426, "loss": 0.3214, "lr": 3.470298159310549e-05, "epoch": 2.0925799863852963, "percentage": 69.75, "elapsed_time": "1 day, 8:07:47", "remaining_time": "13:55:58", "throughput": 26973.59, "total_tokens": 3119973184} +{"current_steps": 10770, "total_steps": 15426, "loss": 0.3199, "lr": 3.4694626072893585e-05, "epoch": 2.0945249440824663, "percentage": 69.82, "elapsed_time": "1 day, 8:09:29", "remaining_time": "13:54:08", "throughput": 26974.99, "total_tokens": 3122875136} +{"current_steps": 10780, "total_steps": 15426, "loss": 0.3215, "lr": 3.468627658511285e-05, "epoch": 2.096469901779636, "percentage": 69.88, "elapsed_time": "1 day, 8:11:13", "remaining_time": "13:52:19", "throughput": 26976.12, "total_tokens": 3125811136} +{"current_steps": 10790, "total_steps": 15426, "loss": 0.3233, "lr": 3.467793312250806e-05, "epoch": 2.098414859476806, "percentage": 69.95, "elapsed_time": "1 day, 8:13:05", "remaining_time": "13:50:34", "throughput": 26975.14, "total_tokens": 3128734592} +{"current_steps": 10800, "total_steps": 15426, "loss": 0.3202, "lr": 3.466959567783619e-05, "epoch": 2.1003598171739766, "percentage": 70.01, "elapsed_time": "1 day, 8:14:50", "remaining_time": "13:48:45", "throughput": 26975.85, "total_tokens": 3131634752} +{"current_steps": 10810, "total_steps": 15426, "loss": 0.3196, "lr": 3.466126424386642e-05, "epoch": 2.1023047748711465, "percentage": 
70.08, "elapsed_time": "1 day, 8:16:36", "remaining_time": "13:46:57", "throughput": 26976.51, "total_tokens": 3134588544} +{"current_steps": 10820, "total_steps": 15426, "loss": 0.3207, "lr": 3.4652938813380056e-05, "epoch": 2.1042497325683165, "percentage": 70.14, "elapsed_time": "1 day, 8:18:20", "remaining_time": "13:45:08", "throughput": 26977.79, "total_tokens": 3137524608} +{"current_steps": 10830, "total_steps": 15426, "loss": 0.3214, "lr": 3.464461937917057e-05, "epoch": 2.106194690265487, "percentage": 70.21, "elapsed_time": "1 day, 8:19:59", "remaining_time": "13:43:17", "throughput": 26979.46, "total_tokens": 3140383360} +{"current_steps": 10840, "total_steps": 15426, "loss": 0.3213, "lr": 3.4636305934043525e-05, "epoch": 2.108139647962657, "percentage": 70.27, "elapsed_time": "1 day, 8:21:40", "remaining_time": "13:41:27", "throughput": 26980.7, "total_tokens": 3143265024} +{"current_steps": 10850, "total_steps": 15426, "loss": 0.3172, "lr": 3.4627998470816544e-05, "epoch": 2.110084605659827, "percentage": 70.34, "elapsed_time": "1 day, 8:23:30", "remaining_time": "13:39:40", "throughput": 26980.65, "total_tokens": 3146229440} +{"current_steps": 10860, "total_steps": 15426, "loss": 0.3205, "lr": 3.4619696982319334e-05, "epoch": 2.112029563356997, "percentage": 70.4, "elapsed_time": "1 day, 8:25:17", "remaining_time": "13:37:53", "throughput": 26981.11, "total_tokens": 3149171648} +{"current_steps": 10870, "total_steps": 15426, "loss": 0.3172, "lr": 3.461140146139361e-05, "epoch": 2.113974521054167, "percentage": 70.47, "elapsed_time": "1 day, 8:27:00", "remaining_time": "13:36:03", "throughput": 26982.05, "total_tokens": 3152067008} +{"current_steps": 10880, "total_steps": 15426, "loss": 0.319, "lr": 3.460311190089309e-05, "epoch": 2.115919478751337, "percentage": 70.53, "elapsed_time": "1 day, 8:28:46", "remaining_time": "13:34:15", "throughput": 26982.37, "total_tokens": 3154960704} +{"current_steps": 10890, "total_steps": 15426, "loss": 0.3178, 
"lr": 3.459482829368348e-05, "epoch": 2.117864436448507, "percentage": 70.6, "elapsed_time": "1 day, 8:30:31", "remaining_time": "13:32:26", "throughput": 26983.23, "total_tokens": 3157887616} +{"current_steps": 10900, "total_steps": 15426, "loss": 0.3206, "lr": 3.4586550632642425e-05, "epoch": 2.1198093941456775, "percentage": 70.66, "elapsed_time": "1 day, 8:32:09", "remaining_time": "13:30:35", "throughput": 26985.13, "total_tokens": 3160760320} +{"current_steps": 10910, "total_steps": 15426, "loss": 0.3216, "lr": 3.457827891065949e-05, "epoch": 2.1217543518428474, "percentage": 70.72, "elapsed_time": "1 day, 8:33:49", "remaining_time": "13:28:45", "throughput": 26986.44, "total_tokens": 3163609408} +{"current_steps": 10920, "total_steps": 15426, "loss": 0.3215, "lr": 3.457001312063614e-05, "epoch": 2.1236993095400174, "percentage": 70.79, "elapsed_time": "1 day, 8:35:32", "remaining_time": "13:26:55", "throughput": 26987.79, "total_tokens": 3166542592} +{"current_steps": 10930, "total_steps": 15426, "loss": 0.3196, "lr": 3.45617532554857e-05, "epoch": 2.1256442672371874, "percentage": 70.85, "elapsed_time": "1 day, 8:37:13", "remaining_time": "13:25:05", "throughput": 26988.9, "total_tokens": 3169413440} +{"current_steps": 10940, "total_steps": 15426, "loss": 0.3214, "lr": 3.455349930813339e-05, "epoch": 2.1275892249343578, "percentage": 70.92, "elapsed_time": "1 day, 8:38:55", "remaining_time": "13:23:16", "throughput": 26989.87, "total_tokens": 3172280640} +{"current_steps": 10950, "total_steps": 15426, "loss": 0.322, "lr": 3.45452512715162e-05, "epoch": 2.1295341826315277, "percentage": 70.98, "elapsed_time": "1 day, 8:40:34", "remaining_time": "13:21:24", "throughput": 26991.62, "total_tokens": 3175136192} +{"current_steps": 10960, "total_steps": 15426, "loss": 0.3207, "lr": 3.4537009138582935e-05, "epoch": 2.1314791403286977, "percentage": 71.05, "elapsed_time": "1 day, 8:42:14", "remaining_time": "13:19:34", "throughput": 26993.04, "total_tokens": 
3178007104} +{"current_steps": 10970, "total_steps": 15426, "loss": 0.3193, "lr": 3.4528772902294174e-05, "epoch": 2.133424098025868, "percentage": 71.11, "elapsed_time": "1 day, 8:44:00", "remaining_time": "13:17:46", "throughput": 26993.5, "total_tokens": 3180921664} +{"current_steps": 10980, "total_steps": 15426, "loss": 0.3211, "lr": 3.452054255562222e-05, "epoch": 2.135369055723038, "percentage": 71.18, "elapsed_time": "1 day, 8:45:45", "remaining_time": "13:15:58", "throughput": 26994.43, "total_tokens": 3183862656} +{"current_steps": 10990, "total_steps": 15426, "loss": 0.3194, "lr": 3.451231809155115e-05, "epoch": 2.137314013420208, "percentage": 71.24, "elapsed_time": "1 day, 8:47:30", "remaining_time": "13:14:09", "throughput": 26995.24, "total_tokens": 3186801088} +{"current_steps": 11000, "total_steps": 15426, "loss": 0.3187, "lr": 3.450409950307666e-05, "epoch": 2.1392589711173784, "percentage": 71.31, "elapsed_time": "1 day, 8:49:15", "remaining_time": "13:12:21", "throughput": 26995.67, "total_tokens": 3189677120} +{"current_steps": 11010, "total_steps": 15426, "loss": 0.3221, "lr": 3.449588678320619e-05, "epoch": 2.1412039288145484, "percentage": 71.37, "elapsed_time": "1 day, 8:50:58", "remaining_time": "13:10:32", "throughput": 26997.07, "total_tokens": 3192640576} +{"current_steps": 11020, "total_steps": 15426, "loss": 0.3176, "lr": 3.4487679924958767e-05, "epoch": 2.1431488865117183, "percentage": 71.44, "elapsed_time": "1 day, 8:52:40", "remaining_time": "13:08:42", "throughput": 26998.66, "total_tokens": 3195578176} +{"current_steps": 11030, "total_steps": 15426, "loss": 0.3167, "lr": 3.4479478921365076e-05, "epoch": 2.1450938442088883, "percentage": 71.5, "elapsed_time": "1 day, 8:54:21", "remaining_time": "13:06:52", "throughput": 27000.24, "total_tokens": 3198491584} +{"current_steps": 11040, "total_steps": 15426, "loss": 0.3225, "lr": 3.447128376546738e-05, "epoch": 2.1470388019060587, "percentage": 71.57, "elapsed_time": "1 day, 8:56:03", 
"remaining_time": "13:05:03", "throughput": 27001.45, "total_tokens": 3201393792} +{"current_steps": 11050, "total_steps": 15426, "loss": 0.3212, "lr": 3.4463094450319505e-05, "epoch": 2.1489837596032286, "percentage": 71.63, "elapsed_time": "1 day, 8:57:45", "remaining_time": "13:03:13", "throughput": 27002.82, "total_tokens": 3204303232} +{"current_steps": 11060, "total_steps": 15426, "loss": 0.3206, "lr": 3.4454910968986855e-05, "epoch": 2.1509287173003986, "percentage": 71.7, "elapsed_time": "1 day, 8:59:29", "remaining_time": "13:01:25", "throughput": 27003.62, "total_tokens": 3207211328} +{"current_steps": 11070, "total_steps": 15426, "loss": 0.3165, "lr": 3.4446733314546336e-05, "epoch": 2.152873674997569, "percentage": 71.76, "elapsed_time": "1 day, 9:01:13", "remaining_time": "12:59:36", "throughput": 27004.88, "total_tokens": 3210162368} +{"current_steps": 11080, "total_steps": 15426, "loss": 0.3198, "lr": 3.443856148008633e-05, "epoch": 2.154818632694739, "percentage": 71.83, "elapsed_time": "1 day, 9:03:01", "remaining_time": "12:57:49", "throughput": 27005.24, "total_tokens": 3213120064} +{"current_steps": 11090, "total_steps": 15426, "loss": 0.32, "lr": 3.443039545870672e-05, "epoch": 2.156763590391909, "percentage": 71.89, "elapsed_time": "1 day, 9:04:41", "remaining_time": "12:55:58", "throughput": 27006.69, "total_tokens": 3216002496} +{"current_steps": 11100, "total_steps": 15426, "loss": 0.3232, "lr": 3.442223524351883e-05, "epoch": 2.158708548089079, "percentage": 71.96, "elapsed_time": "1 day, 9:06:22", "remaining_time": "12:54:09", "throughput": 27008.32, "total_tokens": 3218930432} +{"current_steps": 11110, "total_steps": 15426, "loss": 0.3208, "lr": 3.44140808276454e-05, "epoch": 2.1606535057862493, "percentage": 72.02, "elapsed_time": "1 day, 9:08:01", "remaining_time": "12:52:18", "throughput": 27009.9, "total_tokens": 3221788160} +{"current_steps": 11120, "total_steps": 15426, "loss": 0.319, "lr": 3.4405932204220575e-05, "epoch": 
2.162598463483419, "percentage": 72.09, "elapsed_time": "1 day, 9:09:42", "remaining_time": "12:50:28", "throughput": 27011.25, "total_tokens": 3224664064} +{"current_steps": 11130, "total_steps": 15426, "loss": 0.3193, "lr": 3.4397789366389876e-05, "epoch": 2.164543421180589, "percentage": 72.15, "elapsed_time": "1 day, 9:11:24", "remaining_time": "12:48:39", "throughput": 27012.4, "total_tokens": 3227575808} +{"current_steps": 11140, "total_steps": 15426, "loss": 0.3203, "lr": 3.438965230731016e-05, "epoch": 2.1664883788777596, "percentage": 72.22, "elapsed_time": "1 day, 9:13:08", "remaining_time": "12:46:50", "throughput": 27013.47, "total_tokens": 3230505152} +{"current_steps": 11150, "total_steps": 15426, "loss": 0.3197, "lr": 3.438152102014964e-05, "epoch": 2.1684333365749295, "percentage": 72.28, "elapsed_time": "1 day, 9:14:52", "remaining_time": "12:45:01", "throughput": 27014.51, "total_tokens": 3233428096} +{"current_steps": 11160, "total_steps": 15426, "loss": 0.3207, "lr": 3.437339549808778e-05, "epoch": 2.1703782942720995, "percentage": 72.35, "elapsed_time": "1 day, 9:16:40", "remaining_time": "12:43:14", "throughput": 27015.03, "total_tokens": 3236411648} +{"current_steps": 11170, "total_steps": 15426, "loss": 0.3231, "lr": 3.43652757343154e-05, "epoch": 2.17232325196927, "percentage": 72.41, "elapsed_time": "1 day, 9:18:22", "remaining_time": "12:41:25", "throughput": 27016.63, "total_tokens": 3239349184} +{"current_steps": 11180, "total_steps": 15426, "loss": 0.3242, "lr": 3.435716172203449e-05, "epoch": 2.17426820966644, "percentage": 72.48, "elapsed_time": "1 day, 9:20:07", "remaining_time": "12:39:37", "throughput": 27017.18, "total_tokens": 3242258496} +{"current_steps": 11190, "total_steps": 15426, "loss": 0.3214, "lr": 3.434905345445833e-05, "epoch": 2.17621316736361, "percentage": 72.54, "elapsed_time": "1 day, 9:21:49", "remaining_time": "12:37:47", "throughput": 27018.24, "total_tokens": 3245150016} +{"current_steps": 11200, 
"total_steps": 15426, "loss": 0.3185, "lr": 3.4340950924811374e-05, "epoch": 2.1781581250607798, "percentage": 72.6, "elapsed_time": "1 day, 9:23:31", "remaining_time": "12:35:58", "throughput": 27019.26, "total_tokens": 3248031552} +{"current_steps": 11210, "total_steps": 15426, "loss": 0.323, "lr": 3.433285412632927e-05, "epoch": 2.18010308275795, "percentage": 72.67, "elapsed_time": "1 day, 9:25:11", "remaining_time": "12:34:08", "throughput": 27020.89, "total_tokens": 3250910464} +{"current_steps": 11220, "total_steps": 15426, "loss": 0.3272, "lr": 3.4324763052258835e-05, "epoch": 2.18204804045512, "percentage": 72.73, "elapsed_time": "1 day, 9:26:52", "remaining_time": "12:32:18", "throughput": 27022.14, "total_tokens": 3253808576} +{"current_steps": 11230, "total_steps": 15426, "loss": 0.3245, "lr": 3.4316677695858003e-05, "epoch": 2.18399299815229, "percentage": 72.8, "elapsed_time": "1 day, 9:28:32", "remaining_time": "12:30:28", "throughput": 27023.68, "total_tokens": 3256686144} +{"current_steps": 11240, "total_steps": 15426, "loss": 0.3215, "lr": 3.430859805039583e-05, "epoch": 2.18593795584946, "percentage": 72.86, "elapsed_time": "1 day, 9:30:14", "remaining_time": "12:28:39", "throughput": 27024.95, "total_tokens": 3259611200} +{"current_steps": 11250, "total_steps": 15426, "loss": 0.3219, "lr": 3.430052410915246e-05, "epoch": 2.1878829135466304, "percentage": 72.93, "elapsed_time": "1 day, 9:31:55", "remaining_time": "12:26:49", "throughput": 27026.6, "total_tokens": 3262523520} +{"current_steps": 11260, "total_steps": 15426, "loss": 0.3224, "lr": 3.4292455865419086e-05, "epoch": 2.1898278712438004, "percentage": 72.99, "elapsed_time": "1 day, 9:33:42", "remaining_time": "12:25:02", "throughput": 27026.97, "total_tokens": 3265468800} +{"current_steps": 11270, "total_steps": 15426, "loss": 0.3225, "lr": 3.4284393312497973e-05, "epoch": 2.1917728289409704, "percentage": 73.06, "elapsed_time": "1 day, 9:35:23", "remaining_time": "12:23:12", 
"throughput": 27028.36, "total_tokens": 3268358464} +{"current_steps": 11280, "total_steps": 15426, "loss": 0.3195, "lr": 3.427633644370238e-05, "epoch": 2.1937177866381408, "percentage": 73.12, "elapsed_time": "1 day, 9:37:08", "remaining_time": "12:21:24", "throughput": 27028.87, "total_tokens": 3271265920} +{"current_steps": 11290, "total_steps": 15426, "loss": 0.3232, "lr": 3.4268285252356564e-05, "epoch": 2.1956627443353107, "percentage": 73.19, "elapsed_time": "1 day, 9:38:53", "remaining_time": "12:19:36", "throughput": 27029.45, "total_tokens": 3274177344} +{"current_steps": 11300, "total_steps": 15426, "loss": 0.3184, "lr": 3.426023973179575e-05, "epoch": 2.1976077020324807, "percentage": 73.25, "elapsed_time": "1 day, 9:40:34", "remaining_time": "12:17:46", "throughput": 27031.33, "total_tokens": 3277127808} +{"current_steps": 11310, "total_steps": 15426, "loss": 0.3195, "lr": 3.425219987536614e-05, "epoch": 2.199552659729651, "percentage": 73.32, "elapsed_time": "1 day, 9:42:20", "remaining_time": "12:15:58", "throughput": 27032.11, "total_tokens": 3280085440} +{"current_steps": 11320, "total_steps": 15426, "loss": 0.3203, "lr": 3.4244165676424815e-05, "epoch": 2.201497617426821, "percentage": 73.38, "elapsed_time": "1 day, 9:44:04", "remaining_time": "12:14:10", "throughput": 27033.24, "total_tokens": 3283025984} +{"current_steps": 11330, "total_steps": 15426, "loss": 0.3229, "lr": 3.423613712833979e-05, "epoch": 2.203442575123991, "percentage": 73.45, "elapsed_time": "1 day, 9:45:44", "remaining_time": "12:12:20", "throughput": 27034.86, "total_tokens": 3285934464} +{"current_steps": 11340, "total_steps": 15426, "loss": 0.3214, "lr": 3.422811422448995e-05, "epoch": 2.205387532821161, "percentage": 73.51, "elapsed_time": "1 day, 9:47:27", "remaining_time": "12:10:31", "throughput": 27035.75, "total_tokens": 3288831552} +{"current_steps": 11350, "total_steps": 15426, "loss": 0.3203, "lr": 3.422009695826503e-05, "epoch": 2.2073324905183314, "percentage": 
73.58, "elapsed_time": "1 day, 9:49:10", "remaining_time": "12:08:42", "throughput": 27037.03, "total_tokens": 3291764672} +{"current_steps": 11360, "total_steps": 15426, "loss": 0.3189, "lr": 3.4212085323065626e-05, "epoch": 2.2092774482155013, "percentage": 73.64, "elapsed_time": "1 day, 9:50:57", "remaining_time": "12:06:55", "throughput": 27037.43, "total_tokens": 3294709312} +{"current_steps": 11370, "total_steps": 15426, "loss": 0.3216, "lr": 3.4204079312303103e-05, "epoch": 2.2112224059126713, "percentage": 73.71, "elapsed_time": "1 day, 9:52:35", "remaining_time": "12:05:04", "throughput": 27039.06, "total_tokens": 3297561536} +{"current_steps": 11380, "total_steps": 15426, "loss": 0.3201, "lr": 3.419607891939964e-05, "epoch": 2.2131673636098417, "percentage": 73.77, "elapsed_time": "1 day, 9:54:18", "remaining_time": "12:03:16", "throughput": 27040.34, "total_tokens": 3300494464} +{"current_steps": 11390, "total_steps": 15426, "loss": 0.3192, "lr": 3.4188084137788166e-05, "epoch": 2.2151123213070116, "percentage": 73.84, "elapsed_time": "1 day, 9:56:00", "remaining_time": "12:01:26", "throughput": 27041.45, "total_tokens": 3303388160} +{"current_steps": 11400, "total_steps": 15426, "loss": 0.325, "lr": 3.418009496091238e-05, "epoch": 2.2170572790041816, "percentage": 73.9, "elapsed_time": "1 day, 9:57:40", "remaining_time": "11:59:37", "throughput": 27042.6, "total_tokens": 3306234624} +{"current_steps": 11410, "total_steps": 15426, "loss": 0.3193, "lr": 3.417211138222666e-05, "epoch": 2.2190022367013515, "percentage": 73.97, "elapsed_time": "1 day, 9:59:23", "remaining_time": "11:57:48", "throughput": 27043.62, "total_tokens": 3309141184} +{"current_steps": 11420, "total_steps": 15426, "loss": 0.3198, "lr": 3.416413339519612e-05, "epoch": 2.220947194398522, "percentage": 74.03, "elapsed_time": "1 day, 10:01:00", "remaining_time": "11:55:57", "throughput": 27045.34, "total_tokens": 3311991936} +{"current_steps": 11430, "total_steps": 15426, "loss": 0.3185, 
"lr": 3.4156160993296524e-05, "epoch": 2.222892152095692, "percentage": 74.1, "elapsed_time": "1 day, 10:02:45", "remaining_time": "11:54:09", "throughput": 27046.21, "total_tokens": 3314922752} +{"current_steps": 11440, "total_steps": 15426, "loss": 0.3205, "lr": 3.4148194170014295e-05, "epoch": 2.224837109792862, "percentage": 74.16, "elapsed_time": "1 day, 10:04:27", "remaining_time": "11:52:20", "throughput": 27047.33, "total_tokens": 3317832512} +{"current_steps": 11450, "total_steps": 15426, "loss": 0.3185, "lr": 3.4140232918846484e-05, "epoch": 2.2267820674900323, "percentage": 74.23, "elapsed_time": "1 day, 10:06:08", "remaining_time": "11:50:31", "throughput": 27048.91, "total_tokens": 3320753920} +{"current_steps": 11460, "total_steps": 15426, "loss": 0.3244, "lr": 3.4132277233300753e-05, "epoch": 2.2287270251872022, "percentage": 74.29, "elapsed_time": "1 day, 10:07:52", "remaining_time": "11:48:42", "throughput": 27050.28, "total_tokens": 3323724096} +{"current_steps": 11470, "total_steps": 15426, "loss": 0.3171, "lr": 3.4124327106895356e-05, "epoch": 2.230671982884372, "percentage": 74.35, "elapsed_time": "1 day, 10:09:33", "remaining_time": "11:46:53", "throughput": 27051.69, "total_tokens": 3326653184} +{"current_steps": 11480, "total_steps": 15426, "loss": 0.32, "lr": 3.4116382533159097e-05, "epoch": 2.2326169405815426, "percentage": 74.42, "elapsed_time": "1 day, 10:11:21", "remaining_time": "11:45:06", "throughput": 27052.06, "total_tokens": 3329603200} +{"current_steps": 11490, "total_steps": 15426, "loss": 0.3218, "lr": 3.4108443505631335e-05, "epoch": 2.2345618982787125, "percentage": 74.48, "elapsed_time": "1 day, 10:13:01", "remaining_time": "11:43:16", "throughput": 27053.7, "total_tokens": 3332507328} +{"current_steps": 11500, "total_steps": 15426, "loss": 0.32, "lr": 3.410051001786192e-05, "epoch": 2.2365068559758825, "percentage": 74.55, "elapsed_time": "1 day, 10:14:44", "remaining_time": "11:41:28", "throughput": 27054.95, 
"total_tokens": 3335451840} +{"current_steps": 11510, "total_steps": 15426, "loss": 0.3245, "lr": 3.409258206341124e-05, "epoch": 2.2384518136730525, "percentage": 74.61, "elapsed_time": "1 day, 10:16:27", "remaining_time": "11:39:39", "throughput": 27055.84, "total_tokens": 3338349632} +{"current_steps": 11520, "total_steps": 15426, "loss": 0.3191, "lr": 3.4084659635850134e-05, "epoch": 2.240396771370223, "percentage": 74.68, "elapsed_time": "1 day, 10:18:06", "remaining_time": "11:37:49", "throughput": 27057.38, "total_tokens": 3341216768} +{"current_steps": 11530, "total_steps": 15426, "loss": 0.3212, "lr": 3.40767427287599e-05, "epoch": 2.242341729067393, "percentage": 74.74, "elapsed_time": "1 day, 10:19:47", "remaining_time": "11:36:00", "throughput": 27058.55, "total_tokens": 3344088704} +{"current_steps": 11540, "total_steps": 15426, "loss": 0.3225, "lr": 3.406883133573224e-05, "epoch": 2.2442866867645628, "percentage": 74.81, "elapsed_time": "1 day, 10:21:29", "remaining_time": "11:34:11", "throughput": 27059.62, "total_tokens": 3347000384} +{"current_steps": 11550, "total_steps": 15426, "loss": 0.3198, "lr": 3.406092545036932e-05, "epoch": 2.2462316444617327, "percentage": 74.87, "elapsed_time": "1 day, 10:23:08", "remaining_time": "11:32:21", "throughput": 27061.08, "total_tokens": 3349857472} +{"current_steps": 11560, "total_steps": 15426, "loss": 0.325, "lr": 3.405302506628365e-05, "epoch": 2.248176602158903, "percentage": 74.94, "elapsed_time": "1 day, 10:24:47", "remaining_time": "11:30:31", "throughput": 27062.75, "total_tokens": 3352726784} +{"current_steps": 11570, "total_steps": 15426, "loss": 0.321, "lr": 3.404513017709813e-05, "epoch": 2.250121559856073, "percentage": 75.0, "elapsed_time": "1 day, 10:26:25", "remaining_time": "11:28:41", "throughput": 27064.12, "total_tokens": 3355569152} +{"current_steps": 11580, "total_steps": 15426, "loss": 0.3178, "lr": 3.403724077644598e-05, "epoch": 2.252066517553243, "percentage": 75.07, "elapsed_time": 
"1 day, 10:28:09", "remaining_time": "11:26:53", "throughput": 27065.34, "total_tokens": 3358528000} +{"current_steps": 11590, "total_steps": 15426, "loss": 0.3192, "lr": 3.402935685797077e-05, "epoch": 2.2540114752504135, "percentage": 75.13, "elapsed_time": "1 day, 10:29:55", "remaining_time": "11:25:05", "throughput": 27065.95, "total_tokens": 3361477760} +{"current_steps": 11600, "total_steps": 15426, "loss": 0.3212, "lr": 3.4021478415326355e-05, "epoch": 2.2559564329475834, "percentage": 75.2, "elapsed_time": "1 day, 10:31:36", "remaining_time": "11:23:16", "throughput": 27067.15, "total_tokens": 3364349888} +{"current_steps": 11610, "total_steps": 15426, "loss": 0.3215, "lr": 3.401360544217687e-05, "epoch": 2.2579013906447534, "percentage": 75.26, "elapsed_time": "1 day, 10:33:15", "remaining_time": "11:21:26", "throughput": 27068.43, "total_tokens": 3367185216} +{"current_steps": 11620, "total_steps": 15426, "loss": 0.3235, "lr": 3.400573793219672e-05, "epoch": 2.2598463483419238, "percentage": 75.33, "elapsed_time": "1 day, 10:34:59", "remaining_time": "11:19:38", "throughput": 27068.83, "total_tokens": 3370049856} +{"current_steps": 11630, "total_steps": 15426, "loss": 0.3197, "lr": 3.3997875879070546e-05, "epoch": 2.2617913060390937, "percentage": 75.39, "elapsed_time": "1 day, 10:36:42", "remaining_time": "11:17:49", "throughput": 27069.84, "total_tokens": 3372975744} +{"current_steps": 11640, "total_steps": 15426, "loss": 0.3229, "lr": 3.399001927649318e-05, "epoch": 2.2637362637362637, "percentage": 75.46, "elapsed_time": "1 day, 10:38:26", "remaining_time": "11:16:01", "throughput": 27070.73, "total_tokens": 3375904000} +{"current_steps": 11650, "total_steps": 15426, "loss": 0.3181, "lr": 3.398216811816968e-05, "epoch": 2.2656812214334336, "percentage": 75.52, "elapsed_time": "1 day, 10:40:04", "remaining_time": "11:14:11", "throughput": 27072.64, "total_tokens": 3378777600} +{"current_steps": 11660, "total_steps": 15426, "loss": 0.3205, "lr": 
3.397432239781527e-05, "epoch": 2.267626179130604, "percentage": 75.59, "elapsed_time": "1 day, 10:41:47", "remaining_time": "11:12:23", "throughput": 27073.34, "total_tokens": 3381670144} +{"current_steps": 11670, "total_steps": 15426, "loss": 0.3243, "lr": 3.396648210915531e-05, "epoch": 2.269571136827774, "percentage": 75.65, "elapsed_time": "1 day, 10:43:30", "remaining_time": "11:10:34", "throughput": 27074.06, "total_tokens": 3384545344} +{"current_steps": 11680, "total_steps": 15426, "loss": 0.3221, "lr": 3.3958647245925315e-05, "epoch": 2.271516094524944, "percentage": 75.72, "elapsed_time": "1 day, 10:45:13", "remaining_time": "11:08:46", "throughput": 27075.12, "total_tokens": 3387472000} +{"current_steps": 11690, "total_steps": 15426, "loss": 0.3212, "lr": 3.3950817801870885e-05, "epoch": 2.273461052222114, "percentage": 75.78, "elapsed_time": "1 day, 10:46:54", "remaining_time": "11:06:57", "throughput": 27076.32, "total_tokens": 3390347712} +{"current_steps": 11700, "total_steps": 15426, "loss": 0.3221, "lr": 3.3942993770747735e-05, "epoch": 2.2754060099192843, "percentage": 75.85, "elapsed_time": "1 day, 10:48:35", "remaining_time": "11:05:08", "throughput": 27077.45, "total_tokens": 3393215360} +{"current_steps": 11710, "total_steps": 15426, "loss": 0.3232, "lr": 3.3935175146321626e-05, "epoch": 2.2773509676164543, "percentage": 75.91, "elapsed_time": "1 day, 10:50:13", "remaining_time": "11:03:18", "throughput": 27079.19, "total_tokens": 3396094848} +{"current_steps": 11720, "total_steps": 15426, "loss": 0.3192, "lr": 3.392736192236839e-05, "epoch": 2.2792959253136242, "percentage": 75.98, "elapsed_time": "1 day, 10:51:52", "remaining_time": "11:01:28", "throughput": 27080.51, "total_tokens": 3398953664} +{"current_steps": 11730, "total_steps": 15426, "loss": 0.3178, "lr": 3.391955409267387e-05, "epoch": 2.2812408830107946, "percentage": 76.04, "elapsed_time": "1 day, 10:53:39", "remaining_time": "10:59:41", "throughput": 27081.15, "total_tokens": 
3401932608} +{"current_steps": 11740, "total_steps": 15426, "loss": 0.3229, "lr": 3.3911751651033896e-05, "epoch": 2.2831858407079646, "percentage": 76.11, "elapsed_time": "1 day, 10:55:21", "remaining_time": "10:57:52", "throughput": 27082.43, "total_tokens": 3404843712} +{"current_steps": 11750, "total_steps": 15426, "loss": 0.3218, "lr": 3.3903954591254334e-05, "epoch": 2.2851307984051346, "percentage": 76.17, "elapsed_time": "1 day, 10:57:02", "remaining_time": "10:56:03", "throughput": 27083.94, "total_tokens": 3407778944} +{"current_steps": 11760, "total_steps": 15426, "loss": 0.3174, "lr": 3.389616290715097e-05, "epoch": 2.287075756102305, "percentage": 76.23, "elapsed_time": "1 day, 10:58:44", "remaining_time": "10:54:14", "throughput": 27085.26, "total_tokens": 3410689856} +{"current_steps": 11770, "total_steps": 15426, "loss": 0.319, "lr": 3.388837659254955e-05, "epoch": 2.289020713799475, "percentage": 76.3, "elapsed_time": "1 day, 11:00:25", "remaining_time": "10:52:25", "throughput": 27086.58, "total_tokens": 3413592512} +{"current_steps": 11780, "total_steps": 15426, "loss": 0.3185, "lr": 3.3880595641285746e-05, "epoch": 2.290965671496645, "percentage": 76.36, "elapsed_time": "1 day, 11:02:06", "remaining_time": "10:50:37", "throughput": 27087.79, "total_tokens": 3416488384} +{"current_steps": 11790, "total_steps": 15426, "loss": 0.3203, "lr": 3.387282004720513e-05, "epoch": 2.2929106291938153, "percentage": 76.43, "elapsed_time": "1 day, 11:03:49", "remaining_time": "10:48:48", "throughput": 27088.43, "total_tokens": 3419369472} +{"current_steps": 11800, "total_steps": 15426, "loss": 0.3208, "lr": 3.386504980416316e-05, "epoch": 2.2948555868909852, "percentage": 76.49, "elapsed_time": "1 day, 11:05:30", "remaining_time": "10:46:59", "throughput": 27089.66, "total_tokens": 3422252992} +{"current_steps": 11810, "total_steps": 15426, "loss": 0.3187, "lr": 3.385728490602515e-05, "epoch": 2.296800544588155, "percentage": 76.56, "elapsed_time": "1 day, 
11:07:14", "remaining_time": "10:45:11", "throughput": 27090.67, "total_tokens": 3425183424} +{"current_steps": 11820, "total_steps": 15426, "loss": 0.3211, "lr": 3.384952534666625e-05, "epoch": 2.298745502285325, "percentage": 76.62, "elapsed_time": "1 day, 11:08:56", "remaining_time": "10:43:23", "throughput": 27091.7, "total_tokens": 3428093952} +{"current_steps": 11830, "total_steps": 15426, "loss": 0.3215, "lr": 3.3841771119971455e-05, "epoch": 2.3006904599824956, "percentage": 76.69, "elapsed_time": "1 day, 11:10:37", "remaining_time": "10:41:34", "throughput": 27092.91, "total_tokens": 3430980736} +{"current_steps": 11840, "total_steps": 15426, "loss": 0.3211, "lr": 3.383402221983554e-05, "epoch": 2.3026354176796655, "percentage": 76.75, "elapsed_time": "1 day, 11:12:16", "remaining_time": "10:39:44", "throughput": 27094.7, "total_tokens": 3433890176} +{"current_steps": 11850, "total_steps": 15426, "loss": 0.3183, "lr": 3.3826278640163064e-05, "epoch": 2.3045803753768355, "percentage": 76.82, "elapsed_time": "1 day, 11:13:52", "remaining_time": "10:37:54", "throughput": 27096.62, "total_tokens": 3436727360} +{"current_steps": 11860, "total_steps": 15426, "loss": 0.3203, "lr": 3.3818540374868354e-05, "epoch": 2.3065253330740054, "percentage": 76.88, "elapsed_time": "1 day, 11:15:37", "remaining_time": "10:36:06", "throughput": 27097.24, "total_tokens": 3439655616} +{"current_steps": 11870, "total_steps": 15426, "loss": 0.3197, "lr": 3.381080741787547e-05, "epoch": 2.308470290771176, "percentage": 76.95, "elapsed_time": "1 day, 11:17:19", "remaining_time": "10:34:18", "throughput": 27098.39, "total_tokens": 3442562944} +{"current_steps": 11880, "total_steps": 15426, "loss": 0.318, "lr": 3.38030797631182e-05, "epoch": 2.310415248468346, "percentage": 77.01, "elapsed_time": "1 day, 11:19:01", "remaining_time": "10:32:29", "throughput": 27099.45, "total_tokens": 3445455360} +{"current_steps": 11890, "total_steps": 15426, "loss": 0.3185, "lr": 
3.379535740454003e-05, "epoch": 2.3123602061655157, "percentage": 77.08, "elapsed_time": "1 day, 11:20:45", "remaining_time": "10:30:41", "throughput": 27100.52, "total_tokens": 3448414400} +{"current_steps": 11900, "total_steps": 15426, "loss": 0.3179, "lr": 3.3787640336094126e-05, "epoch": 2.314305163862686, "percentage": 77.14, "elapsed_time": "1 day, 11:22:30", "remaining_time": "10:28:54", "throughput": 27101.12, "total_tokens": 3451330560} +{"current_steps": 11910, "total_steps": 15426, "loss": 0.3201, "lr": 3.3779928551743325e-05, "epoch": 2.316250121559856, "percentage": 77.21, "elapsed_time": "1 day, 11:24:11", "remaining_time": "10:27:05", "throughput": 27102.09, "total_tokens": 3454210432} +{"current_steps": 11920, "total_steps": 15426, "loss": 0.3208, "lr": 3.3772222045460084e-05, "epoch": 2.318195079257026, "percentage": 77.27, "elapsed_time": "1 day, 11:25:55", "remaining_time": "10:25:17", "throughput": 27102.84, "total_tokens": 3457120384} +{"current_steps": 11930, "total_steps": 15426, "loss": 0.3238, "lr": 3.37645208112265e-05, "epoch": 2.3201400369541965, "percentage": 77.34, "elapsed_time": "1 day, 11:27:30", "remaining_time": "10:23:27", "throughput": 27104.85, "total_tokens": 3459951488} +{"current_steps": 11940, "total_steps": 15426, "loss": 0.3226, "lr": 3.3756824843034255e-05, "epoch": 2.3220849946513664, "percentage": 77.4, "elapsed_time": "1 day, 11:29:12", "remaining_time": "10:21:38", "throughput": 27105.44, "total_tokens": 3462799360} +{"current_steps": 11950, "total_steps": 15426, "loss": 0.3192, "lr": 3.374913413488464e-05, "epoch": 2.3240299523485364, "percentage": 77.47, "elapsed_time": "1 day, 11:30:55", "remaining_time": "10:19:50", "throughput": 27106.33, "total_tokens": 3465704384} +{"current_steps": 11960, "total_steps": 15426, "loss": 0.3146, "lr": 3.374144868078848e-05, "epoch": 2.3259749100457063, "percentage": 77.53, "elapsed_time": "1 day, 11:32:41", "remaining_time": "10:18:03", "throughput": 27107.01, "total_tokens": 
3468665984} +{"current_steps": 11970, "total_steps": 15426, "loss": 0.3196, "lr": 3.373376847476615e-05, "epoch": 2.3279198677428767, "percentage": 77.6, "elapsed_time": "1 day, 11:34:20", "remaining_time": "10:16:13", "throughput": 27108.83, "total_tokens": 3471574080} +{"current_steps": 11980, "total_steps": 15426, "loss": 0.3225, "lr": 3.3726093510847566e-05, "epoch": 2.3298648254400467, "percentage": 77.66, "elapsed_time": "1 day, 11:36:05", "remaining_time": "10:14:26", "throughput": 27109.87, "total_tokens": 3474556480} +{"current_steps": 11990, "total_steps": 15426, "loss": 0.3176, "lr": 3.371842378307212e-05, "epoch": 2.3318097831372167, "percentage": 77.73, "elapsed_time": "1 day, 11:37:49", "remaining_time": "10:12:38", "throughput": 27110.67, "total_tokens": 3477459584} +{"current_steps": 12000, "total_steps": 15426, "loss": 0.3214, "lr": 3.371075928548872e-05, "epoch": 2.3337547408343866, "percentage": 77.79, "elapsed_time": "1 day, 11:39:37", "remaining_time": "10:10:51", "throughput": 27111.1, "total_tokens": 3480451136} +{"current_steps": 12010, "total_steps": 15426, "loss": 0.3233, "lr": 3.37031000121557e-05, "epoch": 2.335699698531557, "percentage": 77.86, "elapsed_time": "1 day, 11:41:15", "remaining_time": "10:09:02", "throughput": 27112.71, "total_tokens": 3483318528} +{"current_steps": 12020, "total_steps": 15426, "loss": 0.3206, "lr": 3.369544595714088e-05, "epoch": 2.337644656228727, "percentage": 77.92, "elapsed_time": "1 day, 11:42:55", "remaining_time": "10:07:13", "throughput": 27113.93, "total_tokens": 3486193664} +{"current_steps": 12030, "total_steps": 15426, "loss": 0.3214, "lr": 3.368779711452148e-05, "epoch": 2.339589613925897, "percentage": 77.99, "elapsed_time": "1 day, 11:44:36", "remaining_time": "10:05:24", "throughput": 27115.5, "total_tokens": 3489116992} +{"current_steps": 12040, "total_steps": 15426, "loss": 0.3273, "lr": 3.368015347838413e-05, "epoch": 2.3415345716230673, "percentage": 78.05, "elapsed_time": "1 day, 
11:46:18", "remaining_time": "10:03:36", "throughput": 27116.59, "total_tokens": 3492034816} +{"current_steps": 12050, "total_steps": 15426, "loss": 0.3185, "lr": 3.3672515042824855e-05, "epoch": 2.3434795293202373, "percentage": 78.11, "elapsed_time": "1 day, 11:47:57", "remaining_time": "10:01:47", "throughput": 27117.8, "total_tokens": 3494872384} +{"current_steps": 12060, "total_steps": 15426, "loss": 0.3245, "lr": 3.366488180194904e-05, "epoch": 2.3454244870174072, "percentage": 78.18, "elapsed_time": "1 day, 11:49:35", "remaining_time": "9:59:57", "throughput": 27118.88, "total_tokens": 3497683392} +{"current_steps": 12070, "total_steps": 15426, "loss": 0.3184, "lr": 3.365725374987143e-05, "epoch": 2.3473694447145776, "percentage": 78.24, "elapsed_time": "1 day, 11:51:19", "remaining_time": "9:58:09", "throughput": 27119.7, "total_tokens": 3500601024} +{"current_steps": 12080, "total_steps": 15426, "loss": 0.3186, "lr": 3.36496308807161e-05, "epoch": 2.3493144024117476, "percentage": 78.31, "elapsed_time": "1 day, 11:52:57", "remaining_time": "9:56:20", "throughput": 27121.57, "total_tokens": 3503509184} +{"current_steps": 12090, "total_steps": 15426, "loss": 0.3234, "lr": 3.3642013188616426e-05, "epoch": 2.3512593601089176, "percentage": 78.37, "elapsed_time": "1 day, 11:54:38", "remaining_time": "9:54:31", "throughput": 27123.06, "total_tokens": 3506421376} +{"current_steps": 12100, "total_steps": 15426, "loss": 0.3199, "lr": 3.3634400667715074e-05, "epoch": 2.353204317806088, "percentage": 78.44, "elapsed_time": "1 day, 11:56:21", "remaining_time": "9:52:43", "throughput": 27123.72, "total_tokens": 3509316160} +{"current_steps": 12110, "total_steps": 15426, "loss": 0.3244, "lr": 3.3626793312164013e-05, "epoch": 2.355149275503258, "percentage": 78.5, "elapsed_time": "1 day, 11:58:05", "remaining_time": "9:50:56", "throughput": 27124.17, "total_tokens": 3512191232} +{"current_steps": 12120, "total_steps": 15426, "loss": 0.323, "lr": 3.361919111612443e-05, 
"epoch": 2.357094233200428, "percentage": 78.57, "elapsed_time": "1 day, 11:59:44", "remaining_time": "9:49:07", "throughput": 27125.59, "total_tokens": 3515052416} +{"current_steps": 12130, "total_steps": 15426, "loss": 0.3208, "lr": 3.361159407376678e-05, "epoch": 2.359039190897598, "percentage": 78.63, "elapsed_time": "1 day, 12:01:23", "remaining_time": "9:47:18", "throughput": 27126.96, "total_tokens": 3517920192} +{"current_steps": 12140, "total_steps": 15426, "loss": 0.3215, "lr": 3.3604002179270685e-05, "epoch": 2.3609841485947682, "percentage": 78.7, "elapsed_time": "1 day, 12:03:04", "remaining_time": "9:45:29", "throughput": 27127.91, "total_tokens": 3520784768} +{"current_steps": 12150, "total_steps": 15426, "loss": 0.322, "lr": 3.359641542682504e-05, "epoch": 2.362929106291938, "percentage": 78.76, "elapsed_time": "1 day, 12:04:45", "remaining_time": "9:43:40", "throughput": 27129.18, "total_tokens": 3523686272} +{"current_steps": 12160, "total_steps": 15426, "loss": 0.3216, "lr": 3.3588833810627854e-05, "epoch": 2.364874063989108, "percentage": 78.83, "elapsed_time": "1 day, 12:06:21", "remaining_time": "9:41:51", "throughput": 27131.15, "total_tokens": 3526549504} +{"current_steps": 12170, "total_steps": 15426, "loss": 0.3168, "lr": 3.358125732488632e-05, "epoch": 2.366819021686278, "percentage": 78.89, "elapsed_time": "1 day, 12:08:07", "remaining_time": "9:40:04", "throughput": 27131.56, "total_tokens": 3529477568} +{"current_steps": 12180, "total_steps": 15426, "loss": 0.3216, "lr": 3.357368596381679e-05, "epoch": 2.3687639793834485, "percentage": 78.96, "elapsed_time": "1 day, 12:09:48", "remaining_time": "9:38:15", "throughput": 27132.99, "total_tokens": 3532402240} +{"current_steps": 12190, "total_steps": 15426, "loss": 0.3241, "lr": 3.356611972164471e-05, "epoch": 2.3707089370806185, "percentage": 79.02, "elapsed_time": "1 day, 12:11:25", "remaining_time": "9:36:26", "throughput": 27134.54, "total_tokens": 3535243584} +{"current_steps": 12200, 
"total_steps": 15426, "loss": 0.3213, "lr": 3.355855859260466e-05, "epoch": 2.3726538947777884, "percentage": 79.09, "elapsed_time": "1 day, 12:13:07", "remaining_time": "9:34:38", "throughput": 27135.7, "total_tokens": 3538169856} +{"current_steps": 12210, "total_steps": 15426, "loss": 0.3215, "lr": 3.3551002570940285e-05, "epoch": 2.374598852474959, "percentage": 79.15, "elapsed_time": "1 day, 12:14:44", "remaining_time": "9:32:48", "throughput": 27137.49, "total_tokens": 3541032832} +{"current_steps": 12220, "total_steps": 15426, "loss": 0.3192, "lr": 3.354345165090431e-05, "epoch": 2.376543810172129, "percentage": 79.22, "elapsed_time": "1 day, 12:16:25", "remaining_time": "9:30:59", "throughput": 27138.78, "total_tokens": 3543918976} +{"current_steps": 12230, "total_steps": 15426, "loss": 0.3223, "lr": 3.3535905826758515e-05, "epoch": 2.3784887678692987, "percentage": 79.28, "elapsed_time": "1 day, 12:18:06", "remaining_time": "9:29:11", "throughput": 27139.72, "total_tokens": 3546786752} +{"current_steps": 12240, "total_steps": 15426, "loss": 0.3209, "lr": 3.352836509277369e-05, "epoch": 2.380433725566469, "percentage": 79.35, "elapsed_time": "1 day, 12:19:45", "remaining_time": "9:27:22", "throughput": 27141.12, "total_tokens": 3549668544} +{"current_steps": 12250, "total_steps": 15426, "loss": 0.3237, "lr": 3.352082944322966e-05, "epoch": 2.382378683263639, "percentage": 79.41, "elapsed_time": "1 day, 12:21:26", "remaining_time": "9:25:34", "throughput": 27142.51, "total_tokens": 3552597952} +{"current_steps": 12260, "total_steps": 15426, "loss": 0.3186, "lr": 3.351329887241524e-05, "epoch": 2.384323640960809, "percentage": 79.48, "elapsed_time": "1 day, 12:23:09", "remaining_time": "9:23:46", "throughput": 27143.42, "total_tokens": 3555513152} +{"current_steps": 12270, "total_steps": 15426, "loss": 0.3224, "lr": 3.3505773374628225e-05, "epoch": 2.386268598657979, "percentage": 79.54, "elapsed_time": "1 day, 12:24:54", "remaining_time": "9:21:59", 
"throughput": 27144.15, "total_tokens": 3558458368} +{"current_steps": 12280, "total_steps": 15426, "loss": 0.3221, "lr": 3.3498252944175354e-05, "epoch": 2.3882135563551494, "percentage": 79.61, "elapsed_time": "1 day, 12:26:36", "remaining_time": "9:20:11", "throughput": 27145.05, "total_tokens": 3561326272} +{"current_steps": 12290, "total_steps": 15426, "loss": 0.3181, "lr": 3.3490737575372326e-05, "epoch": 2.3901585140523194, "percentage": 79.67, "elapsed_time": "1 day, 12:28:18", "remaining_time": "9:18:23", "throughput": 27146.01, "total_tokens": 3564240384} +{"current_steps": 12300, "total_steps": 15426, "loss": 0.3204, "lr": 3.348322726254375e-05, "epoch": 2.3921034717494893, "percentage": 79.74, "elapsed_time": "1 day, 12:29:59", "remaining_time": "9:16:34", "throughput": 27147.05, "total_tokens": 3567119040} +{"current_steps": 12310, "total_steps": 15426, "loss": 0.3205, "lr": 3.347572200002315e-05, "epoch": 2.3940484294466593, "percentage": 79.8, "elapsed_time": "1 day, 12:31:39", "remaining_time": "9:14:46", "throughput": 27148.85, "total_tokens": 3570050624} +{"current_steps": 12320, "total_steps": 15426, "loss": 0.3216, "lr": 3.3468221782152924e-05, "epoch": 2.3959933871438297, "percentage": 79.87, "elapsed_time": "1 day, 12:33:20", "remaining_time": "9:12:57", "throughput": 27150.1, "total_tokens": 3572953664} +{"current_steps": 12330, "total_steps": 15426, "loss": 0.3172, "lr": 3.346072660328435e-05, "epoch": 2.3979383448409997, "percentage": 79.93, "elapsed_time": "1 day, 12:35:04", "remaining_time": "9:11:10", "throughput": 27150.92, "total_tokens": 3575897856} +{"current_steps": 12340, "total_steps": 15426, "loss": 0.3175, "lr": 3.345323645777756e-05, "epoch": 2.3998833025381696, "percentage": 79.99, "elapsed_time": "1 day, 12:36:44", "remaining_time": "9:09:21", "throughput": 27151.98, "total_tokens": 3578764864} +{"current_steps": 12350, "total_steps": 15426, "loss": 0.3232, "lr": 3.34457513400015e-05, "epoch": 2.40182826023534, "percentage": 
80.06, "elapsed_time": "1 day, 12:38:20", "remaining_time": "9:07:32", "throughput": 27153.77, "total_tokens": 3581598272} +{"current_steps": 12360, "total_steps": 15426, "loss": 0.318, "lr": 3.343827124433396e-05, "epoch": 2.40377321793251, "percentage": 80.12, "elapsed_time": "1 day, 12:40:03", "remaining_time": "9:05:44", "throughput": 27154.57, "total_tokens": 3584494016} +{"current_steps": 12370, "total_steps": 15426, "loss": 0.3197, "lr": 3.343079616516151e-05, "epoch": 2.40571817562968, "percentage": 80.19, "elapsed_time": "1 day, 12:41:45", "remaining_time": "9:03:56", "throughput": 27155.49, "total_tokens": 3587379840} +{"current_steps": 12380, "total_steps": 15426, "loss": 0.3167, "lr": 3.3423326096879495e-05, "epoch": 2.4076631333268503, "percentage": 80.25, "elapsed_time": "1 day, 12:43:27", "remaining_time": "9:02:08", "throughput": 27156.45, "total_tokens": 3590287360} +{"current_steps": 12390, "total_steps": 15426, "loss": 0.3236, "lr": 3.341586103389203e-05, "epoch": 2.4096080910240203, "percentage": 80.32, "elapsed_time": "1 day, 12:45:03", "remaining_time": "9:00:19", "throughput": 27157.91, "total_tokens": 3593083712} +{"current_steps": 12400, "total_steps": 15426, "loss": 0.3219, "lr": 3.3408400970611995e-05, "epoch": 2.4115530487211903, "percentage": 80.38, "elapsed_time": "1 day, 12:46:45", "remaining_time": "8:58:31", "throughput": 27158.95, "total_tokens": 3595983872} +{"current_steps": 12410, "total_steps": 15426, "loss": 0.3203, "lr": 3.340094590146095e-05, "epoch": 2.41349800641836, "percentage": 80.45, "elapsed_time": "1 day, 12:48:26", "remaining_time": "8:56:42", "throughput": 27160.13, "total_tokens": 3598883200} +{"current_steps": 12420, "total_steps": 15426, "loss": 0.3216, "lr": 3.3393495820869215e-05, "epoch": 2.4154429641155306, "percentage": 80.51, "elapsed_time": "1 day, 12:50:05", "remaining_time": "8:54:54", "throughput": 27161.45, "total_tokens": 3601764736} +{"current_steps": 12430, "total_steps": 15426, "loss": 0.3209, 
"lr": 3.338605072327576e-05, "epoch": 2.4173879218127006, "percentage": 80.58, "elapsed_time": "1 day, 12:51:45", "remaining_time": "8:53:06", "throughput": 27162.75, "total_tokens": 3604656256} +{"current_steps": 12440, "total_steps": 15426, "loss": 0.3221, "lr": 3.337861060312827e-05, "epoch": 2.4193328795098705, "percentage": 80.64, "elapsed_time": "1 day, 12:53:30", "remaining_time": "8:51:18", "throughput": 27163.42, "total_tokens": 3607593024} +{"current_steps": 12450, "total_steps": 15426, "loss": 0.3229, "lr": 3.337117545488306e-05, "epoch": 2.421277837207041, "percentage": 80.71, "elapsed_time": "1 day, 12:55:12", "remaining_time": "8:49:30", "throughput": 27164.71, "total_tokens": 3610517696} +{"current_steps": 12460, "total_steps": 15426, "loss": 0.3227, "lr": 3.33637452730051e-05, "epoch": 2.423222794904211, "percentage": 80.77, "elapsed_time": "1 day, 12:56:53", "remaining_time": "8:47:42", "throughput": 27165.8, "total_tokens": 3613412416} +{"current_steps": 12470, "total_steps": 15426, "loss": 0.3218, "lr": 3.335632005196796e-05, "epoch": 2.425167752601381, "percentage": 80.84, "elapsed_time": "1 day, 12:58:36", "remaining_time": "8:45:55", "throughput": 27166.4, "total_tokens": 3616298496} +{"current_steps": 12480, "total_steps": 15426, "loss": 0.3191, "lr": 3.334889978625383e-05, "epoch": 2.427112710298551, "percentage": 80.9, "elapsed_time": "1 day, 13:00:17", "remaining_time": "8:44:07", "throughput": 27167.21, "total_tokens": 3619159040} +{"current_steps": 12490, "total_steps": 15426, "loss": 0.3237, "lr": 3.3341484470353515e-05, "epoch": 2.429057667995721, "percentage": 80.97, "elapsed_time": "1 day, 13:02:00", "remaining_time": "8:42:19", "throughput": 27168.21, "total_tokens": 3622067712} +{"current_steps": 12500, "total_steps": 15426, "loss": 0.3191, "lr": 3.333407409876635e-05, "epoch": 2.431002625692891, "percentage": 81.03, "elapsed_time": "1 day, 13:03:38", "remaining_time": "8:40:30", "throughput": 27169.6, "total_tokens": 3624919808} 
+{"current_steps": 12510, "total_steps": 15426, "loss": 0.3187, "lr": 3.332666866600024e-05, "epoch": 2.432947583390061, "percentage": 81.1, "elapsed_time": "1 day, 13:05:23", "remaining_time": "8:38:43", "throughput": 27170.36, "total_tokens": 3627892480} +{"current_steps": 12520, "total_steps": 15426, "loss": 0.3244, "lr": 3.331926816657162e-05, "epoch": 2.4348925410872315, "percentage": 81.16, "elapsed_time": "1 day, 13:07:09", "remaining_time": "8:36:56", "throughput": 27170.79, "total_tokens": 3630820096} +{"current_steps": 12530, "total_steps": 15426, "loss": 0.3251, "lr": 3.331187259500546e-05, "epoch": 2.4368374987844015, "percentage": 81.23, "elapsed_time": "1 day, 13:08:49", "remaining_time": "8:35:08", "throughput": 27172.27, "total_tokens": 3633722688} +{"current_steps": 12540, "total_steps": 15426, "loss": 0.3215, "lr": 3.3304481945835235e-05, "epoch": 2.4387824564815714, "percentage": 81.29, "elapsed_time": "1 day, 13:10:36", "remaining_time": "8:33:21", "throughput": 27172.32, "total_tokens": 3636642688} +{"current_steps": 12550, "total_steps": 15426, "loss": 0.3251, "lr": 3.329709621360288e-05, "epoch": 2.440727414178742, "percentage": 81.36, "elapsed_time": "1 day, 13:12:14", "remaining_time": "8:31:32", "throughput": 27173.45, "total_tokens": 3639453312} +{"current_steps": 12560, "total_steps": 15426, "loss": 0.3166, "lr": 3.328971539285882e-05, "epoch": 2.442672371875912, "percentage": 81.42, "elapsed_time": "1 day, 13:13:57", "remaining_time": "8:29:45", "throughput": 27174.44, "total_tokens": 3642392192} +{"current_steps": 12570, "total_steps": 15426, "loss": 0.3193, "lr": 3.3282339478161935e-05, "epoch": 2.4446173295730818, "percentage": 81.49, "elapsed_time": "1 day, 13:15:32", "remaining_time": "8:27:55", "throughput": 27176.38, "total_tokens": 3645241664} +{"current_steps": 12580, "total_steps": 15426, "loss": 0.3183, "lr": 3.327496846407953e-05, "epoch": 2.4465622872702517, "percentage": 81.55, "elapsed_time": "1 day, 13:17:14", 
"remaining_time": "8:26:08", "throughput": 27177.7, "total_tokens": 3648174592} +{"current_steps": 12590, "total_steps": 15426, "loss": 0.323, "lr": 3.3267602345187304e-05, "epoch": 2.448507244967422, "percentage": 81.62, "elapsed_time": "1 day, 13:18:56", "remaining_time": "8:24:20", "throughput": 27178.51, "total_tokens": 3651067392} +{"current_steps": 12600, "total_steps": 15426, "loss": 0.3189, "lr": 3.326024111606942e-05, "epoch": 2.450452202664592, "percentage": 81.68, "elapsed_time": "1 day, 13:20:38", "remaining_time": "8:22:32", "throughput": 27179.4, "total_tokens": 3653964352} +{"current_steps": 12610, "total_steps": 15426, "loss": 0.3191, "lr": 3.325288477131839e-05, "epoch": 2.452397160361762, "percentage": 81.75, "elapsed_time": "1 day, 13:22:21", "remaining_time": "8:20:45", "throughput": 27180.54, "total_tokens": 3656906240} +{"current_steps": 12620, "total_steps": 15426, "loss": 0.3169, "lr": 3.324553330553507e-05, "epoch": 2.454342118058932, "percentage": 81.81, "elapsed_time": "1 day, 13:24:05", "remaining_time": "8:18:57", "throughput": 27180.9, "total_tokens": 3659776640} +{"current_steps": 12630, "total_steps": 15426, "loss": 0.3244, "lr": 3.323818671332871e-05, "epoch": 2.4562870757561024, "percentage": 81.87, "elapsed_time": "1 day, 13:25:44", "remaining_time": "8:17:09", "throughput": 27182.51, "total_tokens": 3662683584} +{"current_steps": 12640, "total_steps": 15426, "loss": 0.322, "lr": 3.323084498931687e-05, "epoch": 2.4582320334532723, "percentage": 81.94, "elapsed_time": "1 day, 13:27:23", "remaining_time": "8:15:21", "throughput": 27183.75, "total_tokens": 3665549824} +{"current_steps": 12650, "total_steps": 15426, "loss": 0.3203, "lr": 3.322350812812545e-05, "epoch": 2.4601769911504423, "percentage": 82.0, "elapsed_time": "1 day, 13:29:03", "remaining_time": "8:13:32", "throughput": 27184.94, "total_tokens": 3668430720} +{"current_steps": 12660, "total_steps": 15426, "loss": 0.3194, "lr": 3.321617612438862e-05, "epoch": 
2.4621219488476127, "percentage": 82.07, "elapsed_time": "1 day, 13:30:42", "remaining_time": "8:11:44", "throughput": 27186.5, "total_tokens": 3671326016} +{"current_steps": 12670, "total_steps": 15426, "loss": 0.3205, "lr": 3.320884897274886e-05, "epoch": 2.4640669065447827, "percentage": 82.13, "elapsed_time": "1 day, 13:32:21", "remaining_time": "8:09:56", "throughput": 27187.76, "total_tokens": 3674189760} +{"current_steps": 12680, "total_steps": 15426, "loss": 0.3172, "lr": 3.320152666785692e-05, "epoch": 2.4660118642419526, "percentage": 82.2, "elapsed_time": "1 day, 13:34:03", "remaining_time": "8:08:08", "throughput": 27188.59, "total_tokens": 3677077952} +{"current_steps": 12690, "total_steps": 15426, "loss": 0.321, "lr": 3.319420920437179e-05, "epoch": 2.467956821939123, "percentage": 82.26, "elapsed_time": "1 day, 13:35:42", "remaining_time": "8:06:20", "throughput": 27189.78, "total_tokens": 3679932224} +{"current_steps": 12700, "total_steps": 15426, "loss": 0.3223, "lr": 3.31868965769607e-05, "epoch": 2.469901779636293, "percentage": 82.33, "elapsed_time": "1 day, 13:37:21", "remaining_time": "8:04:31", "throughput": 27191.16, "total_tokens": 3682806976} +{"current_steps": 12710, "total_steps": 15426, "loss": 0.3222, "lr": 3.317958878029911e-05, "epoch": 2.471846737333463, "percentage": 82.39, "elapsed_time": "1 day, 13:39:03", "remaining_time": "8:02:44", "throughput": 27192.04, "total_tokens": 3685690624} +{"current_steps": 12720, "total_steps": 15426, "loss": 0.3214, "lr": 3.3172285809070665e-05, "epoch": 2.473791695030633, "percentage": 82.46, "elapsed_time": "1 day, 13:40:45", "remaining_time": "8:00:56", "throughput": 27192.82, "total_tokens": 3688578880} +{"current_steps": 12730, "total_steps": 15426, "loss": 0.3197, "lr": 3.3164987657967214e-05, "epoch": 2.4757366527278033, "percentage": 82.52, "elapsed_time": "1 day, 13:42:29", "remaining_time": "7:59:09", "throughput": 27193.23, "total_tokens": 3691477696} +{"current_steps": 12740, 
"total_steps": 15426, "loss": 0.3214, "lr": 3.315769432168877e-05, "epoch": 2.4776816104249733, "percentage": 82.59, "elapsed_time": "1 day, 13:44:12", "remaining_time": "7:57:22", "throughput": 27193.96, "total_tokens": 3694367936} +{"current_steps": 12750, "total_steps": 15426, "loss": 0.321, "lr": 3.315040579494349e-05, "epoch": 2.479626568122143, "percentage": 82.65, "elapsed_time": "1 day, 13:45:52", "remaining_time": "7:55:33", "throughput": 27195.15, "total_tokens": 3697236352} +{"current_steps": 12760, "total_steps": 15426, "loss": 0.3232, "lr": 3.31431220724477e-05, "epoch": 2.4815715258193136, "percentage": 82.72, "elapsed_time": "1 day, 13:47:34", "remaining_time": "7:53:46", "throughput": 27195.98, "total_tokens": 3700139520} +{"current_steps": 12770, "total_steps": 15426, "loss": 0.3229, "lr": 3.3135843148925834e-05, "epoch": 2.4835164835164836, "percentage": 82.78, "elapsed_time": "1 day, 13:49:17", "remaining_time": "7:51:58", "throughput": 27197.05, "total_tokens": 3703070720} +{"current_steps": 12780, "total_steps": 15426, "loss": 0.3198, "lr": 3.3128569019110414e-05, "epoch": 2.4854614412136535, "percentage": 82.85, "elapsed_time": "1 day, 13:51:01", "remaining_time": "7:50:11", "throughput": 27197.83, "total_tokens": 3706024512} +{"current_steps": 12790, "total_steps": 15426, "loss": 0.3188, "lr": 3.312129967774207e-05, "epoch": 2.4874063989108235, "percentage": 82.91, "elapsed_time": "1 day, 13:52:46", "remaining_time": "7:48:24", "throughput": 27198.41, "total_tokens": 3708944384} +{"current_steps": 12800, "total_steps": 15426, "loss": 0.3184, "lr": 3.311403511956952e-05, "epoch": 2.489351356607994, "percentage": 82.98, "elapsed_time": "1 day, 13:54:29", "remaining_time": "7:46:37", "throughput": 27198.96, "total_tokens": 3711839552} +{"current_steps": 12810, "total_steps": 15426, "loss": 0.3191, "lr": 3.310677533934952e-05, "epoch": 2.491296314305164, "percentage": 83.04, "elapsed_time": "1 day, 13:56:07", "remaining_time": "7:44:49", 
"throughput": 27200.49, "total_tokens": 3714706368} +{"current_steps": 12820, "total_steps": 15426, "loss": 0.3193, "lr": 3.309952033184686e-05, "epoch": 2.493241272002334, "percentage": 83.11, "elapsed_time": "1 day, 13:57:51", "remaining_time": "7:43:02", "throughput": 27201.73, "total_tokens": 3717698048} +{"current_steps": 12830, "total_steps": 15426, "loss": 0.3233, "lr": 3.309227009183439e-05, "epoch": 2.495186229699504, "percentage": 83.17, "elapsed_time": "1 day, 13:59:33", "remaining_time": "7:41:14", "throughput": 27202.99, "total_tokens": 3720648576} +{"current_steps": 12840, "total_steps": 15426, "loss": 0.3191, "lr": 3.308502461409295e-05, "epoch": 2.497131187396674, "percentage": 83.24, "elapsed_time": "1 day, 14:01:15", "remaining_time": "7:39:27", "throughput": 27204.07, "total_tokens": 3723579520} +{"current_steps": 12850, "total_steps": 15426, "loss": 0.3227, "lr": 3.3077783893411386e-05, "epoch": 2.499076145093844, "percentage": 83.3, "elapsed_time": "1 day, 14:02:56", "remaining_time": "7:37:39", "throughput": 27205.35, "total_tokens": 3726499456} +{"current_steps": 12860, "total_steps": 15426, "loss": 0.3222, "lr": 3.30705479245865e-05, "epoch": 2.5010211027910145, "percentage": 83.37, "elapsed_time": "1 day, 14:04:31", "remaining_time": "7:35:50", "throughput": 27207.03, "total_tokens": 3729320256} +{"current_steps": 12870, "total_steps": 15426, "loss": 0.3183, "lr": 3.3063316702423094e-05, "epoch": 2.5029660604881845, "percentage": 83.43, "elapsed_time": "1 day, 14:06:13", "remaining_time": "7:34:02", "throughput": 27208.0, "total_tokens": 3732205376} +{"current_steps": 12880, "total_steps": 15426, "loss": 0.3173, "lr": 3.305609022173388e-05, "epoch": 2.5049110181853544, "percentage": 83.5, "elapsed_time": "1 day, 14:07:53", "remaining_time": "7:32:15", "throughput": 27209.24, "total_tokens": 3735113472} +{"current_steps": 12890, "total_steps": 15426, "loss": 0.3213, "lr": 3.304886847733954e-05, "epoch": 2.5068559758825244, "percentage": 
83.56, "elapsed_time": "1 day, 14:09:33", "remaining_time": "7:30:27", "throughput": 27210.47, "total_tokens": 3738004352} +{"current_steps": 12900, "total_steps": 15426, "loss": 0.3222, "lr": 3.304165146406865e-05, "epoch": 2.508800933579695, "percentage": 83.63, "elapsed_time": "1 day, 14:11:13", "remaining_time": "7:28:39", "throughput": 27211.93, "total_tokens": 3740908544} +{"current_steps": 12910, "total_steps": 15426, "loss": 0.316, "lr": 3.30344391767577e-05, "epoch": 2.5107458912768648, "percentage": 83.69, "elapsed_time": "1 day, 14:12:55", "remaining_time": "7:26:51", "throughput": 27212.8, "total_tokens": 3743823488} +{"current_steps": 12920, "total_steps": 15426, "loss": 0.3144, "lr": 3.302723161025104e-05, "epoch": 2.5126908489740347, "percentage": 83.75, "elapsed_time": "1 day, 14:14:40", "remaining_time": "7:25:04", "throughput": 27213.59, "total_tokens": 3746767296} +{"current_steps": 12930, "total_steps": 15426, "loss": 0.3207, "lr": 3.302002875940093e-05, "epoch": 2.5146358066712047, "percentage": 83.82, "elapsed_time": "1 day, 14:16:19", "remaining_time": "7:23:16", "throughput": 27215.01, "total_tokens": 3749678784} +{"current_steps": 12940, "total_steps": 15426, "loss": 0.3179, "lr": 3.3012830619067466e-05, "epoch": 2.516580764368375, "percentage": 83.88, "elapsed_time": "1 day, 14:18:02", "remaining_time": "7:21:29", "throughput": 27216.05, "total_tokens": 3752610240} +{"current_steps": 12950, "total_steps": 15426, "loss": 0.321, "lr": 3.300563718411857e-05, "epoch": 2.518525722065545, "percentage": 83.95, "elapsed_time": "1 day, 14:19:38", "remaining_time": "7:19:41", "throughput": 27217.86, "total_tokens": 3755490112} +{"current_steps": 12960, "total_steps": 15426, "loss": 0.3197, "lr": 3.299844844943e-05, "epoch": 2.520470679762715, "percentage": 84.01, "elapsed_time": "1 day, 14:21:16", "remaining_time": "7:17:52", "throughput": 27219.01, "total_tokens": 3758295232} +{"current_steps": 12970, "total_steps": 15426, "loss": 0.3216, "lr": 
3.299126440988535e-05, "epoch": 2.5224156374598854, "percentage": 84.08, "elapsed_time": "1 day, 14:22:54", "remaining_time": "7:16:04", "throughput": 27220.25, "total_tokens": 3761149760} +{"current_steps": 12980, "total_steps": 15426, "loss": 0.3173, "lr": 3.298408506037596e-05, "epoch": 2.5243605951570554, "percentage": 84.14, "elapsed_time": "1 day, 14:24:36", "remaining_time": "7:14:17", "throughput": 27221.39, "total_tokens": 3764074432} +{"current_steps": 12990, "total_steps": 15426, "loss": 0.3212, "lr": 3.297691039580097e-05, "epoch": 2.5263055528542253, "percentage": 84.21, "elapsed_time": "1 day, 14:26:18", "remaining_time": "7:12:29", "throughput": 27222.32, "total_tokens": 3766972288} +{"current_steps": 13000, "total_steps": 15426, "loss": 0.3154, "lr": 3.29697404110673e-05, "epoch": 2.5282505105513957, "percentage": 84.27, "elapsed_time": "1 day, 14:28:00", "remaining_time": "7:10:42", "throughput": 27223.14, "total_tokens": 3769886912} +{"current_steps": 13010, "total_steps": 15426, "loss": 0.3223, "lr": 3.2962575101089594e-05, "epoch": 2.5301954682485657, "percentage": 84.34, "elapsed_time": "1 day, 14:29:41", "remaining_time": "7:08:54", "throughput": 27224.18, "total_tokens": 3772755712} +{"current_steps": 13020, "total_steps": 15426, "loss": 0.3218, "lr": 3.295541446079024e-05, "epoch": 2.5321404259457356, "percentage": 84.4, "elapsed_time": "1 day, 14:31:29", "remaining_time": "7:07:08", "throughput": 27224.28, "total_tokens": 3775735616} +{"current_steps": 13030, "total_steps": 15426, "loss": 0.3165, "lr": 3.2948258485099336e-05, "epoch": 2.534085383642906, "percentage": 84.47, "elapsed_time": "1 day, 14:33:13", "remaining_time": "7:05:21", "throughput": 27225.14, "total_tokens": 3778677696} +{"current_steps": 13040, "total_steps": 15426, "loss": 0.3191, "lr": 3.29411071689547e-05, "epoch": 2.536030341340076, "percentage": 84.53, "elapsed_time": "1 day, 14:34:51", "remaining_time": "7:03:33", "throughput": 27226.38, "total_tokens": 3781509696} 
+{"current_steps": 13050, "total_steps": 15426, "loss": 0.3202, "lr": 3.2933960507301826e-05, "epoch": 2.537975299037246, "percentage": 84.6, "elapsed_time": "1 day, 14:36:31", "remaining_time": "7:01:46", "throughput": 27227.56, "total_tokens": 3784405440} +{"current_steps": 13060, "total_steps": 15426, "loss": 0.318, "lr": 3.292681849509387e-05, "epoch": 2.539920256734416, "percentage": 84.66, "elapsed_time": "1 day, 14:38:16", "remaining_time": "6:59:59", "throughput": 27228.31, "total_tokens": 3787373504} +{"current_steps": 13070, "total_steps": 15426, "loss": 0.3221, "lr": 3.291968112729166e-05, "epoch": 2.541865214431586, "percentage": 84.73, "elapsed_time": "1 day, 14:39:57", "remaining_time": "6:58:11", "throughput": 27229.61, "total_tokens": 3790303872} +{"current_steps": 13080, "total_steps": 15426, "loss": 0.319, "lr": 3.291254839886367e-05, "epoch": 2.5438101721287563, "percentage": 84.79, "elapsed_time": "1 day, 14:41:37", "remaining_time": "6:56:24", "throughput": 27230.92, "total_tokens": 3793196032} +{"current_steps": 13090, "total_steps": 15426, "loss": 0.3182, "lr": 3.2905420304785995e-05, "epoch": 2.545755129825926, "percentage": 84.86, "elapsed_time": "1 day, 14:43:21", "remaining_time": "6:54:37", "throughput": 27231.91, "total_tokens": 3796170816} +{"current_steps": 13100, "total_steps": 15426, "loss": 0.3234, "lr": 3.289829684004235e-05, "epoch": 2.547700087523096, "percentage": 84.92, "elapsed_time": "1 day, 14:44:58", "remaining_time": "6:52:49", "throughput": 27233.33, "total_tokens": 3799016704} +{"current_steps": 13110, "total_steps": 15426, "loss": 0.3217, "lr": 3.289117799962402e-05, "epoch": 2.5496450452202666, "percentage": 84.99, "elapsed_time": "1 day, 14:46:37", "remaining_time": "6:51:01", "throughput": 27234.51, "total_tokens": 3801857344} +{"current_steps": 13120, "total_steps": 15426, "loss": 0.3204, "lr": 3.2884063778529914e-05, "epoch": 2.5515900029174365, "percentage": 85.05, "elapsed_time": "1 day, 14:48:21", 
"remaining_time": "6:49:14", "throughput": 27235.22, "total_tokens": 3804800768} +{"current_steps": 13130, "total_steps": 15426, "loss": 0.3211, "lr": 3.28769541717665e-05, "epoch": 2.5535349606146065, "percentage": 85.12, "elapsed_time": "1 day, 14:50:01", "remaining_time": "6:47:26", "throughput": 27236.53, "total_tokens": 3807708224} +{"current_steps": 13140, "total_steps": 15426, "loss": 0.322, "lr": 3.2869849174347775e-05, "epoch": 2.555479918311777, "percentage": 85.18, "elapsed_time": "1 day, 14:51:41", "remaining_time": "6:45:39", "throughput": 27237.7, "total_tokens": 3810605888} +{"current_steps": 13150, "total_steps": 15426, "loss": 0.3227, "lr": 3.2862748781295294e-05, "epoch": 2.557424876008947, "percentage": 85.25, "elapsed_time": "1 day, 14:53:25", "remaining_time": "6:43:52", "throughput": 27238.58, "total_tokens": 3813541184} +{"current_steps": 13160, "total_steps": 15426, "loss": 0.3227, "lr": 3.2855652987638146e-05, "epoch": 2.559369833706117, "percentage": 85.31, "elapsed_time": "1 day, 14:55:07", "remaining_time": "6:42:04", "throughput": 27239.57, "total_tokens": 3816478912} +{"current_steps": 13170, "total_steps": 15426, "loss": 0.3146, "lr": 3.284856178841291e-05, "epoch": 2.561314791403287, "percentage": 85.38, "elapsed_time": "1 day, 14:56:53", "remaining_time": "6:40:18", "throughput": 27239.85, "total_tokens": 3819408448} +{"current_steps": 13180, "total_steps": 15426, "loss": 0.3224, "lr": 3.284147517866367e-05, "epoch": 2.563259749100457, "percentage": 85.44, "elapsed_time": "1 day, 14:58:40", "remaining_time": "6:38:31", "throughput": 27240.34, "total_tokens": 3822372032} +{"current_steps": 13190, "total_steps": 15426, "loss": 0.3227, "lr": 3.2834393153441976e-05, "epoch": 2.565204706797627, "percentage": 85.5, "elapsed_time": "1 day, 15:00:22", "remaining_time": "6:36:44", "throughput": 27240.96, "total_tokens": 3825232128} +{"current_steps": 13200, "total_steps": 15426, "loss": 0.3251, "lr": 3.282731570780689e-05, "epoch": 
2.567149664494797, "percentage": 85.57, "elapsed_time": "1 day, 15:02:00", "remaining_time": "6:34:56", "throughput": 27242.18, "total_tokens": 3828089216} +{"current_steps": 13210, "total_steps": 15426, "loss": 0.3192, "lr": 3.2820242836824875e-05, "epoch": 2.569094622191967, "percentage": 85.63, "elapsed_time": "1 day, 15:03:37", "remaining_time": "6:33:08", "throughput": 27244.04, "total_tokens": 3830984384} +{"current_steps": 13220, "total_steps": 15426, "loss": 0.3222, "lr": 3.2813174535569854e-05, "epoch": 2.5710395798891374, "percentage": 85.7, "elapsed_time": "1 day, 15:05:21", "remaining_time": "6:31:21", "throughput": 27244.61, "total_tokens": 3833891520} +{"current_steps": 13230, "total_steps": 15426, "loss": 0.3212, "lr": 3.280611079912318e-05, "epoch": 2.5729845375863074, "percentage": 85.76, "elapsed_time": "1 day, 15:06:59", "remaining_time": "6:29:34", "throughput": 27245.67, "total_tokens": 3836722304} +{"current_steps": 13240, "total_steps": 15426, "loss": 0.322, "lr": 3.279905162257358e-05, "epoch": 2.5749294952834774, "percentage": 85.83, "elapsed_time": "1 day, 15:09:11", "remaining_time": "6:27:51", "throughput": 27240.58, "total_tokens": 3839591808} +{"current_steps": 13250, "total_steps": 15426, "loss": 0.32, "lr": 3.279199700101723e-05, "epoch": 2.5768744529806478, "percentage": 85.89, "elapsed_time": "1 day, 15:11:32", "remaining_time": "6:26:11", "throughput": 27233.9, "total_tokens": 3842503040} +{"current_steps": 13260, "total_steps": 15426, "loss": 0.3193, "lr": 3.2784946929557644e-05, "epoch": 2.5788194106778177, "percentage": 85.96, "elapsed_time": "1 day, 15:13:52", "remaining_time": "6:24:30", "throughput": 27227.43, "total_tokens": 3845395392} +{"current_steps": 13270, "total_steps": 15426, "loss": 0.3178, "lr": 3.277790140330571e-05, "epoch": 2.5807643683749877, "percentage": 86.02, "elapsed_time": "1 day, 15:15:59", "remaining_time": "6:22:46", "throughput": 27223.65, "total_tokens": 3848313920} +{"current_steps": 13280, 
"total_steps": 15426, "loss": 0.3173, "lr": 3.277086041737968e-05, "epoch": 2.582709326072158, "percentage": 86.09, "elapsed_time": "1 day, 15:18:22", "remaining_time": "6:21:06", "throughput": 27216.83, "total_tokens": 3851256256} +{"current_steps": 13290, "total_steps": 15426, "loss": 0.3182, "lr": 3.276382396690513e-05, "epoch": 2.584654283769328, "percentage": 86.15, "elapsed_time": "1 day, 15:20:29", "remaining_time": "6:19:22", "throughput": 27212.76, "total_tokens": 3854116800} +{"current_steps": 13300, "total_steps": 15426, "loss": 0.3184, "lr": 3.275679204701496e-05, "epoch": 2.586599241466498, "percentage": 86.22, "elapsed_time": "1 day, 15:22:37", "remaining_time": "6:17:39", "throughput": 27208.53, "total_tokens": 3857021632} +{"current_steps": 13310, "total_steps": 15426, "loss": 0.3202, "lr": 3.274976465284939e-05, "epoch": 2.5885441991636684, "percentage": 86.28, "elapsed_time": "1 day, 15:24:45", "remaining_time": "6:15:56", "throughput": 27204.33, "total_tokens": 3859910464} +{"current_steps": 13320, "total_steps": 15426, "loss": 0.3207, "lr": 3.274274177955593e-05, "epoch": 2.5904891568608384, "percentage": 86.35, "elapsed_time": "1 day, 15:27:09", "remaining_time": "6:14:15", "throughput": 27197.32, "total_tokens": 3862814336} +{"current_steps": 13330, "total_steps": 15426, "loss": 0.3186, "lr": 3.273572342228937e-05, "epoch": 2.5924341145580083, "percentage": 86.41, "elapsed_time": "1 day, 15:29:19", "remaining_time": "6:12:33", "throughput": 27193.06, "total_tokens": 3865749056} +{"current_steps": 13340, "total_steps": 15426, "loss": 0.3258, "lr": 3.272870957621176e-05, "epoch": 2.5943790722551787, "percentage": 86.48, "elapsed_time": "1 day, 15:31:29", "remaining_time": "6:10:50", "throughput": 27188.4, "total_tokens": 3868636800} +{"current_steps": 13350, "total_steps": 15426, "loss": 0.3204, "lr": 3.2721700236492414e-05, "epoch": 2.5963240299523487, "percentage": 86.54, "elapsed_time": "1 day, 15:33:33", "remaining_time": "6:09:06", 
"throughput": 27184.77, "total_tokens": 3871480448} +{"current_steps": 13360, "total_steps": 15426, "loss": 0.3195, "lr": 3.271469539830788e-05, "epoch": 2.5982689876495186, "percentage": 86.61, "elapsed_time": "1 day, 15:36:24", "remaining_time": "6:07:29", "throughput": 27172.42, "total_tokens": 3874368320} +{"current_steps": 13370, "total_steps": 15426, "loss": 0.3198, "lr": 3.270769505684193e-05, "epoch": 2.6002139453466886, "percentage": 86.67, "elapsed_time": "1 day, 15:38:30", "remaining_time": "6:05:45", "throughput": 27168.86, "total_tokens": 3877283776} +{"current_steps": 13380, "total_steps": 15426, "loss": 0.3217, "lr": 3.2700699207285544e-05, "epoch": 2.6021589030438586, "percentage": 86.74, "elapsed_time": "1 day, 15:40:49", "remaining_time": "6:04:03", "throughput": 27162.91, "total_tokens": 3880214080} +{"current_steps": 13390, "total_steps": 15426, "loss": 0.3193, "lr": 3.269370784483691e-05, "epoch": 2.604103860741029, "percentage": 86.8, "elapsed_time": "1 day, 15:43:10", "remaining_time": "6:02:22", "throughput": 27156.84, "total_tokens": 3883164352} +{"current_steps": 13400, "total_steps": 15426, "loss": 0.3217, "lr": 3.268672096470138e-05, "epoch": 2.606048818438199, "percentage": 86.87, "elapsed_time": "1 day, 15:45:26", "remaining_time": "6:00:39", "throughput": 27150.99, "total_tokens": 3886022656} +{"current_steps": 13410, "total_steps": 15426, "loss": 0.3174, "lr": 3.2679738562091506e-05, "epoch": 2.607993776135369, "percentage": 86.93, "elapsed_time": "1 day, 15:47:42", "remaining_time": "5:58:57", "throughput": 27145.86, "total_tokens": 3888973056} +{"current_steps": 13420, "total_steps": 15426, "loss": 0.3187, "lr": 3.2672760632226964e-05, "epoch": 2.6099387338325393, "percentage": 87.0, "elapsed_time": "1 day, 15:49:43", "remaining_time": "5:57:12", "throughput": 27143.14, "total_tokens": 3891886720} +{"current_steps": 13430, "total_steps": 15426, "loss": 0.3215, "lr": 3.266578717033458e-05, "epoch": 2.6118836915297092, "percentage": 
87.06, "elapsed_time": "1 day, 15:52:13", "remaining_time": "5:55:32", "throughput": 27134.99, "total_tokens": 3894789568} +{"current_steps": 13440, "total_steps": 15426, "loss": 0.3206, "lr": 3.265881817164833e-05, "epoch": 2.613828649226879, "percentage": 87.13, "elapsed_time": "1 day, 15:55:18", "remaining_time": "5:53:56", "throughput": 27120.39, "total_tokens": 3897694080} +{"current_steps": 13450, "total_steps": 15426, "loss": 0.3183, "lr": 3.265185363140928e-05, "epoch": 2.6157736069240496, "percentage": 87.19, "elapsed_time": "1 day, 15:57:33", "remaining_time": "5:52:14", "throughput": 27115.07, "total_tokens": 3900584320} +{"current_steps": 13460, "total_steps": 15426, "loss": 0.3222, "lr": 3.26448935448656e-05, "epoch": 2.6177185646212195, "percentage": 87.26, "elapsed_time": "1 day, 16:00:08", "remaining_time": "5:50:34", "throughput": 27106.68, "total_tokens": 3903585344} +{"current_steps": 13470, "total_steps": 15426, "loss": 0.3201, "lr": 3.263793790727256e-05, "epoch": 2.6196635223183895, "percentage": 87.32, "elapsed_time": "1 day, 16:02:44", "remaining_time": "5:48:54", "throughput": 27097.78, "total_tokens": 3906540352} +{"current_steps": 13480, "total_steps": 15426, "loss": 0.3186, "lr": 3.2630986713892495e-05, "epoch": 2.62160848001556, "percentage": 87.38, "elapsed_time": "1 day, 16:05:30", "remaining_time": "5:47:15", "throughput": 27086.96, "total_tokens": 3909477120} +{"current_steps": 13490, "total_steps": 15426, "loss": 0.3222, "lr": 3.26240399599948e-05, "epoch": 2.62355343771273, "percentage": 87.45, "elapsed_time": "1 day, 16:07:57", "remaining_time": "5:45:34", "throughput": 27079.45, "total_tokens": 3912370112} +{"current_steps": 13500, "total_steps": 15426, "loss": 0.321, "lr": 3.2617097640855914e-05, "epoch": 2.6254983954099, "percentage": 87.51, "elapsed_time": "1 day, 16:10:10", "remaining_time": "5:43:51", "throughput": 27074.79, "total_tokens": 3915293824} +{"current_steps": 13510, "total_steps": 15426, "loss": 0.3175, "lr": 
3.2610159751759314e-05, "epoch": 2.62744335310707, "percentage": 87.58, "elapsed_time": "1 day, 16:13:01", "remaining_time": "5:42:13", "throughput": 27062.56, "total_tokens": 3918158848} +{"current_steps": 13520, "total_steps": 15426, "loss": 0.3203, "lr": 3.26032262879955e-05, "epoch": 2.6293883108042397, "percentage": 87.64, "elapsed_time": "1 day, 16:16:14", "remaining_time": "5:40:37", "throughput": 27046.56, "total_tokens": 3921052480} +{"current_steps": 13530, "total_steps": 15426, "loss": 0.3184, "lr": 3.259629724486198e-05, "epoch": 2.63133326850141, "percentage": 87.71, "elapsed_time": "1 day, 16:18:45", "remaining_time": "5:38:56", "throughput": 27038.33, "total_tokens": 3923948416} +{"current_steps": 13540, "total_steps": 15426, "loss": 0.3218, "lr": 3.258937261766323e-05, "epoch": 2.63327822619858, "percentage": 87.77, "elapsed_time": "1 day, 16:21:04", "remaining_time": "5:37:14", "throughput": 27032.35, "total_tokens": 3926839488} +{"current_steps": 13550, "total_steps": 15426, "loss": 0.3185, "lr": 3.258245240171074e-05, "epoch": 2.63522318389575, "percentage": 87.84, "elapsed_time": "1 day, 16:23:16", "remaining_time": "5:35:30", "throughput": 27027.88, "total_tokens": 3929760704} +{"current_steps": 13560, "total_steps": 15426, "loss": 0.3166, "lr": 3.2575536592322935e-05, "epoch": 2.6371681415929205, "percentage": 87.9, "elapsed_time": "1 day, 16:25:25", "remaining_time": "5:33:45", "throughput": 27024.17, "total_tokens": 3932708224} +{"current_steps": 13570, "total_steps": 15426, "loss": 0.3184, "lr": 3.256862518482523e-05, "epoch": 2.6391130992900904, "percentage": 87.97, "elapsed_time": "1 day, 16:27:59", "remaining_time": "5:32:04", "throughput": 27015.89, "total_tokens": 3935650240} +{"current_steps": 13580, "total_steps": 15426, "loss": 0.3223, "lr": 3.256171817454994e-05, "epoch": 2.6410580569872604, "percentage": 88.03, "elapsed_time": "1 day, 16:30:19", "remaining_time": "5:30:22", "throughput": 27009.88, "total_tokens": 3938568384} 
+{"current_steps": 13590, "total_steps": 15426, "loss": 0.318, "lr": 3.255481555683633e-05, "epoch": 2.6430030146844308, "percentage": 88.1, "elapsed_time": "1 day, 16:32:46", "remaining_time": "5:28:40", "throughput": 27002.73, "total_tokens": 3941503808} +{"current_steps": 13600, "total_steps": 15426, "loss": 0.321, "lr": 3.254791732703057e-05, "epoch": 2.6449479723816007, "percentage": 88.16, "elapsed_time": "1 day, 16:35:19", "remaining_time": "5:26:58", "throughput": 26994.14, "total_tokens": 3944375680} +{"current_steps": 13610, "total_steps": 15426, "loss": 0.3145, "lr": 3.254102348048575e-05, "epoch": 2.6468929300787707, "percentage": 88.23, "elapsed_time": "1 day, 16:37:39", "remaining_time": "5:25:15", "throughput": 26987.81, "total_tokens": 3947230464} +{"current_steps": 13620, "total_steps": 15426, "loss": 0.3224, "lr": 3.25341340125618e-05, "epoch": 2.648837887775941, "percentage": 88.29, "elapsed_time": "1 day, 16:39:51", "remaining_time": "5:23:31", "throughput": 26983.03, "total_tokens": 3950095744} +{"current_steps": 13630, "total_steps": 15426, "loss": 0.3168, "lr": 3.2527248918625575e-05, "epoch": 2.650782845473111, "percentage": 88.36, "elapsed_time": "1 day, 16:41:51", "remaining_time": "5:21:45", "throughput": 26980.71, "total_tokens": 3952971648} +{"current_steps": 13640, "total_steps": 15426, "loss": 0.3205, "lr": 3.252036819405075e-05, "epoch": 2.652727803170281, "percentage": 88.42, "elapsed_time": "1 day, 16:43:55", "remaining_time": "5:20:00", "throughput": 26978.02, "total_tokens": 3955926144} +{"current_steps": 13650, "total_steps": 15426, "loss": 0.3232, "lr": 3.251349183421788e-05, "epoch": 2.6546727608674514, "percentage": 88.49, "elapsed_time": "1 day, 16:46:04", "remaining_time": "5:18:15", "throughput": 26973.67, "total_tokens": 3958776320} +{"current_steps": 13660, "total_steps": 15426, "loss": 0.3243, "lr": 3.250661983451434e-05, "epoch": 2.6566177185646214, "percentage": 88.55, "elapsed_time": "1 day, 16:48:12", 
"remaining_time": "5:16:30", "throughput": 26969.39, "total_tokens": 3961594368} +{"current_steps": 13670, "total_steps": 15426, "loss": 0.32, "lr": 3.2499752190334326e-05, "epoch": 2.6585626762617913, "percentage": 88.62, "elapsed_time": "1 day, 16:50:56", "remaining_time": "5:14:50", "throughput": 26958.93, "total_tokens": 3964487296} +{"current_steps": 13680, "total_steps": 15426, "loss": 0.3216, "lr": 3.2492888897078834e-05, "epoch": 2.6605076339589613, "percentage": 88.68, "elapsed_time": "1 day, 16:53:44", "remaining_time": "5:13:10", "throughput": 26948.07, "total_tokens": 3967406272} +{"current_steps": 13690, "total_steps": 15426, "loss": 0.3221, "lr": 3.248602995015567e-05, "epoch": 2.6624525916561312, "percentage": 88.75, "elapsed_time": "1 day, 16:56:00", "remaining_time": "5:11:26", "throughput": 26942.99, "total_tokens": 3970321024} +{"current_steps": 13700, "total_steps": 15426, "loss": 0.3161, "lr": 3.247917534497943e-05, "epoch": 2.6643975493533016, "percentage": 88.81, "elapsed_time": "1 day, 16:58:05", "remaining_time": "5:09:40", "throughput": 26939.82, "total_tokens": 3973219584} +{"current_steps": 13710, "total_steps": 15426, "loss": 0.3207, "lr": 3.247232507697145e-05, "epoch": 2.6663425070504716, "percentage": 88.88, "elapsed_time": "1 day, 17:00:01", "remaining_time": "5:07:54", "throughput": 26937.97, "total_tokens": 3976085632} +{"current_steps": 13720, "total_steps": 15426, "loss": 0.3123, "lr": 3.246547914155985e-05, "epoch": 2.6682874647476416, "percentage": 88.94, "elapsed_time": "1 day, 17:02:05", "remaining_time": "5:06:08", "throughput": 26935.31, "total_tokens": 3979032256} +{"current_steps": 13730, "total_steps": 15426, "loss": 0.3206, "lr": 3.245863753417949e-05, "epoch": 2.670232422444812, "percentage": 89.01, "elapsed_time": "1 day, 17:04:12", "remaining_time": "5:04:23", "throughput": 26931.67, "total_tokens": 3981907648} +{"current_steps": 13740, "total_steps": 15426, "loss": 0.3197, "lr": 3.2451800250271944e-05, "epoch": 
2.672177380141982, "percentage": 89.07, "elapsed_time": "1 day, 17:06:17", "remaining_time": "5:02:37", "throughput": 26928.62, "total_tokens": 3984821056} +{"current_steps": 13750, "total_steps": 15426, "loss": 0.3211, "lr": 3.244496728528553e-05, "epoch": 2.674122337839152, "percentage": 89.14, "elapsed_time": "1 day, 17:08:28", "remaining_time": "5:00:53", "throughput": 26924.12, "total_tokens": 3987696704} +{"current_steps": 13760, "total_steps": 15426, "loss": 0.3191, "lr": 3.243813863467525e-05, "epoch": 2.6760672955363223, "percentage": 89.2, "elapsed_time": "1 day, 17:10:59", "remaining_time": "4:59:10", "throughput": 26916.31, "total_tokens": 3990588800} +{"current_steps": 13770, "total_steps": 15426, "loss": 0.3192, "lr": 3.243131429390281e-05, "epoch": 2.6780122532334922, "percentage": 89.26, "elapsed_time": "1 day, 17:13:19", "remaining_time": "4:57:26", "throughput": 26910.35, "total_tokens": 3993475136} +{"current_steps": 13780, "total_steps": 15426, "loss": 0.3186, "lr": 3.2424494258436594e-05, "epoch": 2.679957210930662, "percentage": 89.33, "elapsed_time": "1 day, 17:15:19", "remaining_time": "4:55:40", "throughput": 26908.06, "total_tokens": 3996368960} +{"current_steps": 13790, "total_steps": 15426, "loss": 0.3188, "lr": 3.241767852375166e-05, "epoch": 2.6819021686278326, "percentage": 89.39, "elapsed_time": "1 day, 17:17:23", "remaining_time": "4:53:54", "throughput": 26904.98, "total_tokens": 3999257152} +{"current_steps": 13800, "total_steps": 15426, "loss": 0.3191, "lr": 3.241086708532971e-05, "epoch": 2.6838471263250026, "percentage": 89.46, "elapsed_time": "1 day, 17:19:28", "remaining_time": "4:52:08", "throughput": 26901.88, "total_tokens": 4002155200} +{"current_steps": 13810, "total_steps": 15426, "loss": 0.3225, "lr": 3.24040599386591e-05, "epoch": 2.6857920840221725, "percentage": 89.52, "elapsed_time": "1 day, 17:21:26", "remaining_time": "4:50:22", "throughput": 26899.63, "total_tokens": 4004998528} +{"current_steps": 13820, 
"total_steps": 15426, "loss": 0.3152, "lr": 3.23972570792348e-05, "epoch": 2.6877370417193425, "percentage": 89.59, "elapsed_time": "1 day, 17:23:27", "remaining_time": "4:48:35", "throughput": 26897.43, "total_tokens": 4007907648} +{"current_steps": 13830, "total_steps": 15426, "loss": 0.3193, "lr": 3.239045850255842e-05, "epoch": 2.6896819994165124, "percentage": 89.65, "elapsed_time": "1 day, 17:25:36", "remaining_time": "4:46:50", "throughput": 26893.29, "total_tokens": 4010781248} +{"current_steps": 13840, "total_steps": 15426, "loss": 0.3194, "lr": 3.238366420413817e-05, "epoch": 2.691626957113683, "percentage": 89.72, "elapsed_time": "1 day, 17:28:08", "remaining_time": "4:45:07", "throughput": 26885.23, "total_tokens": 4013649472} +{"current_steps": 13850, "total_steps": 15426, "loss": 0.315, "lr": 3.237687417948882e-05, "epoch": 2.693571914810853, "percentage": 89.78, "elapsed_time": "1 day, 17:30:20", "remaining_time": "4:43:22", "throughput": 26881.12, "total_tokens": 4016597120} +{"current_steps": 13860, "total_steps": 15426, "loss": 0.3192, "lr": 3.2370088424131776e-05, "epoch": 2.6955168725080227, "percentage": 89.85, "elapsed_time": "1 day, 17:32:29", "remaining_time": "4:41:37", "throughput": 26877.62, "total_tokens": 4019535360} +{"current_steps": 13870, "total_steps": 15426, "loss": 0.3193, "lr": 3.236330693359497e-05, "epoch": 2.697461830205193, "percentage": 89.91, "elapsed_time": "1 day, 17:34:30", "remaining_time": "4:39:50", "throughput": 26874.82, "total_tokens": 4022365952} +{"current_steps": 13880, "total_steps": 15426, "loss": 0.3172, "lr": 3.2356529703412894e-05, "epoch": 2.699406787902363, "percentage": 89.98, "elapsed_time": "1 day, 17:36:45", "remaining_time": "4:38:05", "throughput": 26869.41, "total_tokens": 4025183104} +{"current_steps": 13890, "total_steps": 15426, "loss": 0.3169, "lr": 3.234975672912661e-05, "epoch": 2.701351745599533, "percentage": 90.04, "elapsed_time": "1 day, 17:38:52", "remaining_time": "4:36:20", 
"throughput": 26865.61, "total_tokens": 4028038208} +{"current_steps": 13900, "total_steps": 15426, "loss": 0.319, "lr": 3.234298800628368e-05, "epoch": 2.7032967032967035, "percentage": 90.11, "elapsed_time": "1 day, 17:40:59", "remaining_time": "4:34:34", "throughput": 26862.34, "total_tokens": 4030955840} +{"current_steps": 13910, "total_steps": 15426, "loss": 0.3241, "lr": 3.2336223530438195e-05, "epoch": 2.7052416609938734, "percentage": 90.17, "elapsed_time": "1 day, 17:43:09", "remaining_time": "4:32:48", "throughput": 26858.4, "total_tokens": 4033850304} +{"current_steps": 13920, "total_steps": 15426, "loss": 0.3174, "lr": 3.232946329715076e-05, "epoch": 2.7071866186910434, "percentage": 90.24, "elapsed_time": "1 day, 17:45:06", "remaining_time": "4:31:01", "throughput": 26857.0, "total_tokens": 4036779392} +{"current_steps": 13930, "total_steps": 15426, "loss": 0.3173, "lr": 3.2322707301988456e-05, "epoch": 2.709131576388214, "percentage": 90.3, "elapsed_time": "1 day, 17:47:09", "remaining_time": "4:29:15", "throughput": 26854.35, "total_tokens": 4039673664} +{"current_steps": 13940, "total_steps": 15426, "loss": 0.3208, "lr": 3.231595554052488e-05, "epoch": 2.7110765340853837, "percentage": 90.37, "elapsed_time": "1 day, 17:49:07", "remaining_time": "4:27:28", "throughput": 26852.25, "total_tokens": 4042550464} +{"current_steps": 13950, "total_steps": 15426, "loss": 0.3196, "lr": 3.230920800834005e-05, "epoch": 2.7130214917825537, "percentage": 90.43, "elapsed_time": "1 day, 17:51:05", "remaining_time": "4:25:41", "throughput": 26850.65, "total_tokens": 4045458944} +{"current_steps": 13960, "total_steps": 15426, "loss": 0.3192, "lr": 3.2302464701020486e-05, "epoch": 2.7149664494797237, "percentage": 90.5, "elapsed_time": "1 day, 17:53:07", "remaining_time": "4:23:54", "throughput": 26848.24, "total_tokens": 4048367104} +{"current_steps": 13970, "total_steps": 15426, "loss": 0.3246, "lr": 3.2295725614159126e-05, "epoch": 2.716911407176894, "percentage": 
90.56, "elapsed_time": "1 day, 17:55:14", "remaining_time": "4:22:08", "throughput": 26845.06, "total_tokens": 4051296640} +{"current_steps": 13980, "total_steps": 15426, "loss": 0.3278, "lr": 3.228899074335536e-05, "epoch": 2.718856364874064, "percentage": 90.63, "elapsed_time": "1 day, 17:57:14", "remaining_time": "4:20:22", "throughput": 26842.13, "total_tokens": 4054097024} +{"current_steps": 13990, "total_steps": 15426, "loss": 0.3219, "lr": 3.228226008421498e-05, "epoch": 2.720801322571234, "percentage": 90.69, "elapsed_time": "1 day, 17:59:06", "remaining_time": "4:18:34", "throughput": 26841.08, "total_tokens": 4056942912} +{"current_steps": 14000, "total_steps": 15426, "loss": 0.3215, "lr": 3.2275533632350193e-05, "epoch": 2.722746280268404, "percentage": 90.76, "elapsed_time": "1 day, 18:01:01", "remaining_time": "4:16:47", "throughput": 26840.18, "total_tokens": 4059888320} +{"current_steps": 14010, "total_steps": 15426, "loss": 0.3194, "lr": 3.226881138337963e-05, "epoch": 2.7246912379655743, "percentage": 90.82, "elapsed_time": "1 day, 18:03:02", "remaining_time": "4:15:00", "throughput": 26837.76, "total_tokens": 4062775296} +{"current_steps": 14020, "total_steps": 15426, "loss": 0.3197, "lr": 3.2262093332928256e-05, "epoch": 2.7266361956627443, "percentage": 90.89, "elapsed_time": "1 day, 18:05:03", "remaining_time": "4:13:13", "throughput": 26835.49, "total_tokens": 4065676672} +{"current_steps": 14030, "total_steps": 15426, "loss": 0.3183, "lr": 3.225537947662746e-05, "epoch": 2.7285811533599142, "percentage": 90.95, "elapsed_time": "1 day, 18:07:00", "remaining_time": "4:11:26", "throughput": 26834.45, "total_tokens": 4068644480} +{"current_steps": 14040, "total_steps": 15426, "loss": 0.3201, "lr": 3.224866981011494e-05, "epoch": 2.7305261110570846, "percentage": 91.02, "elapsed_time": "1 day, 18:08:56", "remaining_time": "4:09:39", "throughput": 26832.78, "total_tokens": 4071512256} +{"current_steps": 14050, "total_steps": 15426, "loss": 0.3235, 
"lr": 3.22419643290348e-05, "epoch": 2.7324710687542546, "percentage": 91.08, "elapsed_time": "1 day, 18:11:14", "remaining_time": "4:07:53", "throughput": 26827.43, "total_tokens": 4074405632} +{"current_steps": 14060, "total_steps": 15426, "loss": 0.3196, "lr": 3.2235263029037446e-05, "epoch": 2.7344160264514246, "percentage": 91.14, "elapsed_time": "1 day, 18:13:44", "remaining_time": "4:06:09", "throughput": 26819.97, "total_tokens": 4077297920} +{"current_steps": 14070, "total_steps": 15426, "loss": 0.3208, "lr": 3.222856590577962e-05, "epoch": 2.736360984148595, "percentage": 91.21, "elapsed_time": "1 day, 18:16:01", "remaining_time": "4:04:24", "throughput": 26815.14, "total_tokens": 4080229120} +{"current_steps": 14080, "total_steps": 15426, "loss": 0.3188, "lr": 3.222187295492436e-05, "epoch": 2.738305941845765, "percentage": 91.27, "elapsed_time": "1 day, 18:18:47", "remaining_time": "4:02:42", "throughput": 26804.69, "total_tokens": 4083095744} +{"current_steps": 14090, "total_steps": 15426, "loss": 0.3216, "lr": 3.221518417214104e-05, "epoch": 2.740250899542935, "percentage": 91.34, "elapsed_time": "1 day, 18:21:07", "remaining_time": "4:00:56", "throughput": 26798.93, "total_tokens": 4085962880} +{"current_steps": 14100, "total_steps": 15426, "loss": 0.3173, "lr": 3.22084995531053e-05, "epoch": 2.7421958572401053, "percentage": 91.4, "elapsed_time": "1 day, 18:23:11", "remaining_time": "3:59:10", "throughput": 26796.25, "total_tokens": 4088871168} +{"current_steps": 14110, "total_steps": 15426, "loss": 0.3197, "lr": 3.220181909349907e-05, "epoch": 2.7441408149372752, "percentage": 91.47, "elapsed_time": "1 day, 18:25:06", "remaining_time": "3:57:22", "throughput": 26794.65, "total_tokens": 4091709184} +{"current_steps": 14120, "total_steps": 15426, "loss": 0.3178, "lr": 3.219514278901053e-05, "epoch": 2.746085772634445, "percentage": 91.53, "elapsed_time": "1 day, 18:27:08", "remaining_time": "3:55:35", "throughput": 26792.15, "total_tokens": 
4094616704} +{"current_steps": 14130, "total_steps": 15426, "loss": 0.3171, "lr": 3.218847063533413e-05, "epoch": 2.748030730331615, "percentage": 91.6, "elapsed_time": "1 day, 18:29:12", "remaining_time": "3:53:48", "throughput": 26789.88, "total_tokens": 4097585472} +{"current_steps": 14140, "total_steps": 15426, "loss": 0.3165, "lr": 3.218180262817055e-05, "epoch": 2.749975688028785, "percentage": 91.66, "elapsed_time": "1 day, 18:31:14", "remaining_time": "3:52:01", "throughput": 26787.69, "total_tokens": 4100519424} +{"current_steps": 14150, "total_steps": 15426, "loss": 0.3201, "lr": 3.217513876322674e-05, "epoch": 2.7519206457259555, "percentage": 91.73, "elapsed_time": "1 day, 18:33:27", "remaining_time": "3:50:15", "throughput": 26783.42, "total_tokens": 4103425984} +{"current_steps": 14160, "total_steps": 15426, "loss": 0.3203, "lr": 3.216847903621581e-05, "epoch": 2.7538656034231255, "percentage": 91.79, "elapsed_time": "1 day, 18:35:23", "remaining_time": "3:48:28", "throughput": 26782.04, "total_tokens": 4106328512} +{"current_steps": 14170, "total_steps": 15426, "loss": 0.3172, "lr": 3.216182344285713e-05, "epoch": 2.7558105611202954, "percentage": 91.86, "elapsed_time": "1 day, 18:38:12", "remaining_time": "3:46:45", "throughput": 26771.27, "total_tokens": 4109191296} +{"current_steps": 14180, "total_steps": 15426, "loss": 0.3143, "lr": 3.215517197887625e-05, "epoch": 2.757755518817466, "percentage": 91.92, "elapsed_time": "1 day, 18:40:46", "remaining_time": "3:45:00", "throughput": 26763.35, "total_tokens": 4112091328} +{"current_steps": 14190, "total_steps": 15426, "loss": 0.3176, "lr": 3.214852464000488e-05, "epoch": 2.759700476514636, "percentage": 91.99, "elapsed_time": "1 day, 18:43:03", "remaining_time": "3:43:15", "throughput": 26758.25, "total_tokens": 4114978432} +{"current_steps": 14200, "total_steps": 15426, "loss": 0.32, "lr": 3.2141881421980945e-05, "epoch": 2.7616454342118057, "percentage": 92.05, "elapsed_time": "1 day, 18:45:18", 
"remaining_time": "3:41:29", "throughput": 26753.76, "total_tokens": 4117899008} +{"current_steps": 14210, "total_steps": 15426, "loss": 0.319, "lr": 3.213524232054851e-05, "epoch": 2.763590391908976, "percentage": 92.12, "elapsed_time": "1 day, 18:47:32", "remaining_time": "3:39:42", "throughput": 26749.67, "total_tokens": 4120857856} +{"current_steps": 14220, "total_steps": 15426, "loss": 0.3219, "lr": 3.21286073314578e-05, "epoch": 2.765535349606146, "percentage": 92.18, "elapsed_time": "1 day, 18:49:33", "remaining_time": "3:37:55", "throughput": 26747.72, "total_tokens": 4123794816} +{"current_steps": 14230, "total_steps": 15426, "loss": 0.3232, "lr": 3.2121976450465155e-05, "epoch": 2.767480307303316, "percentage": 92.25, "elapsed_time": "1 day, 18:51:29", "remaining_time": "3:36:07", "throughput": 26746.07, "total_tokens": 4126639424} +{"current_steps": 14240, "total_steps": 15426, "loss": 0.3145, "lr": 3.211534967333308e-05, "epoch": 2.7694252650004865, "percentage": 92.31, "elapsed_time": "1 day, 18:53:42", "remaining_time": "3:34:21", "throughput": 26742.1, "total_tokens": 4129574848} +{"current_steps": 14250, "total_steps": 15426, "loss": 0.3208, "lr": 3.210872699583019e-05, "epoch": 2.7713702226976564, "percentage": 92.38, "elapsed_time": "1 day, 18:55:35", "remaining_time": "3:32:33", "throughput": 26740.87, "total_tokens": 4132407936} +{"current_steps": 14260, "total_steps": 15426, "loss": 0.3164, "lr": 3.210210841373118e-05, "epoch": 2.7733151803948264, "percentage": 92.44, "elapsed_time": "1 day, 18:57:29", "remaining_time": "3:30:45", "throughput": 26740.13, "total_tokens": 4135338048} +{"current_steps": 14270, "total_steps": 15426, "loss": 0.3168, "lr": 3.2095493922816855e-05, "epoch": 2.7752601380919963, "percentage": 92.51, "elapsed_time": "1 day, 18:59:23", "remaining_time": "3:28:57", "throughput": 26739.16, "total_tokens": 4138238528} +{"current_steps": 14280, "total_steps": 15426, "loss": 0.3186, "lr": 3.2088883518874105e-05, "epoch": 
2.7772050957891667, "percentage": 92.57, "elapsed_time": "1 day, 19:01:22", "remaining_time": "3:27:09", "throughput": 26737.16, "total_tokens": 4141126976} +{"current_steps": 14290, "total_steps": 15426, "loss": 0.3223, "lr": 3.208227719769589e-05, "epoch": 2.7791500534863367, "percentage": 92.64, "elapsed_time": "1 day, 19:03:17", "remaining_time": "3:25:21", "throughput": 26735.95, "total_tokens": 4144011264} +{"current_steps": 14300, "total_steps": 15426, "loss": 0.3161, "lr": 3.207567495508124e-05, "epoch": 2.7810950111835067, "percentage": 92.7, "elapsed_time": "1 day, 19:05:09", "remaining_time": "3:23:33", "throughput": 26735.26, "total_tokens": 4146884736} +{"current_steps": 14310, "total_steps": 15426, "loss": 0.3208, "lr": 3.2069076786835205e-05, "epoch": 2.7830399688806766, "percentage": 92.77, "elapsed_time": "1 day, 19:07:06", "remaining_time": "3:21:45", "throughput": 26733.96, "total_tokens": 4149814400} +{"current_steps": 14320, "total_steps": 15426, "loss": 0.3214, "lr": 3.2062482688768904e-05, "epoch": 2.784984926577847, "percentage": 92.83, "elapsed_time": "1 day, 19:08:54", "remaining_time": "3:19:57", "throughput": 26733.59, "total_tokens": 4152654528} +{"current_steps": 14330, "total_steps": 15426, "loss": 0.3194, "lr": 3.205589265669947e-05, "epoch": 2.786929884275017, "percentage": 92.9, "elapsed_time": "1 day, 19:10:51", "remaining_time": "3:18:09", "throughput": 26732.22, "total_tokens": 4155562048} +{"current_steps": 14340, "total_steps": 15426, "loss": 0.3177, "lr": 3.204930668645005e-05, "epoch": 2.788874841972187, "percentage": 92.96, "elapsed_time": "1 day, 19:12:50", "remaining_time": "3:16:21", "throughput": 26730.76, "total_tokens": 4158516160} +{"current_steps": 14350, "total_steps": 15426, "loss": 0.319, "lr": 3.20427247738498e-05, "epoch": 2.7908197996693573, "percentage": 93.02, "elapsed_time": "1 day, 19:14:42", "remaining_time": "3:14:33", "throughput": 26730.18, "total_tokens": 4161430592} +{"current_steps": 14360, 
"total_steps": 15426, "loss": 0.3201, "lr": 3.2036146914733854e-05, "epoch": 2.7927647573665273, "percentage": 93.09, "elapsed_time": "1 day, 19:16:34", "remaining_time": "3:12:45", "throughput": 26729.3, "total_tokens": 4164276352} +{"current_steps": 14370, "total_steps": 15426, "loss": 0.3174, "lr": 3.202957310494336e-05, "epoch": 2.7947097150636973, "percentage": 93.15, "elapsed_time": "1 day, 19:18:28", "remaining_time": "3:10:57", "throughput": 26728.22, "total_tokens": 4167161408} +{"current_steps": 14380, "total_steps": 15426, "loss": 0.3214, "lr": 3.202300334032542e-05, "epoch": 2.7966546727608677, "percentage": 93.22, "elapsed_time": "1 day, 19:20:23", "remaining_time": "3:09:09", "throughput": 26726.99, "total_tokens": 4170036160} +{"current_steps": 14390, "total_steps": 15426, "loss": 0.3151, "lr": 3.201643761673308e-05, "epoch": 2.7985996304580376, "percentage": 93.28, "elapsed_time": "1 day, 19:22:15", "remaining_time": "3:07:20", "throughput": 26726.3, "total_tokens": 4172930048} +{"current_steps": 14400, "total_steps": 15426, "loss": 0.3193, "lr": 3.200987593002536e-05, "epoch": 2.8005445881552076, "percentage": 93.35, "elapsed_time": "1 day, 19:24:05", "remaining_time": "3:05:32", "throughput": 26725.77, "total_tokens": 4175791104} +{"current_steps": 14410, "total_steps": 15426, "loss": 0.3241, "lr": 3.200331827606721e-05, "epoch": 2.802489545852378, "percentage": 93.41, "elapsed_time": "1 day, 19:25:55", "remaining_time": "3:03:44", "throughput": 26725.6, "total_tokens": 4178698752} +{"current_steps": 14420, "total_steps": 15426, "loss": 0.3244, "lr": 3.199676465072951e-05, "epoch": 2.804434503549548, "percentage": 93.48, "elapsed_time": "1 day, 19:27:43", "remaining_time": "3:01:55", "throughput": 26725.45, "total_tokens": 4181548864} +{"current_steps": 14430, "total_steps": 15426, "loss": 0.3163, "lr": 3.1990215049889046e-05, "epoch": 2.806379461246718, "percentage": 93.54, "elapsed_time": "1 day, 19:29:43", "remaining_time": "3:00:07", 
"throughput": 26723.49, "total_tokens": 4184448512} +{"current_steps": 14440, "total_steps": 15426, "loss": 0.3176, "lr": 3.198366946942851e-05, "epoch": 2.808324418943888, "percentage": 93.61, "elapsed_time": "1 day, 19:31:36", "remaining_time": "2:58:19", "throughput": 26722.32, "total_tokens": 4187304000} +{"current_steps": 14450, "total_steps": 15426, "loss": 0.315, "lr": 3.1977127905236514e-05, "epoch": 2.810269376641058, "percentage": 93.67, "elapsed_time": "1 day, 19:33:36", "remaining_time": "2:56:31", "throughput": 26720.59, "total_tokens": 4190241472} +{"current_steps": 14460, "total_steps": 15426, "loss": 0.3208, "lr": 3.197059035320752e-05, "epoch": 2.812214334338228, "percentage": 93.74, "elapsed_time": "1 day, 19:35:34", "remaining_time": "2:54:43", "throughput": 26718.95, "total_tokens": 4193119808} +{"current_steps": 14470, "total_steps": 15426, "loss": 0.3231, "lr": 3.196405680924189e-05, "epoch": 2.814159292035398, "percentage": 93.8, "elapsed_time": "1 day, 19:37:18", "remaining_time": "2:52:55", "throughput": 26719.71, "total_tokens": 4196035392} +{"current_steps": 14480, "total_steps": 15426, "loss": 0.3155, "lr": 3.195752726924582e-05, "epoch": 2.816104249732568, "percentage": 93.87, "elapsed_time": "1 day, 19:38:58", "remaining_time": "2:51:06", "throughput": 26721.06, "total_tokens": 4198898688} +{"current_steps": 14490, "total_steps": 15426, "loss": 0.3213, "lr": 3.195100172913139e-05, "epoch": 2.8180492074297385, "percentage": 93.93, "elapsed_time": "1 day, 19:40:43", "remaining_time": "2:49:17", "throughput": 26721.32, "total_tokens": 4201763776} +{"current_steps": 14500, "total_steps": 15426, "loss": 0.3179, "lr": 3.19444801848165e-05, "epoch": 2.8199941651269085, "percentage": 94.0, "elapsed_time": "1 day, 19:42:28", "remaining_time": "2:47:28", "throughput": 26721.81, "total_tokens": 4204647808} +{"current_steps": 14510, "total_steps": 15426, "loss": 0.3152, "lr": 3.1937962632224885e-05, "epoch": 2.8219391228240784, "percentage": 
94.06, "elapsed_time": "1 day, 19:44:14", "remaining_time": "2:45:39", "throughput": 26722.4, "total_tokens": 4207568192} +{"current_steps": 14520, "total_steps": 15426, "loss": 0.3174, "lr": 3.193144906728609e-05, "epoch": 2.823884080521249, "percentage": 94.13, "elapsed_time": "1 day, 19:45:58", "remaining_time": "2:43:51", "throughput": 26723.15, "total_tokens": 4210471168} +{"current_steps": 14530, "total_steps": 15426, "loss": 0.3166, "lr": 3.1924939485935494e-05, "epoch": 2.825829038218419, "percentage": 94.19, "elapsed_time": "1 day, 19:47:47", "remaining_time": "2:42:02", "throughput": 26723.22, "total_tokens": 4213373056} +{"current_steps": 14540, "total_steps": 15426, "loss": 0.3188, "lr": 3.1918433884114253e-05, "epoch": 2.8277739959155888, "percentage": 94.26, "elapsed_time": "1 day, 19:49:31", "remaining_time": "2:40:13", "throughput": 26723.87, "total_tokens": 4216256320} +{"current_steps": 14550, "total_steps": 15426, "loss": 0.3194, "lr": 3.191193225776931e-05, "epoch": 2.829718953612759, "percentage": 94.32, "elapsed_time": "1 day, 19:51:19", "remaining_time": "2:38:25", "throughput": 26724.31, "total_tokens": 4219227328} +{"current_steps": 14560, "total_steps": 15426, "loss": 0.3212, "lr": 3.190543460285339e-05, "epoch": 2.831663911309929, "percentage": 94.39, "elapsed_time": "1 day, 19:53:04", "remaining_time": "2:36:36", "throughput": 26724.72, "total_tokens": 4222091904} +{"current_steps": 14570, "total_steps": 15426, "loss": 0.3194, "lr": 3.189894091532499e-05, "epoch": 2.833608869007099, "percentage": 94.45, "elapsed_time": "1 day, 19:55:02", "remaining_time": "2:34:48", "throughput": 26723.0, "total_tokens": 4224984256} +{"current_steps": 14580, "total_steps": 15426, "loss": 0.3216, "lr": 3.1892451191148346e-05, "epoch": 2.835553826704269, "percentage": 94.52, "elapsed_time": "1 day, 19:57:00", "remaining_time": "2:33:00", "throughput": 26721.73, "total_tokens": 4227913024} +{"current_steps": 14590, "total_steps": 15426, "loss": 0.316, "lr": 
3.1885965426293465e-05, "epoch": 2.8374987844014394, "percentage": 94.58, "elapsed_time": "1 day, 19:58:54", "remaining_time": "2:31:12", "throughput": 26720.75, "total_tokens": 4230806464} +{"current_steps": 14600, "total_steps": 15426, "loss": 0.3183, "lr": 3.187948361673606e-05, "epoch": 2.8394437420986094, "percentage": 94.65, "elapsed_time": "1 day, 20:00:44", "remaining_time": "2:29:24", "throughput": 26720.35, "total_tokens": 4233684672} +{"current_steps": 14610, "total_steps": 15426, "loss": 0.3146, "lr": 3.187300575845759e-05, "epoch": 2.8413886997957793, "percentage": 94.71, "elapsed_time": "1 day, 20:02:33", "remaining_time": "2:27:35", "throughput": 26720.19, "total_tokens": 4236586688} +{"current_steps": 14620, "total_steps": 15426, "loss": 0.3204, "lr": 3.186653184744521e-05, "epoch": 2.8433336574929493, "percentage": 94.78, "elapsed_time": "1 day, 20:04:20", "remaining_time": "2:25:46", "throughput": 26720.53, "total_tokens": 4239483712} +{"current_steps": 14630, "total_steps": 15426, "loss": 0.3202, "lr": 3.18600618796918e-05, "epoch": 2.8452786151901197, "percentage": 94.84, "elapsed_time": "1 day, 20:06:13", "remaining_time": "2:23:58", "throughput": 26719.82, "total_tokens": 4242389952} +{"current_steps": 14640, "total_steps": 15426, "loss": 0.3152, "lr": 3.185359585119591e-05, "epoch": 2.8472235728872897, "percentage": 94.9, "elapsed_time": "1 day, 20:08:05", "remaining_time": "2:22:10", "throughput": 26719.71, "total_tokens": 4245377472} +{"current_steps": 14650, "total_steps": 15426, "loss": 0.3139, "lr": 3.184713375796178e-05, "epoch": 2.8491685305844596, "percentage": 94.97, "elapsed_time": "1 day, 20:09:53", "remaining_time": "2:20:21", "throughput": 26719.78, "total_tokens": 4248271616} +{"current_steps": 14660, "total_steps": 15426, "loss": 0.3195, "lr": 3.1840675595999344e-05, "epoch": 2.85111348828163, "percentage": 95.03, "elapsed_time": "1 day, 20:11:45", "remaining_time": "2:18:33", "throughput": 26719.31, "total_tokens": 4251180736} 
+{"current_steps": 14670, "total_steps": 15426, "loss": 0.3154, "lr": 3.1834221361324155e-05, "epoch": 2.8530584459788, "percentage": 95.1, "elapsed_time": "1 day, 20:13:36", "remaining_time": "2:16:45", "throughput": 26719.04, "total_tokens": 4254107584} +{"current_steps": 14680, "total_steps": 15426, "loss": 0.3205, "lr": 3.182777104995744e-05, "epoch": 2.85500340367597, "percentage": 95.16, "elapsed_time": "1 day, 20:16:03", "remaining_time": "2:14:58", "throughput": 26712.92, "total_tokens": 4257056128} +{"current_steps": 14690, "total_steps": 15426, "loss": 0.3193, "lr": 3.182132465792609e-05, "epoch": 2.8569483613731403, "percentage": 95.23, "elapsed_time": "1 day, 20:18:49", "remaining_time": "2:13:12", "throughput": 26703.13, "total_tokens": 4259934336} +{"current_steps": 14700, "total_steps": 15426, "loss": 0.3188, "lr": 3.181488218126259e-05, "epoch": 2.8588933190703103, "percentage": 95.29, "elapsed_time": "1 day, 20:21:15", "remaining_time": "2:11:26", "throughput": 26697.05, "total_tokens": 4262869760} +{"current_steps": 14710, "total_steps": 15426, "loss": 0.3204, "lr": 3.180844361600506e-05, "epoch": 2.8608382767674803, "percentage": 95.36, "elapsed_time": "1 day, 20:23:30", "remaining_time": "2:09:38", "throughput": 26692.63, "total_tokens": 4265759424} +{"current_steps": 14720, "total_steps": 15426, "loss": 0.3182, "lr": 3.180200895819722e-05, "epoch": 2.8627832344646507, "percentage": 95.42, "elapsed_time": "1 day, 20:25:52", "remaining_time": "2:07:51", "throughput": 26687.23, "total_tokens": 4268694976} +{"current_steps": 14730, "total_steps": 15426, "loss": 0.3214, "lr": 3.1795578203888424e-05, "epoch": 2.8647281921618206, "percentage": 95.49, "elapsed_time": "1 day, 20:28:13", "remaining_time": "2:06:04", "throughput": 26681.63, "total_tokens": 4271568448} +{"current_steps": 14740, "total_steps": 15426, "loss": 0.3205, "lr": 3.178915134913357e-05, "epoch": 2.8666731498589906, "percentage": 95.55, "elapsed_time": "1 day, 20:30:41", 
"remaining_time": "2:04:17", "throughput": 26675.12, "total_tokens": 4274465344} +{"current_steps": 14750, "total_steps": 15426, "loss": 0.3164, "lr": 3.178272838999316e-05, "epoch": 2.8686181075561605, "percentage": 95.62, "elapsed_time": "1 day, 20:33:41", "remaining_time": "2:02:32", "throughput": 26663.43, "total_tokens": 4277379520} +{"current_steps": 14760, "total_steps": 15426, "loss": 0.3191, "lr": 3.1776309322533274e-05, "epoch": 2.8705630652533305, "percentage": 95.68, "elapsed_time": "1 day, 20:36:21", "remaining_time": "2:00:45", "throughput": 26655.17, "total_tokens": 4280336192} +{"current_steps": 14770, "total_steps": 15426, "loss": 0.3201, "lr": 3.1769894142825536e-05, "epoch": 2.872508022950501, "percentage": 95.75, "elapsed_time": "1 day, 20:38:49", "remaining_time": "1:58:58", "throughput": 26648.69, "total_tokens": 4283228864} +{"current_steps": 14780, "total_steps": 15426, "loss": 0.3163, "lr": 3.176348284694712e-05, "epoch": 2.874452980647671, "percentage": 95.81, "elapsed_time": "1 day, 20:41:21", "remaining_time": "1:57:11", "throughput": 26641.75, "total_tokens": 4286169472} +{"current_steps": 14790, "total_steps": 15426, "loss": 0.323, "lr": 3.175707543098075e-05, "epoch": 2.876397938344841, "percentage": 95.88, "elapsed_time": "1 day, 20:43:28", "remaining_time": "1:55:23", "throughput": 26638.54, "total_tokens": 4289039296} +{"current_steps": 14800, "total_steps": 15426, "loss": 0.3192, "lr": 3.1750671891014653e-05, "epoch": 2.878342896042011, "percentage": 95.94, "elapsed_time": "1 day, 20:45:30", "remaining_time": "1:53:35", "throughput": 26636.27, "total_tokens": 4291926592} +{"current_steps": 14810, "total_steps": 15426, "loss": 0.3187, "lr": 3.174427222314262e-05, "epoch": 2.880287853739181, "percentage": 96.01, "elapsed_time": "1 day, 20:47:37", "remaining_time": "1:51:47", "throughput": 26633.25, "total_tokens": 4294805696} +{"current_steps": 14820, "total_steps": 15426, "loss": 0.3163, "lr": 3.173787642346391e-05, "epoch": 
2.882232811436351, "percentage": 96.07, "elapsed_time": "1 day, 20:49:35", "remaining_time": "1:49:58", "throughput": 26631.37, "total_tokens": 4297655872} +{"current_steps": 14830, "total_steps": 15426, "loss": 0.3199, "lr": 3.173148448808331e-05, "epoch": 2.8841777691335215, "percentage": 96.14, "elapsed_time": "1 day, 20:51:51", "remaining_time": "1:48:10", "throughput": 26626.82, "total_tokens": 4300529536} +{"current_steps": 14840, "total_steps": 15426, "loss": 0.3184, "lr": 3.172509641311107e-05, "epoch": 2.8861227268306915, "percentage": 96.2, "elapsed_time": "1 day, 20:53:54", "remaining_time": "1:46:22", "throughput": 26624.32, "total_tokens": 4303405568} +{"current_steps": 14850, "total_steps": 15426, "loss": 0.3158, "lr": 3.171871219466293e-05, "epoch": 2.8880676845278614, "percentage": 96.27, "elapsed_time": "1 day, 20:56:29", "remaining_time": "1:44:35", "throughput": 26616.6, "total_tokens": 4306281472} +{"current_steps": 14860, "total_steps": 15426, "loss": 0.3214, "lr": 3.171233182886011e-05, "epoch": 2.890012642225032, "percentage": 96.33, "elapsed_time": "1 day, 20:58:49", "remaining_time": "1:42:47", "throughput": 26611.45, "total_tokens": 4309175296} +{"current_steps": 14870, "total_steps": 15426, "loss": 0.3187, "lr": 3.170595531182928e-05, "epoch": 2.891957599922202, "percentage": 96.4, "elapsed_time": "1 day, 21:00:57", "remaining_time": "1:40:59", "throughput": 26608.35, "total_tokens": 4312074880} +{"current_steps": 14880, "total_steps": 15426, "loss": 0.3175, "lr": 3.169958263970256e-05, "epoch": 2.8939025576193718, "percentage": 96.46, "elapsed_time": "1 day, 21:02:57", "remaining_time": "1:39:10", "throughput": 26606.54, "total_tokens": 4314988608} +{"current_steps": 14890, "total_steps": 15426, "loss": 0.3239, "lr": 3.169321380861751e-05, "epoch": 2.8958475153165417, "percentage": 96.53, "elapsed_time": "1 day, 21:04:59", "remaining_time": "1:37:22", "throughput": 26604.51, "total_tokens": 4317897088} +{"current_steps": 14900, 
"total_steps": 15426, "loss": 0.3216, "lr": 3.168684881471711e-05, "epoch": 2.8977924730137117, "percentage": 96.59, "elapsed_time": "1 day, 21:07:07", "remaining_time": "1:35:34", "throughput": 26601.22, "total_tokens": 4320779008} +{"current_steps": 14910, "total_steps": 15426, "loss": 0.3177, "lr": 3.168048765414979e-05, "epoch": 2.899737430710882, "percentage": 96.65, "elapsed_time": "1 day, 21:09:30", "remaining_time": "1:33:46", "throughput": 26595.55, "total_tokens": 4323663296} +{"current_steps": 14920, "total_steps": 15426, "loss": 0.3196, "lr": 3.167413032306936e-05, "epoch": 2.901682388408052, "percentage": 96.72, "elapsed_time": "1 day, 21:11:59", "remaining_time": "1:31:58", "throughput": 26589.23, "total_tokens": 4326575040} +{"current_steps": 14930, "total_steps": 15426, "loss": 0.3225, "lr": 3.166777681763504e-05, "epoch": 2.903627346105222, "percentage": 96.78, "elapsed_time": "1 day, 21:14:36", "remaining_time": "1:30:11", "throughput": 26581.39, "total_tokens": 4329474496} +{"current_steps": 14940, "total_steps": 15426, "loss": 0.3191, "lr": 3.166142713401144e-05, "epoch": 2.9055723038023924, "percentage": 96.85, "elapsed_time": "1 day, 21:17:18", "remaining_time": "1:28:23", "throughput": 26572.75, "total_tokens": 4332393984} +{"current_steps": 14950, "total_steps": 15426, "loss": 0.3198, "lr": 3.165508126836857e-05, "epoch": 2.9075172614995624, "percentage": 96.91, "elapsed_time": "1 day, 21:19:57", "remaining_time": "1:26:36", "throughput": 26564.84, "total_tokens": 4335307776} +{"current_steps": 14960, "total_steps": 15426, "loss": 0.3181, "lr": 3.164873921688177e-05, "epoch": 2.9094622191967323, "percentage": 96.98, "elapsed_time": "1 day, 21:22:08", "remaining_time": "1:24:47", "throughput": 26560.96, "total_tokens": 4338167744} +{"current_steps": 14970, "total_steps": 15426, "loss": 0.3184, "lr": 3.164240097573178e-05, "epoch": 2.9114071768939027, "percentage": 97.04, "elapsed_time": "1 day, 21:24:31", "remaining_time": "1:22:59", 
"throughput": 26555.64, "total_tokens": 4341078912} +{"current_steps": 14980, "total_steps": 15426, "loss": 0.3211, "lr": 3.163606654110467e-05, "epoch": 2.9133521345910727, "percentage": 97.11, "elapsed_time": "1 day, 21:27:15", "remaining_time": "1:21:11", "throughput": 26546.35, "total_tokens": 4343913216} +{"current_steps": 14990, "total_steps": 15426, "loss": 0.3204, "lr": 3.162973590919187e-05, "epoch": 2.9152970922882426, "percentage": 97.17, "elapsed_time": "1 day, 21:30:48", "remaining_time": "1:19:25", "throughput": 26529.43, "total_tokens": 4346807296} +{"current_steps": 15000, "total_steps": 15426, "loss": 0.3152, "lr": 3.162340907619012e-05, "epoch": 2.917242049985413, "percentage": 97.24, "elapsed_time": "1 day, 21:33:46", "remaining_time": "1:17:38", "throughput": 26518.5, "total_tokens": 4349723584} +{"current_steps": 15000, "total_steps": 15426, "eval_loss": 0.31161510944366455, "epoch": 2.917242049985413, "percentage": 97.24, "elapsed_time": "1 day, 21:33:48", "remaining_time": "1:17:38", "throughput": 26518.15, "total_tokens": 4349723584} +{"current_steps": 15010, "total_steps": 15426, "loss": 0.322, "lr": 3.1617086038301516e-05, "epoch": 2.919187007682583, "percentage": 97.3, "elapsed_time": "1 day, 21:41:15", "remaining_time": "1:15:58", "throughput": 26463.89, "total_tokens": 4352655360} +{"current_steps": 15020, "total_steps": 15426, "loss": 0.3205, "lr": 3.161076679173344e-05, "epoch": 2.921131965379753, "percentage": 97.37, "elapsed_time": "1 day, 21:43:43", "remaining_time": "1:14:09", "throughput": 26457.81, "total_tokens": 4355572608} +{"current_steps": 15030, "total_steps": 15426, "loss": 0.3211, "lr": 3.1604451332698575e-05, "epoch": 2.9230769230769234, "percentage": 97.43, "elapsed_time": "1 day, 21:46:05", "remaining_time": "1:12:21", "throughput": 26452.65, "total_tokens": 4358483520} +{"current_steps": 15040, "total_steps": 15426, "loss": 0.3205, "lr": 3.1598139657414923e-05, "epoch": 2.9250218807740933, "percentage": 97.5, 
"elapsed_time": "1 day, 21:48:38", "remaining_time": "1:10:32", "throughput": 26445.94, "total_tokens": 4361413248} +{"current_steps": 15050, "total_steps": 15426, "loss": 0.3202, "lr": 3.159183176210574e-05, "epoch": 2.9269668384712633, "percentage": 97.56, "elapsed_time": "1 day, 21:50:47", "remaining_time": "1:08:43", "throughput": 26442.95, "total_tokens": 4364346496} +{"current_steps": 15060, "total_steps": 15426, "loss": 0.3191, "lr": 3.1585527642999595e-05, "epoch": 2.9289117961684332, "percentage": 97.63, "elapsed_time": "1 day, 21:53:30", "remaining_time": "1:06:55", "throughput": 26434.48, "total_tokens": 4367263808} +{"current_steps": 15070, "total_steps": 15426, "loss": 0.3162, "lr": 3.1579227296330294e-05, "epoch": 2.930856753865603, "percentage": 97.69, "elapsed_time": "1 day, 21:55:46", "remaining_time": "1:05:05", "throughput": 26430.41, "total_tokens": 4370179584} +{"current_steps": 15080, "total_steps": 15426, "loss": 0.3197, "lr": 3.157293071833691e-05, "epoch": 2.9328017115627736, "percentage": 97.76, "elapsed_time": "1 day, 21:58:12", "remaining_time": "1:03:17", "throughput": 26424.64, "total_tokens": 4373077568} +{"current_steps": 15090, "total_steps": 15426, "loss": 0.3196, "lr": 3.156663790526375e-05, "epoch": 2.9347466692599435, "percentage": 97.82, "elapsed_time": "1 day, 22:00:13", "remaining_time": "1:01:27", "throughput": 26422.49, "total_tokens": 4375918528} +{"current_steps": 15100, "total_steps": 15426, "loss": 0.3166, "lr": 3.156034885336039e-05, "epoch": 2.9366916269571135, "percentage": 97.89, "elapsed_time": "1 day, 22:02:22", "remaining_time": "0:59:38", "throughput": 26419.52, "total_tokens": 4378835392} +{"current_steps": 15110, "total_steps": 15426, "loss": 0.3183, "lr": 3.155406355888161e-05, "epoch": 2.938636584654284, "percentage": 97.95, "elapsed_time": "1 day, 22:04:58", "remaining_time": "0:57:49", "throughput": 26412.07, "total_tokens": 4381720704} +{"current_steps": 15120, "total_steps": 15426, "loss": 0.317, "lr": 
3.1547782018087407e-05, "epoch": 2.940581542351454, "percentage": 98.02, "elapsed_time": "1 day, 22:07:47", "remaining_time": "0:56:00", "throughput": 26402.97, "total_tokens": 4384684608} +{"current_steps": 15130, "total_steps": 15426, "loss": 0.3196, "lr": 3.154150422724299e-05, "epoch": 2.942526500048624, "percentage": 98.08, "elapsed_time": "1 day, 22:16:01", "remaining_time": "0:54:18", "throughput": 26342.12, "total_tokens": 4387572096} +{"current_steps": 15140, "total_steps": 15426, "loss": 0.3238, "lr": 3.1535230182618783e-05, "epoch": 2.944471457745794, "percentage": 98.15, "elapsed_time": "1 day, 22:20:00", "remaining_time": "0:52:30", "throughput": 26321.58, "total_tokens": 4390444672} +{"current_steps": 15150, "total_steps": 15426, "loss": 0.3189, "lr": 3.1528959880490387e-05, "epoch": 2.946416415442964, "percentage": 98.21, "elapsed_time": "1 day, 22:23:53", "remaining_time": "0:50:42", "throughput": 26302.58, "total_tokens": 4393400832} +{"current_steps": 15160, "total_steps": 15426, "loss": 0.3209, "lr": 3.152269331713859e-05, "epoch": 2.948361373140134, "percentage": 98.28, "elapsed_time": "1 day, 22:26:53", "remaining_time": "0:48:53", "throughput": 26291.61, "total_tokens": 4396314752} +{"current_steps": 15170, "total_steps": 15426, "loss": 0.3162, "lr": 3.151643048884935e-05, "epoch": 2.9503063308373045, "percentage": 98.34, "elapsed_time": "1 day, 22:30:44", "remaining_time": "0:47:05", "throughput": 26272.23, "total_tokens": 4399142848} +{"current_steps": 15180, "total_steps": 15426, "loss": 0.3165, "lr": 3.151017139191379e-05, "epoch": 2.9522512885344745, "percentage": 98.41, "elapsed_time": "1 day, 22:34:07", "remaining_time": "0:45:16", "throughput": 26257.53, "total_tokens": 4402020800} +{"current_steps": 15190, "total_steps": 15426, "loss": 0.3207, "lr": 3.150391602262818e-05, "epoch": 2.9541962462316445, "percentage": 98.47, "elapsed_time": "1 day, 22:36:58", "remaining_time": "0:43:27", "throughput": 26247.89, "total_tokens": 4404882048} 
+{"current_steps": 15200, "total_steps": 15426, "loss": 0.3182, "lr": 3.149766437729394e-05, "epoch": 2.9561412039288144, "percentage": 98.53, "elapsed_time": "1 day, 22:39:51", "remaining_time": "0:41:37", "throughput": 26237.78, "total_tokens": 4407729216} +{"current_steps": 15210, "total_steps": 15426, "loss": 0.3193, "lr": 3.1491416452217635e-05, "epoch": 2.9580861616259844, "percentage": 98.6, "elapsed_time": "1 day, 22:43:51", "remaining_time": "0:39:49", "throughput": 26217.63, "total_tokens": 4410623296} +{"current_steps": 15220, "total_steps": 15426, "loss": 0.3174, "lr": 3.1485172243710954e-05, "epoch": 2.9600311193231548, "percentage": 98.66, "elapsed_time": "1 day, 22:47:05", "remaining_time": "0:37:59", "throughput": 26204.97, "total_tokens": 4413572864} +{"current_steps": 15230, "total_steps": 15426, "loss": 0.3184, "lr": 3.1478931748090676e-05, "epoch": 2.9619760770203247, "percentage": 98.73, "elapsed_time": "1 day, 22:50:04", "remaining_time": "0:36:09", "throughput": 26194.06, "total_tokens": 4416446336} +{"current_steps": 15240, "total_steps": 15426, "loss": 0.322, "lr": 3.147269496167873e-05, "epoch": 2.9639210347174947, "percentage": 98.79, "elapsed_time": "1 day, 22:53:13", "remaining_time": "0:34:20", "throughput": 26182.06, "total_tokens": 4419349504} +{"current_steps": 15250, "total_steps": 15426, "loss": 0.3159, "lr": 3.1466461880802105e-05, "epoch": 2.965865992414665, "percentage": 98.86, "elapsed_time": "1 day, 22:56:44", "remaining_time": "0:32:30", "throughput": 26166.62, "total_tokens": 4422283648} +{"current_steps": 15260, "total_steps": 15426, "loss": 0.3151, "lr": 3.146023250179291e-05, "epoch": 2.967810950111835, "percentage": 98.92, "elapsed_time": "1 day, 22:59:50", "remaining_time": "0:30:40", "throughput": 26155.13, "total_tokens": 4425212032} +{"current_steps": 15270, "total_steps": 15426, "loss": 0.318, "lr": 3.145400682098833e-05, "epoch": 2.969755907809005, "percentage": 98.99, "elapsed_time": "1 day, 23:03:05", 
"remaining_time": "0:28:50", "throughput": 26142.47, "total_tokens": 4428151424} +{"current_steps": 15280, "total_steps": 15426, "loss": 0.3193, "lr": 3.14477848347306e-05, "epoch": 2.9717008655061754, "percentage": 99.05, "elapsed_time": "1 day, 23:06:31", "remaining_time": "0:27:00", "throughput": 26127.9, "total_tokens": 4431066432} +{"current_steps": 15290, "total_steps": 15426, "loss": 0.3193, "lr": 3.144156653936703e-05, "epoch": 2.9736458232033454, "percentage": 99.12, "elapsed_time": "1 day, 23:09:33", "remaining_time": "0:25:10", "throughput": 26117.12, "total_tokens": 4433993792} +{"current_steps": 15300, "total_steps": 15426, "loss": 0.3167, "lr": 3.1435351931250004e-05, "epoch": 2.9755907809005153, "percentage": 99.18, "elapsed_time": "1 day, 23:12:42", "remaining_time": "0:23:19", "throughput": 26105.3, "total_tokens": 4436920192} +{"current_steps": 15310, "total_steps": 15426, "loss": 0.3217, "lr": 3.142914100673692e-05, "epoch": 2.9775357385976857, "percentage": 99.25, "elapsed_time": "1 day, 23:16:38", "remaining_time": "0:21:29", "throughput": 26086.3, "total_tokens": 4439838272} +{"current_steps": 15320, "total_steps": 15426, "loss": 0.3213, "lr": 3.1422933762190235e-05, "epoch": 2.9794806962948557, "percentage": 99.31, "elapsed_time": "1 day, 23:19:53", "remaining_time": "0:19:38", "throughput": 26073.53, "total_tokens": 4442753984} +{"current_steps": 15330, "total_steps": 15426, "loss": 0.3184, "lr": 3.141673019397741e-05, "epoch": 2.9814256539920256, "percentage": 99.38, "elapsed_time": "1 day, 23:23:23", "remaining_time": "0:17:48", "throughput": 26058.64, "total_tokens": 4445705600} +{"current_steps": 15340, "total_steps": 15426, "loss": 0.3195, "lr": 3.141053029847095e-05, "epoch": 2.9833706116891956, "percentage": 99.44, "elapsed_time": "1 day, 23:27:25", "remaining_time": "0:15:57", "throughput": 26038.62, "total_tokens": 4448571776} +{"current_steps": 15350, "total_steps": 15426, "loss": 0.3216, "lr": 3.140433407204835e-05, "epoch": 
2.985315569386366, "percentage": 99.51, "elapsed_time": "1 day, 23:32:06", "remaining_time": "0:14:07", "throughput": 26012.8, "total_tokens": 4451474112} +{"current_steps": 15360, "total_steps": 15426, "loss": 0.3227, "lr": 3.1398141511092124e-05, "epoch": 2.987260527083536, "percentage": 99.57, "elapsed_time": "1 day, 23:35:54", "remaining_time": "0:12:16", "throughput": 25995.04, "total_tokens": 4454372608} +{"current_steps": 15370, "total_steps": 15426, "loss": 0.32, "lr": 3.1391952611989736e-05, "epoch": 2.989205484780706, "percentage": 99.64, "elapsed_time": "1 day, 23:39:47", "remaining_time": "0:10:25", "throughput": 25976.48, "total_tokens": 4457241728} +{"current_steps": 15380, "total_steps": 15426, "loss": 0.3204, "lr": 3.138576737113369e-05, "epoch": 2.991150442477876, "percentage": 99.7, "elapsed_time": "1 day, 23:43:33", "remaining_time": "0:08:33", "throughput": 25959.47, "total_tokens": 4460187008} +{"current_steps": 15390, "total_steps": 15426, "loss": 0.3189, "lr": 3.137958578492143e-05, "epoch": 2.9930954001750463, "percentage": 99.77, "elapsed_time": "1 day, 23:46:50", "remaining_time": "0:06:42", "throughput": 25946.62, "total_tokens": 4463080704} +{"current_steps": 15400, "total_steps": 15426, "loss": 0.3155, "lr": 3.137340784975535e-05, "epoch": 2.9950403578722162, "percentage": 99.83, "elapsed_time": "1 day, 23:50:03", "remaining_time": "0:04:50", "throughput": 25934.38, "total_tokens": 4465991616} +{"current_steps": 15410, "total_steps": 15426, "loss": 0.319, "lr": 3.136723356204285e-05, "epoch": 2.996985315569386, "percentage": 99.9, "elapsed_time": "1 day, 23:53:53", "remaining_time": "0:02:59", "throughput": 25916.63, "total_tokens": 4468904576} +{"current_steps": 15420, "total_steps": 15426, "loss": 0.3214, "lr": 3.136106291819622e-05, "epoch": 2.9989302732665566, "percentage": 99.96, "elapsed_time": "1 day, 23:57:35", "remaining_time": "0:01:07", "throughput": 25900.43, "total_tokens": 4471839616} +{"current_steps": 15426, 
"total_steps": 15426, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "2 days, 0:07:28", "remaining_time": "0:00:00", "throughput": 25820.72, "total_tokens": 4473404224}