commit e732b00f819f64f41564a5e23223bb69898a1fbd Author: ModelHub XC Date: Thu May 28 00:20:26 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: baban/QwenTranslate_English_Telugu Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 
100644 index 0000000..6c5cc9d --- /dev/null +++ b/README.md @@ -0,0 +1,63 @@ +--- +library_name: transformers +license: other +base_model: Qwen/Qwen2.5-3B-Instruct +tags: +- llama-factory +- full +- generated_from_trainer +model-index: +- name: MT_En_Telugu + results: [] +--- + + + +# MT_En_Telugu + +This model is a fine-tuned version of [Qwen/Qwen2.5-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct) on the MT_En_Telugu dataset. +It achieves the following results on the evaluation set: +- Loss: 0.2652 +- Num Input Tokens Seen: 4899505792 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-05 +- train_batch_size: 16 +- eval_batch_size: 16 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 8 +- total_train_batch_size: 1024 +- total_eval_batch_size: 128 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: inverse_sqrt +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- Transformers 4.52.4 +- Pytorch 2.5.1+cu124 +- Datasets 3.6.0 +- Tokenizers 0.21.1 diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' 
}} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n<tool_call>\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n</tool_call>' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n<tool_response>\n' }} + {{- message.content }} + {{- '\n</tool_response>' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/config.json b/config.json new file mode 100644 index 0000000..fb880d9 --- /dev/null +++ b/config.json @@ -0,0 +1,28 @@ 
+{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..028ef0a --- /dev/null +++ b/generation_config.json @@ -0,0 +1,14 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "4.52.4" +} diff --git a/model-00001-of-00002.safetensors b/model-00001-of-00002.safetensors new file mode 100644 index 0000000..7e323c8 --- /dev/null +++ b/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8263b73d1ba7b48e47ff71775778c90623c2f1fcee24601a5ea2e08889fa0c07 +size 4957560304 diff --git a/model-00002-of-00002.safetensors b/model-00002-of-00002.safetensors new file mode 100644 index 0000000..42a48a2 --- /dev/null +++ b/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9a6a06a29e8d60f1f19c240da71638c2ee82fb49f3e5fb1c009dd9e8da94654 +size 1214366696 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..f19a648 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,441 @@ +{ + "metadata": { + "total_size": 6171877376 + }, + 
"weight_map": { + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": 
"model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + 
"model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", 
+ "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": 
"model-00001-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + 
"model.layers.25.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.bias": 
"model-00001-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.28.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + 
"model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + 
"model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.o_proj.weight": 
"model-00002-of-00002.safetensors", + "model.layers.32.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + 
"model.layers.34.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + 
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + 
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..ac23c0a --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..51ebb3b --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa +size 11421896 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..fb44f02 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,208 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + 
"<|video_pad|>" + ], + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": {}, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..255df93 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,1452 @@ +{"current_steps": 10, "total_steps": 14493, "loss": 1.3615, "lr": 4.9977515176118345e-05, "epoch": 0.0020703397945187755, "percentage": 0.07, "elapsed_time": "0:03:48", "remaining_time": "3 days, 19:55:57", "throughput": 15082.33, "total_tokens": 3446528} +{"current_steps": 20, "total_steps": 14493, "loss": 0.8676, "lr": 4.9952567580506e-05, "epoch": 0.004140679589037551, "percentage": 0.14, "elapsed_time": "0:07:23", "remaining_time": "3 days, 17:10:06", "throughput": 15272.81, "total_tokens": 6774912} +{"current_steps": 30, "total_steps": 14493, "loss": 0.7365, "lr": 4.992765730738634e-05, "epoch": 0.006211019383556326, "percentage": 0.21, "elapsed_time": "0:11:01", "remaining_time": "3 days, 16:36:50", "throughput": 15338.08, "total_tokens": 10149376} +{"current_steps": 40, "total_steps": 14493, "loss": 0.664, "lr": 4.9902784263792476e-05, "epoch": 0.008281359178075102, "percentage": 0.28, "elapsed_time": "0:14:50", "remaining_time": "3 days, 17:23:20", "throughput": 15230.13, "total_tokens": 13564160} +{"current_steps": 50, "total_steps": 14493, "loss": 0.6169, "lr": 4.987794835708133e-05, "epoch": 0.010351698972593876, "percentage": 0.34, "elapsed_time": "0:18:31", "remaining_time": "3 days, 17:11:56", "throughput": 15241.03, "total_tokens": 16942976} +{"current_steps": 60, "total_steps": 14493, "loss": 0.5874, "lr": 4.985314949493234e-05, "epoch": 0.012422038767112653, "percentage": 0.41, "elapsed_time": "0:22:17", "remaining_time": "3 days, 17:20:54", "throughput": 
15240.88, "total_tokens": 20379520} +{"current_steps": 70, "total_steps": 14493, "loss": 0.565, "lr": 4.982838758534584e-05, "epoch": 0.014492378561631427, "percentage": 0.48, "elapsed_time": "0:25:11", "remaining_time": "3 days, 14:30:57", "throughput": 15663.24, "total_tokens": 23676800} +{"current_steps": 80, "total_steps": 14493, "loss": 0.5422, "lr": 4.980366253664179e-05, "epoch": 0.016562718356150204, "percentage": 0.55, "elapsed_time": "0:28:10", "remaining_time": "3 days, 12:35:09", "throughput": 16022.65, "total_tokens": 27081344} +{"current_steps": 90, "total_steps": 14493, "loss": 0.5317, "lr": 4.977897425745825e-05, "epoch": 0.018633058150668978, "percentage": 0.62, "elapsed_time": "0:30:57", "remaining_time": "3 days, 10:33:44", "throughput": 16394.22, "total_tokens": 30448384} +{"current_steps": 100, "total_steps": 14493, "loss": 0.5212, "lr": 4.975432265674997e-05, "epoch": 0.020703397945187753, "percentage": 0.69, "elapsed_time": "0:34:01", "remaining_time": "3 days, 9:36:03", "throughput": 16565.73, "total_tokens": 33810944} +{"current_steps": 110, "total_steps": 14493, "loss": 0.5084, "lr": 4.972970764378705e-05, "epoch": 0.02277373773970653, "percentage": 0.76, "elapsed_time": "0:37:01", "remaining_time": "3 days, 8:40:47", "throughput": 16729.11, "total_tokens": 37160704} +{"current_steps": 120, "total_steps": 14493, "loss": 0.5029, "lr": 4.970512912815344e-05, "epoch": 0.024844077534225305, "percentage": 0.83, "elapsed_time": "0:39:47", "remaining_time": "3 days, 7:25:08", "throughput": 16999.54, "total_tokens": 40578688} +{"current_steps": 130, "total_steps": 14493, "loss": 0.4897, "lr": 4.968058701974564e-05, "epoch": 0.02691441732874408, "percentage": 0.9, "elapsed_time": "0:43:12", "remaining_time": "3 days, 7:34:38", "throughput": 16972.26, "total_tokens": 44007808} +{"current_steps": 140, "total_steps": 14493, "loss": 0.4856, "lr": 4.96560812287712e-05, "epoch": 0.028984757123262855, "percentage": 0.97, "elapsed_time": "0:46:43", 
"remaining_time": "3 days, 7:49:39", "throughput": 16927.62, "total_tokens": 47450112} +{"current_steps": 150, "total_steps": 14493, "loss": 0.4798, "lr": 4.963161166574748e-05, "epoch": 0.03105509691778163, "percentage": 1.03, "elapsed_time": "0:50:13", "remaining_time": "3 days, 8:02:43", "throughput": 16872.57, "total_tokens": 50847744} +{"current_steps": 160, "total_steps": 14493, "loss": 0.473, "lr": 4.960717824150013e-05, "epoch": 0.03312543671230041, "percentage": 1.1, "elapsed_time": "0:53:27", "remaining_time": "3 days, 7:49:27", "throughput": 16931.14, "total_tokens": 54313344} +{"current_steps": 170, "total_steps": 14493, "loss": 0.4666, "lr": 4.9582780867161893e-05, "epoch": 0.03519577650681918, "percentage": 1.17, "elapsed_time": "0:57:32", "remaining_time": "3 days, 8:47:30", "throughput": 16699.34, "total_tokens": 57647872} +{"current_steps": 180, "total_steps": 14493, "loss": 0.4676, "lr": 4.955841945417105e-05, "epoch": 0.037266116301337956, "percentage": 1.24, "elapsed_time": "1:01:33", "remaining_time": "3 days, 9:35:16", "throughput": 16518.3, "total_tokens": 61014912} +{"current_steps": 190, "total_steps": 14493, "loss": 0.4578, "lr": 4.953409391427024e-05, "epoch": 0.03933645609585673, "percentage": 1.31, "elapsed_time": "1:05:44", "remaining_time": "3 days, 10:29:09", "throughput": 16323.85, "total_tokens": 64392064} +{"current_steps": 200, "total_steps": 14493, "loss": 0.4582, "lr": 4.950980415950502e-05, "epoch": 0.041406795890375506, "percentage": 1.38, "elapsed_time": "1:09:38", "remaining_time": "3 days, 10:57:00", "throughput": 16244.84, "total_tokens": 67880064} +{"current_steps": 210, "total_steps": 14493, "loss": 0.4491, "lr": 4.9485550102222575e-05, "epoch": 0.04347713568489428, "percentage": 1.45, "elapsed_time": "1:13:11", "remaining_time": "3 days, 10:58:09", "throughput": 16238.93, "total_tokens": 71314432} +{"current_steps": 220, "total_steps": 14493, "loss": 0.4453, "lr": 4.946133165507037e-05, "epoch": 0.04554747547941306, 
"percentage": 1.52, "elapsed_time": "1:17:22", "remaining_time": "3 days, 11:39:33", "throughput": 16108.53, "total_tokens": 74779136} +{"current_steps": 230, "total_steps": 14493, "loss": 0.4415, "lr": 4.943714873099483e-05, "epoch": 0.047617815273931836, "percentage": 1.59, "elapsed_time": "1:20:23", "remaining_time": "3 days, 11:05:48", "throughput": 16206.43, "total_tokens": 78179072} +{"current_steps": 240, "total_steps": 14493, "loss": 0.445, "lr": 4.9413001243240024e-05, "epoch": 0.04968815506845061, "percentage": 1.66, "elapsed_time": "1:23:57", "remaining_time": "3 days, 11:05:55", "throughput": 16176.89, "total_tokens": 81488640} +{"current_steps": 250, "total_steps": 14493, "loss": 0.4399, "lr": 4.938888910534637e-05, "epoch": 0.051758494862969386, "percentage": 1.72, "elapsed_time": "1:27:21", "remaining_time": "3 days, 10:56:45", "throughput": 16191.78, "total_tokens": 84865408} +{"current_steps": 260, "total_steps": 14493, "loss": 0.4309, "lr": 4.936481223114932e-05, "epoch": 0.05382883465748816, "percentage": 1.79, "elapsed_time": "1:29:52", "remaining_time": "3 days, 9:59:58", "throughput": 16352.75, "total_tokens": 88182528} +{"current_steps": 270, "total_steps": 14493, "loss": 0.4336, "lr": 4.934077053477808e-05, "epoch": 0.055899174452006935, "percentage": 1.86, "elapsed_time": "1:33:27", "remaining_time": "3 days, 10:02:51", "throughput": 16319.42, "total_tokens": 91505152} +{"current_steps": 280, "total_steps": 14493, "loss": 0.4298, "lr": 4.931676393065431e-05, "epoch": 0.05796951424652571, "percentage": 1.93, "elapsed_time": "1:38:06", "remaining_time": "3 days, 11:00:08", "throughput": 16109.42, "total_tokens": 94829824} +{"current_steps": 290, "total_steps": 14493, "loss": 0.4265, "lr": 4.929279233349088e-05, "epoch": 0.060039854041044484, "percentage": 2.0, "elapsed_time": "1:41:51", "remaining_time": "3 days, 11:08:25", "throughput": 16077.18, "total_tokens": 98252160} +{"current_steps": 300, "total_steps": 14493, "loss": 0.4278, "lr": 
4.926885565829051e-05, "epoch": 0.06211019383556326, "percentage": 2.07, "elapsed_time": "1:45:35", "remaining_time": "3 days, 11:15:15", "throughput": 16032.11, "total_tokens": 101565696} +{"current_steps": 310, "total_steps": 14493, "loss": 0.4253, "lr": 4.924495382034461e-05, "epoch": 0.06418053363008204, "percentage": 2.14, "elapsed_time": "1:48:45", "remaining_time": "3 days, 10:56:03", "throughput": 16076.26, "total_tokens": 104909696} +{"current_steps": 320, "total_steps": 14493, "loss": 0.421, "lr": 4.9221086735231975e-05, "epoch": 0.06625087342460081, "percentage": 2.21, "elapsed_time": "1:51:24", "remaining_time": "3 days, 10:14:00", "throughput": 16193.09, "total_tokens": 108235392} +{"current_steps": 330, "total_steps": 14493, "loss": 0.4203, "lr": 4.919725431881751e-05, "epoch": 0.06832121321911959, "percentage": 2.28, "elapsed_time": "1:54:08", "remaining_time": "3 days, 9:39:03", "throughput": 16289.21, "total_tokens": 111563776} +{"current_steps": 340, "total_steps": 14493, "loss": 0.4148, "lr": 4.917345648725101e-05, "epoch": 0.07039155301363836, "percentage": 2.35, "elapsed_time": "1:58:19", "remaining_time": "3 days, 10:05:40", "throughput": 16202.93, "total_tokens": 115037824} +{"current_steps": 350, "total_steps": 14493, "loss": 0.4158, "lr": 4.914969315696596e-05, "epoch": 0.07246189280815714, "percentage": 2.41, "elapsed_time": "2:02:00", "remaining_time": "3 days, 10:10:13", "throughput": 16170.06, "total_tokens": 118373760} +{"current_steps": 360, "total_steps": 14493, "loss": 0.4136, "lr": 4.912596424467818e-05, "epoch": 0.07453223260267591, "percentage": 2.48, "elapsed_time": "2:07:39", "remaining_time": "3 days, 11:31:45", "throughput": 15898.41, "total_tokens": 121776640} +{"current_steps": 370, "total_steps": 14493, "loss": 0.4118, "lr": 4.910226966738475e-05, "epoch": 0.07660257239719469, "percentage": 2.55, "elapsed_time": "2:11:17", "remaining_time": "3 days, 11:31:28", "throughput": 15889.32, "total_tokens": 125169152} 
+{"current_steps": 380, "total_steps": 14493, "loss": 0.4108, "lr": 4.9078609342362666e-05, "epoch": 0.07867291219171346, "percentage": 2.62, "elapsed_time": "2:14:02", "remaining_time": "3 days, 10:58:17", "throughput": 15995.76, "total_tokens": 128647168} +{"current_steps": 390, "total_steps": 14493, "loss": 0.4063, "lr": 4.905498318716775e-05, "epoch": 0.08074325198623224, "percentage": 2.69, "elapsed_time": "2:16:26", "remaining_time": "3 days, 10:14:08", "throughput": 16111.98, "total_tokens": 131906176} +{"current_steps": 400, "total_steps": 14493, "loss": 0.4093, "lr": 4.9031391119633295e-05, "epoch": 0.08281359178075101, "percentage": 2.76, "elapsed_time": "2:19:17", "remaining_time": "3 days, 9:47:28", "throughput": 16191.7, "total_tokens": 135319040} +{"current_steps": 410, "total_steps": 14493, "loss": 0.4086, "lr": 4.9007833057869e-05, "epoch": 0.08488393157526979, "percentage": 2.83, "elapsed_time": "2:22:25", "remaining_time": "3 days, 9:32:08", "throughput": 16234.87, "total_tokens": 138735360} +{"current_steps": 420, "total_steps": 14493, "loss": 0.4051, "lr": 4.898430892025967e-05, "epoch": 0.08695427136978856, "percentage": 2.9, "elapsed_time": "2:25:44", "remaining_time": "3 days, 9:23:11", "throughput": 16251.59, "total_tokens": 142106240} +{"current_steps": 430, "total_steps": 14493, "loss": 0.4001, "lr": 4.896081862546415e-05, "epoch": 0.08902461116430734, "percentage": 2.97, "elapsed_time": "2:29:02", "remaining_time": "3 days, 9:14:09", "throughput": 16273.27, "total_tokens": 145517952} +{"current_steps": 440, "total_steps": 14493, "loss": 0.4019, "lr": 4.8937362092414e-05, "epoch": 0.09109495095882612, "percentage": 3.04, "elapsed_time": "2:32:20", "remaining_time": "3 days, 9:05:37", "throughput": 16283.75, "total_tokens": 148842880} +{"current_steps": 450, "total_steps": 14493, "loss": 0.3935, "lr": 4.891393924031244e-05, "epoch": 0.0931652907533449, "percentage": 3.1, "elapsed_time": "2:35:41", "remaining_time": "3 days, 8:58:35", 
"throughput": 16286.67, "total_tokens": 152141056} +{"current_steps": 460, "total_steps": 14493, "loss": 0.4028, "lr": 4.8890549988633095e-05, "epoch": 0.09523563054786367, "percentage": 3.17, "elapsed_time": "2:39:11", "remaining_time": "3 days, 8:56:12", "throughput": 16284.86, "total_tokens": 155538944} +{"current_steps": 470, "total_steps": 14493, "loss": 0.3993, "lr": 4.8867194257118907e-05, "epoch": 0.09730597034238245, "percentage": 3.24, "elapsed_time": "2:42:47", "remaining_time": "3 days, 8:57:17", "throughput": 16270.49, "total_tokens": 158929152} +{"current_steps": 480, "total_steps": 14493, "loss": 0.398, "lr": 4.884387196578093e-05, "epoch": 0.09937631013690122, "percentage": 3.31, "elapsed_time": "2:46:48", "remaining_time": "3 days, 9:09:42", "throughput": 16211.64, "total_tokens": 162252672} +{"current_steps": 490, "total_steps": 14493, "loss": 0.3944, "lr": 4.882058303489718e-05, "epoch": 0.10144664993142, "percentage": 3.38, "elapsed_time": "2:51:04", "remaining_time": "3 days, 9:28:50", "throughput": 16134.38, "total_tokens": 165609216} +{"current_steps": 500, "total_steps": 14493, "loss": 0.3922, "lr": 4.8797327385011496e-05, "epoch": 0.10351698972593877, "percentage": 3.45, "elapsed_time": "2:54:43", "remaining_time": "3 days, 9:29:54", "throughput": 16122.24, "total_tokens": 169019520} +{"current_steps": 510, "total_steps": 14493, "loss": 0.3889, "lr": 4.8774104936932425e-05, "epoch": 0.10558732952045755, "percentage": 3.52, "elapsed_time": "2:57:46", "remaining_time": "3 days, 9:14:03", "throughput": 16163.86, "total_tokens": 172407680} +{"current_steps": 520, "total_steps": 14493, "loss": 0.3917, "lr": 4.8750915611732076e-05, "epoch": 0.10765766931497632, "percentage": 3.59, "elapsed_time": "3:00:43", "remaining_time": "3 days, 8:56:26", "throughput": 16200.52, "total_tokens": 175675648} +{"current_steps": 530, "total_steps": 14493, "loss": 0.3898, "lr": 4.8727759330744986e-05, "epoch": 0.1097280091094951, "percentage": 3.66, 
"elapsed_time": "3:03:51", "remaining_time": "3 days, 8:43:43", "throughput": 16223.82, "total_tokens": 178970624} +{"current_steps": 540, "total_steps": 14493, "loss": 0.3873, "lr": 4.870463601556696e-05, "epoch": 0.11179834890401387, "percentage": 3.73, "elapsed_time": "3:06:52", "remaining_time": "3 days, 8:28:44", "throughput": 16264.8, "total_tokens": 182372480} +{"current_steps": 550, "total_steps": 14493, "loss": 0.387, "lr": 4.8681545588054075e-05, "epoch": 0.11386868869853264, "percentage": 3.79, "elapsed_time": "3:09:36", "remaining_time": "3 days, 8:06:48", "throughput": 16326.65, "total_tokens": 185742592} +{"current_steps": 560, "total_steps": 14493, "loss": 0.3863, "lr": 4.8658487970321404e-05, "epoch": 0.11593902849305142, "percentage": 3.86, "elapsed_time": "3:12:34", "remaining_time": "3 days, 7:51:32", "throughput": 16372.33, "total_tokens": 189181952} +{"current_steps": 570, "total_steps": 14493, "loss": 0.3866, "lr": 4.863546308474209e-05, "epoch": 0.1180093682875702, "percentage": 3.93, "elapsed_time": "3:15:16", "remaining_time": "3 days, 7:29:44", "throughput": 16431.08, "total_tokens": 192510592} +{"current_steps": 580, "total_steps": 14493, "loss": 0.3859, "lr": 4.86124708539461e-05, "epoch": 0.12007970808208897, "percentage": 4.0, "elapsed_time": "3:17:39", "remaining_time": "3 days, 7:01:15", "throughput": 16516.14, "total_tokens": 195866880} +{"current_steps": 590, "total_steps": 14493, "loss": 0.3874, "lr": 4.8589511200819216e-05, "epoch": 0.12215004787660774, "percentage": 4.07, "elapsed_time": "3:20:01", "remaining_time": "3 days, 6:33:22", "throughput": 16601.67, "total_tokens": 199240832} +{"current_steps": 600, "total_steps": 14493, "loss": 0.3787, "lr": 4.8566584048501926e-05, "epoch": 0.12422038767112652, "percentage": 4.14, "elapsed_time": "3:22:31", "remaining_time": "3 days, 6:09:21", "throughput": 16675.53, "total_tokens": 202628096} +{"current_steps": 610, "total_steps": 14493, "loss": 0.3838, "lr": 4.854368932038835e-05, 
"epoch": 0.1262907274656453, "percentage": 4.21, "elapsed_time": "3:25:35", "remaining_time": "3 days, 5:58:59", "throughput": 16708.2, "total_tokens": 206100608} +{"current_steps": 620, "total_steps": 14493, "loss": 0.3793, "lr": 4.8520826940125144e-05, "epoch": 0.12836106726016408, "percentage": 4.28, "elapsed_time": "3:29:00", "remaining_time": "3 days, 5:56:40", "throughput": 16702.22, "total_tokens": 209451392} +{"current_steps": 630, "total_steps": 14493, "loss": 0.3781, "lr": 4.849799683161046e-05, "epoch": 0.13043140705468284, "percentage": 4.35, "elapsed_time": "3:31:51", "remaining_time": "3 days, 5:41:47", "throughput": 16747.89, "total_tokens": 212886016} +{"current_steps": 640, "total_steps": 14493, "loss": 0.3758, "lr": 4.8475198918992835e-05, "epoch": 0.13250174684920163, "percentage": 4.42, "elapsed_time": "3:35:22", "remaining_time": "3 days, 5:41:41", "throughput": 16740.33, "total_tokens": 216318848} +{"current_steps": 650, "total_steps": 14493, "loss": 0.377, "lr": 4.845243312667023e-05, "epoch": 0.1345720866437204, "percentage": 4.48, "elapsed_time": "3:39:09", "remaining_time": "3 days, 5:47:33", "throughput": 16701.74, "total_tokens": 219627264} +{"current_steps": 660, "total_steps": 14493, "loss": 0.3827, "lr": 4.842969937928884e-05, "epoch": 0.13664242643823918, "percentage": 4.55, "elapsed_time": "3:42:34", "remaining_time": "3 days, 5:44:50", "throughput": 16698.76, "total_tokens": 222997376} +{"current_steps": 670, "total_steps": 14493, "loss": 0.3787, "lr": 4.840699760174217e-05, "epoch": 0.13871276623275794, "percentage": 4.62, "elapsed_time": "3:46:00", "remaining_time": "3 days, 5:42:51", "throughput": 16696.85, "total_tokens": 226417536} +{"current_steps": 680, "total_steps": 14493, "loss": 0.3784, "lr": 4.8384327719169906e-05, "epoch": 0.14078310602727673, "percentage": 4.69, "elapsed_time": "3:49:13", "remaining_time": "3 days, 5:36:10", "throughput": 16709.77, "total_tokens": 229811712} +{"current_steps": 690, "total_steps": 
14493, "loss": 0.3746, "lr": 4.836168965695694e-05, "epoch": 0.1428534458217955, "percentage": 4.76, "elapsed_time": "3:52:27", "remaining_time": "3 days, 5:30:20", "throughput": 16719.0, "total_tokens": 233196544} +{"current_steps": 700, "total_steps": 14493, "loss": 0.3731, "lr": 4.8339083340732304e-05, "epoch": 0.14492378561631428, "percentage": 4.83, "elapsed_time": "3:55:44", "remaining_time": "3 days, 5:25:15", "throughput": 16722.76, "total_tokens": 236542464} +{"current_steps": 710, "total_steps": 14493, "loss": 0.3764, "lr": 4.8316508696368154e-05, "epoch": 0.14699412541083307, "percentage": 4.9, "elapsed_time": "3:58:49", "remaining_time": "3 days, 5:16:04", "throughput": 16745.72, "total_tokens": 239949952} +{"current_steps": 720, "total_steps": 14493, "loss": 0.375, "lr": 4.8293965649978714e-05, "epoch": 0.14906446520535183, "percentage": 4.97, "elapsed_time": "4:01:42", "remaining_time": "3 days, 5:03:49", "throughput": 16782.06, "total_tokens": 243389184} +{"current_steps": 730, "total_steps": 14493, "loss": 0.3741, "lr": 4.8271454127919364e-05, "epoch": 0.15113480499987061, "percentage": 5.04, "elapsed_time": "4:04:17", "remaining_time": "3 days, 4:45:44", "throughput": 16835.58, "total_tokens": 246767744} +{"current_steps": 740, "total_steps": 14493, "loss": 0.377, "lr": 4.824897405678549e-05, "epoch": 0.15320514479438938, "percentage": 5.11, "elapsed_time": "4:06:44", "remaining_time": "3 days, 4:25:50", "throughput": 16895.1, "total_tokens": 250129920} +{"current_steps": 750, "total_steps": 14493, "loss": 0.3739, "lr": 4.8226525363411576e-05, "epoch": 0.15527548458890816, "percentage": 5.17, "elapsed_time": "4:09:36", "remaining_time": "3 days, 4:13:42", "throughput": 16925.22, "total_tokens": 253473920} +{"current_steps": 760, "total_steps": 14493, "loss": 0.369, "lr": 4.820410797487017e-05, "epoch": 0.15734582438342692, "percentage": 5.24, "elapsed_time": "4:12:06", "remaining_time": "3 days, 3:55:35", "throughput": 16978.43, "total_tokens": 
256827392} +{"current_steps": 770, "total_steps": 14493, "loss": 0.3701, "lr": 4.818172181847091e-05, "epoch": 0.1594161641779457, "percentage": 5.31, "elapsed_time": "4:14:45", "remaining_time": "3 days, 3:40:10", "throughput": 17024.83, "total_tokens": 260225024} +{"current_steps": 780, "total_steps": 14493, "loss": 0.3734, "lr": 4.81593668217595e-05, "epoch": 0.16148650397246447, "percentage": 5.38, "elapsed_time": "4:17:19", "remaining_time": "3 days, 3:23:52", "throughput": 17076.02, "total_tokens": 263639680} +{"current_steps": 790, "total_steps": 14493, "loss": 0.368, "lr": 4.813704291251675e-05, "epoch": 0.16355684376698326, "percentage": 5.45, "elapsed_time": "4:19:43", "remaining_time": "3 days, 3:05:11", "throughput": 17132.46, "total_tokens": 266990720} +{"current_steps": 800, "total_steps": 14493, "loss": 0.372, "lr": 4.811475001875759e-05, "epoch": 0.16562718356150202, "percentage": 5.52, "elapsed_time": "4:22:10", "remaining_time": "3 days, 2:47:21", "throughput": 17191.6, "total_tokens": 270426624} +{"current_steps": 810, "total_steps": 14493, "loss": 0.37, "lr": 4.8092488068730105e-05, "epoch": 0.1676975233560208, "percentage": 5.59, "elapsed_time": "4:24:35", "remaining_time": "3 days, 2:29:38", "throughput": 17249.91, "total_tokens": 273851392} +{"current_steps": 820, "total_steps": 14493, "loss": 0.3704, "lr": 4.807025699091452e-05, "epoch": 0.16976786315053957, "percentage": 5.66, "elapsed_time": "4:26:52", "remaining_time": "3 days, 2:09:57", "throughput": 17308.02, "total_tokens": 277143040} +{"current_steps": 830, "total_steps": 14493, "loss": 0.3648, "lr": 4.8048056714022325e-05, "epoch": 0.17183820294505836, "percentage": 5.73, "elapsed_time": "4:29:38", "remaining_time": "3 days, 1:58:45", "throughput": 17340.55, "total_tokens": 280548480} +{"current_steps": 840, "total_steps": 14493, "loss": 0.3633, "lr": 4.802588716699519e-05, "epoch": 0.17390854273957712, "percentage": 5.8, "elapsed_time": "4:32:28", "remaining_time": "3 days, 
1:48:36", "throughput": 17365.74, "total_tokens": 283897984} +{"current_steps": 850, "total_steps": 14493, "loss": 0.3701, "lr": 4.8003748279004156e-05, "epoch": 0.1759788825340959, "percentage": 5.86, "elapsed_time": "4:34:56", "remaining_time": "3 days, 1:32:50", "throughput": 17411.3, "total_tokens": 287217280} +{"current_steps": 860, "total_steps": 14493, "loss": 0.3667, "lr": 4.798163997944854e-05, "epoch": 0.17804922232861467, "percentage": 5.93, "elapsed_time": "4:37:22", "remaining_time": "3 days, 1:17:07", "throughput": 17461.63, "total_tokens": 290610688} +{"current_steps": 870, "total_steps": 14493, "loss": 0.363, "lr": 4.79595621979551e-05, "epoch": 0.18011956212313346, "percentage": 6.0, "elapsed_time": "4:39:42", "remaining_time": "3 days, 0:59:57", "throughput": 17516.96, "total_tokens": 293985792} +{"current_steps": 880, "total_steps": 14493, "loss": 0.3649, "lr": 4.793751486437702e-05, "epoch": 0.18218990191765225, "percentage": 6.07, "elapsed_time": "4:42:12", "remaining_time": "3 days, 0:45:33", "throughput": 17559.06, "total_tokens": 297318400} +{"current_steps": 890, "total_steps": 14493, "loss": 0.3637, "lr": 4.7915497908793064e-05, "epoch": 0.184260241712171, "percentage": 6.14, "elapsed_time": "4:44:33", "remaining_time": "3 days, 0:29:19", "throughput": 17608.59, "total_tokens": 300643712} +{"current_steps": 900, "total_steps": 14493, "loss": 0.3651, "lr": 4.7893511261506516e-05, "epoch": 0.1863305815066898, "percentage": 6.21, "elapsed_time": "4:46:58", "remaining_time": "3 days, 0:14:17", "throughput": 17653.57, "total_tokens": 303969152} +{"current_steps": 910, "total_steps": 14493, "loss": 0.3626, "lr": 4.787155485304435e-05, "epoch": 0.18840092130120856, "percentage": 6.28, "elapsed_time": "4:49:39", "remaining_time": "3 days, 0:03:25", "throughput": 17684.59, "total_tokens": 307340416} +{"current_steps": 920, "total_steps": 14493, "loss": 0.363, "lr": 4.784962861415629e-05, "epoch": 0.19047126109572735, "percentage": 6.35, 
"elapsed_time": "4:52:26", "remaining_time": "2 days, 23:54:23", "throughput": 17713.44, "total_tokens": 310803328} +{"current_steps": 930, "total_steps": 14493, "loss": 0.3609, "lr": 4.7827732475813884e-05, "epoch": 0.1925416008902461, "percentage": 6.42, "elapsed_time": "4:54:39", "remaining_time": "2 days, 23:37:14", "throughput": 17773.75, "total_tokens": 314229632} +{"current_steps": 940, "total_steps": 14493, "loss": 0.363, "lr": 4.7805866369209576e-05, "epoch": 0.1946119406847649, "percentage": 6.49, "elapsed_time": "4:56:55", "remaining_time": "2 days, 23:20:58", "throughput": 17835.74, "total_tokens": 317744128} +{"current_steps": 950, "total_steps": 14493, "loss": 0.3606, "lr": 4.778403022575583e-05, "epoch": 0.19668228047928366, "percentage": 6.55, "elapsed_time": "4:59:02", "remaining_time": "2 days, 23:02:58", "throughput": 17898.22, "total_tokens": 321131648} +{"current_steps": 960, "total_steps": 14493, "loss": 0.3645, "lr": 4.7762223977084195e-05, "epoch": 0.19875262027380244, "percentage": 6.62, "elapsed_time": "5:01:12", "remaining_time": "2 days, 22:46:09", "throughput": 17951.64, "total_tokens": 324435456} +{"current_steps": 970, "total_steps": 14493, "loss": 0.3616, "lr": 4.774044755504444e-05, "epoch": 0.2008229600683212, "percentage": 6.69, "elapsed_time": "5:03:37", "remaining_time": "2 days, 22:32:57", "throughput": 17995.66, "total_tokens": 327839488} +{"current_steps": 980, "total_steps": 14493, "loss": 0.3581, "lr": 4.7718700891703616e-05, "epoch": 0.20289329986284, "percentage": 6.76, "elapsed_time": "5:06:35", "remaining_time": "2 days, 22:27:38", "throughput": 18010.43, "total_tokens": 331319808} +{"current_steps": 990, "total_steps": 14493, "loss": 0.3589, "lr": 4.7696983919345215e-05, "epoch": 0.20496363965735875, "percentage": 6.83, "elapsed_time": "5:09:09", "remaining_time": "2 days, 22:16:43", "throughput": 18043.03, "total_tokens": 334688384} +{"current_steps": 1000, "total_steps": 14493, "loss": 0.3578, "lr": 
4.7675296570468216e-05, "epoch": 0.20703397945187754, "percentage": 6.9, "elapsed_time": "5:11:56", "remaining_time": "2 days, 22:08:58", "throughput": 18063.66, "total_tokens": 338084608} +{"current_steps": 1010, "total_steps": 14493, "loss": 0.3579, "lr": 4.76536387777863e-05, "epoch": 0.2091043192463963, "percentage": 6.97, "elapsed_time": "5:14:55", "remaining_time": "2 days, 22:04:01", "throughput": 18074.33, "total_tokens": 341518080} +{"current_steps": 1020, "total_steps": 14493, "loss": 0.3595, "lr": 4.7632010474226915e-05, "epoch": 0.2111746590409151, "percentage": 7.04, "elapsed_time": "5:18:34", "remaining_time": "2 days, 22:08:05", "throughput": 18044.9, "total_tokens": 344926976} +{"current_steps": 1030, "total_steps": 14493, "loss": 0.3584, "lr": 4.761041159293035e-05, "epoch": 0.21324499883543385, "percentage": 7.11, "elapsed_time": "5:22:15", "remaining_time": "2 days, 22:12:14", "throughput": 18012.25, "total_tokens": 348279808} +{"current_steps": 1040, "total_steps": 14493, "loss": 0.3559, "lr": 4.7588842067249e-05, "epoch": 0.21531533862995264, "percentage": 7.18, "elapsed_time": "5:25:33", "remaining_time": "2 days, 22:11:19", "throughput": 18006.19, "total_tokens": 351727360} +{"current_steps": 1050, "total_steps": 14493, "loss": 0.358, "lr": 4.756730183074637e-05, "epoch": 0.21738567842447143, "percentage": 7.24, "elapsed_time": "5:28:31", "remaining_time": "2 days, 22:06:04", "throughput": 18016.17, "total_tokens": 355127168} +{"current_steps": 1060, "total_steps": 14493, "loss": 0.3559, "lr": 4.7545790817196314e-05, "epoch": 0.2194560182189902, "percentage": 7.31, "elapsed_time": "5:31:30", "remaining_time": "2 days, 22:01:02", "throughput": 18024.44, "total_tokens": 358510720} +{"current_steps": 1070, "total_steps": 14493, "loss": 0.3557, "lr": 4.752430896058212e-05, "epoch": 0.22152635801350898, "percentage": 7.38, "elapsed_time": "5:34:29", "remaining_time": "2 days, 21:56:09", "throughput": 18033.9, "total_tokens": 361931520} 
+{"current_steps": 1080, "total_steps": 14493, "loss": 0.3554, "lr": 4.750285619509567e-05, "epoch": 0.22359669780802774, "percentage": 7.45, "elapsed_time": "5:37:16", "remaining_time": "2 days, 21:48:51", "throughput": 18049.74, "total_tokens": 365271552} +{"current_steps": 1090, "total_steps": 14493, "loss": 0.3553, "lr": 4.7481432455136644e-05, "epoch": 0.22566703760254653, "percentage": 7.52, "elapsed_time": "5:40:08", "remaining_time": "2 days, 21:42:33", "throughput": 18066.03, "total_tokens": 368706432} +{"current_steps": 1100, "total_steps": 14493, "loss": 0.3523, "lr": 4.7460037675311584e-05, "epoch": 0.2277373773970653, "percentage": 7.59, "elapsed_time": "5:43:12", "remaining_time": "2 days, 21:38:43", "throughput": 18068.83, "total_tokens": 372082432} +{"current_steps": 1110, "total_steps": 14493, "loss": 0.3567, "lr": 4.7438671790433126e-05, "epoch": 0.22980771719158408, "percentage": 7.66, "elapsed_time": "5:46:41", "remaining_time": "2 days, 21:40:03", "throughput": 18050.49, "total_tokens": 375484416} +{"current_steps": 1120, "total_steps": 14493, "loss": 0.3508, "lr": 4.741733473551915e-05, "epoch": 0.23187805698610284, "percentage": 7.73, "elapsed_time": "5:49:39", "remaining_time": "2 days, 21:34:54", "throughput": 18058.65, "total_tokens": 378854912} +{"current_steps": 1130, "total_steps": 14493, "loss": 0.3529, "lr": 4.7396026445791966e-05, "epoch": 0.23394839678062163, "percentage": 7.8, "elapsed_time": "5:52:28", "remaining_time": "2 days, 21:28:15", "throughput": 18074.9, "total_tokens": 382258560} +{"current_steps": 1140, "total_steps": 14493, "loss": 0.3539, "lr": 4.737474685667742e-05, "epoch": 0.2360187365751404, "percentage": 7.87, "elapsed_time": "5:55:03", "remaining_time": "2 days, 21:18:51", "throughput": 18099.85, "total_tokens": 385590400} +{"current_steps": 1150, "total_steps": 14493, "loss": 0.3561, "lr": 4.7353495903804165e-05, "epoch": 0.23808907636965917, "percentage": 7.93, "elapsed_time": "5:57:44", "remaining_time": "2 
days, 21:10:46", "throughput": 18119.72, "total_tokens": 388935040} +{"current_steps": 1160, "total_steps": 14493, "loss": 0.3535, "lr": 4.733227352300277e-05, "epoch": 0.24015941616417794, "percentage": 8.0, "elapsed_time": "6:00:23", "remaining_time": "2 days, 21:02:25", "throughput": 18136.75, "total_tokens": 392188544} +{"current_steps": 1170, "total_steps": 14493, "loss": 0.3553, "lr": 4.731107965030496e-05, "epoch": 0.24222975595869672, "percentage": 8.07, "elapsed_time": "6:03:18", "remaining_time": "2 days, 20:57:04", "throughput": 18147.59, "total_tokens": 395592448} +{"current_steps": 1180, "total_steps": 14493, "loss": 0.351, "lr": 4.728991422194278e-05, "epoch": 0.24430009575321548, "percentage": 8.14, "elapsed_time": "6:06:34", "remaining_time": "2 days, 20:55:46", "throughput": 18144.57, "total_tokens": 399080704} +{"current_steps": 1190, "total_steps": 14493, "loss": 0.3534, "lr": 4.726877717434773e-05, "epoch": 0.24637043554773427, "percentage": 8.21, "elapsed_time": "6:09:44", "remaining_time": "2 days, 20:53:24", "throughput": 18143.59, "total_tokens": 402513152} +{"current_steps": 1200, "total_steps": 14493, "loss": 0.3542, "lr": 4.724766844415013e-05, "epoch": 0.24844077534225303, "percentage": 8.28, "elapsed_time": "6:12:54", "remaining_time": "2 days, 20:50:52", "throughput": 18143.01, "total_tokens": 405938688} +{"current_steps": 1210, "total_steps": 14493, "loss": 0.3494, "lr": 4.722658796817813e-05, "epoch": 0.2505111151367718, "percentage": 8.35, "elapsed_time": "6:15:43", "remaining_time": "2 days, 20:44:37", "throughput": 18156.14, "total_tokens": 409305856} +{"current_steps": 1220, "total_steps": 14493, "loss": 0.3524, "lr": 4.7205535683457044e-05, "epoch": 0.2525814549312906, "percentage": 8.42, "elapsed_time": "6:18:29", "remaining_time": "2 days, 20:37:49", "throughput": 18169.73, "total_tokens": 412627328} +{"current_steps": 1230, "total_steps": 14493, "loss": 0.3482, "lr": 4.7184511527208484e-05, "epoch": 0.2546517947258094, 
"percentage": 8.49, "elapsed_time": "6:22:04", "remaining_time": "2 days, 20:39:58", "throughput": 18148.65, "total_tokens": 416056832} +{"current_steps": 1240, "total_steps": 14493, "loss": 0.3473, "lr": 4.7163515436849644e-05, "epoch": 0.25672213452032816, "percentage": 8.56, "elapsed_time": "6:24:54", "remaining_time": "2 days, 20:33:53", "throughput": 18166.42, "total_tokens": 419547008} +{"current_steps": 1250, "total_steps": 14493, "loss": 0.3455, "lr": 4.714254734999245e-05, "epoch": 0.2587924743148469, "percentage": 8.62, "elapsed_time": "6:27:38", "remaining_time": "2 days, 20:26:45", "throughput": 18182.25, "total_tokens": 422883712} +{"current_steps": 1260, "total_steps": 14493, "loss": 0.3484, "lr": 4.712160720444284e-05, "epoch": 0.2608628141093657, "percentage": 8.69, "elapsed_time": "6:30:24", "remaining_time": "2 days, 20:20:09", "throughput": 18192.55, "total_tokens": 426145408} +{"current_steps": 1270, "total_steps": 14493, "loss": 0.3468, "lr": 4.710069493819992e-05, "epoch": 0.2629331539038845, "percentage": 8.76, "elapsed_time": "6:33:35", "remaining_time": "2 days, 20:17:56", "throughput": 18189.59, "total_tokens": 429550720} +{"current_steps": 1280, "total_steps": 14493, "loss": 0.3503, "lr": 4.70798104894553e-05, "epoch": 0.26500349369840326, "percentage": 8.83, "elapsed_time": "6:36:02", "remaining_time": "2 days, 20:08:11", "throughput": 18222.43, "total_tokens": 433009152} +{"current_steps": 1290, "total_steps": 14493, "loss": 0.3506, "lr": 4.705895379659219e-05, "epoch": 0.267073833492922, "percentage": 8.9, "elapsed_time": "6:38:13", "remaining_time": "2 days, 19:55:42", "throughput": 18263.34, "total_tokens": 436366592} +{"current_steps": 1300, "total_steps": 14493, "loss": 0.3476, "lr": 4.7038124798184766e-05, "epoch": 0.2691441732874408, "percentage": 8.97, "elapsed_time": "6:40:16", "remaining_time": "2 days, 19:42:05", "throughput": 18308.75, "total_tokens": 439703040} +{"current_steps": 1310, "total_steps": 14493, "loss": 0.3453, 
"lr": 4.7017323432997304e-05, "epoch": 0.2712145130819596, "percentage": 9.04, "elapsed_time": "6:42:37", "remaining_time": "2 days, 19:31:43", "throughput": 18347.66, "total_tokens": 443229184} +{"current_steps": 1320, "total_steps": 14493, "loss": 0.3456, "lr": 4.6996549639983506e-05, "epoch": 0.27328485287647836, "percentage": 9.11, "elapsed_time": "6:45:03", "remaining_time": "2 days, 19:22:12", "throughput": 18376.16, "total_tokens": 446595968} +{"current_steps": 1330, "total_steps": 14493, "loss": 0.3459, "lr": 4.697580335828569e-05, "epoch": 0.2753551926709971, "percentage": 9.18, "elapsed_time": "6:47:23", "remaining_time": "2 days, 19:11:56", "throughput": 18410.12, "total_tokens": 450007296} +{"current_steps": 1340, "total_steps": 14493, "loss": 0.3477, "lr": 4.6955084527234076e-05, "epoch": 0.2774255324655159, "percentage": 9.25, "elapsed_time": "6:49:47", "remaining_time": "2 days, 19:02:18", "throughput": 18440.05, "total_tokens": 453387392} +{"current_steps": 1350, "total_steps": 14493, "loss": 0.3474, "lr": 4.6934393086346034e-05, "epoch": 0.2794958722600347, "percentage": 9.31, "elapsed_time": "6:52:07", "remaining_time": "2 days, 18:52:13", "throughput": 18472.05, "total_tokens": 456763520} +{"current_steps": 1360, "total_steps": 14493, "loss": 0.3453, "lr": 4.6913728975325324e-05, "epoch": 0.28156621205455346, "percentage": 9.38, "elapsed_time": "6:54:21", "remaining_time": "2 days, 18:41:19", "throughput": 18506.17, "total_tokens": 460094208} +{"current_steps": 1370, "total_steps": 14493, "loss": 0.3427, "lr": 4.6893092134061393e-05, "epoch": 0.2836365518490722, "percentage": 9.45, "elapsed_time": "6:56:46", "remaining_time": "2 days, 18:32:09", "throughput": 18534.64, "total_tokens": 463478784} +{"current_steps": 1380, "total_steps": 14493, "loss": 0.3443, "lr": 4.687248250262859e-05, "epoch": 0.285706891643591, "percentage": 9.52, "elapsed_time": "6:59:20", "remaining_time": "2 days, 18:24:36", "throughput": 18554.75, "total_tokens": 466840064} 
+{"current_steps": 1390, "total_steps": 14493, "loss": 0.3448, "lr": 4.685190002128548e-05, "epoch": 0.2877772314381098, "percentage": 9.59, "elapsed_time": "7:01:47", "remaining_time": "2 days, 18:16:06", "throughput": 18585.87, "total_tokens": 470366976} +{"current_steps": 1400, "total_steps": 14493, "loss": 0.3473, "lr": 4.6831344630474114e-05, "epoch": 0.28984757123262855, "percentage": 9.66, "elapsed_time": "7:03:40", "remaining_time": "2 days, 18:02:19", "throughput": 18634.78, "total_tokens": 473712000} +{"current_steps": 1410, "total_steps": 14493, "loss": 0.3464, "lr": 4.6810816270819276e-05, "epoch": 0.2919179110271473, "percentage": 9.73, "elapsed_time": "7:05:33", "remaining_time": "2 days, 17:48:40", "throughput": 18684.57, "total_tokens": 477086208} +{"current_steps": 1420, "total_steps": 14493, "loss": 0.346, "lr": 4.679031488312777e-05, "epoch": 0.29398825082166613, "percentage": 9.8, "elapsed_time": "7:07:29", "remaining_time": "2 days, 17:35:41", "throughput": 18735.44, "total_tokens": 480562304} +{"current_steps": 1430, "total_steps": 14493, "loss": 0.3454, "lr": 4.6769840408387717e-05, "epoch": 0.2960585906161849, "percentage": 9.87, "elapsed_time": "7:09:19", "remaining_time": "2 days, 17:21:54", "throughput": 18787.3, "total_tokens": 483956864} +{"current_steps": 1440, "total_steps": 14493, "loss": 0.347, "lr": 4.674939278776787e-05, "epoch": 0.29812893041070365, "percentage": 9.94, "elapsed_time": "7:11:08", "remaining_time": "2 days, 17:08:05", "throughput": 18840.33, "total_tokens": 487367936} +{"current_steps": 1450, "total_steps": 14493, "loss": 0.3433, "lr": 4.672897196261683e-05, "epoch": 0.3001992702052224, "percentage": 10.0, "elapsed_time": "7:13:00", "remaining_time": "2 days, 16:55:00", "throughput": 18889.0, "total_tokens": 490748928} +{"current_steps": 1460, "total_steps": 14493, "loss": 0.3437, "lr": 4.670857787446238e-05, "epoch": 0.30226960999974123, "percentage": 10.07, "elapsed_time": "7:14:40", "remaining_time": "2 days, 
16:40:12", "throughput": 18940.56, "total_tokens": 493978112} +{"current_steps": 1470, "total_steps": 14493, "loss": 0.3418, "lr": 4.668821046501082e-05, "epoch": 0.30433994979426, "percentage": 10.14, "elapsed_time": "7:16:22", "remaining_time": "2 days, 16:25:52", "throughput": 18991.86, "total_tokens": 497248128} +{"current_steps": 1480, "total_steps": 14493, "loss": 0.3405, "lr": 4.6667869676146194e-05, "epoch": 0.30641028958877875, "percentage": 10.21, "elapsed_time": "7:18:06", "remaining_time": "2 days, 16:12:03", "throughput": 19043.22, "total_tokens": 500575104} +{"current_steps": 1490, "total_steps": 14493, "loss": 0.3432, "lr": 4.6647555449929645e-05, "epoch": 0.3084806293832975, "percentage": 10.28, "elapsed_time": "7:19:50", "remaining_time": "2 days, 15:58:28", "throughput": 19094.86, "total_tokens": 503928192} +{"current_steps": 1500, "total_steps": 14493, "loss": 0.3442, "lr": 4.662726772859869e-05, "epoch": 0.3105509691778163, "percentage": 10.35, "elapsed_time": "7:21:42", "remaining_time": "2 days, 15:46:00", "throughput": 19146.15, "total_tokens": 507412736} +{"current_steps": 1510, "total_steps": 14493, "loss": 0.3419, "lr": 4.660700645456655e-05, "epoch": 0.3126213089723351, "percentage": 10.42, "elapsed_time": "7:23:31", "remaining_time": "2 days, 15:33:23", "throughput": 19195.49, "total_tokens": 510814848} +{"current_steps": 1520, "total_steps": 14493, "loss": 0.3425, "lr": 4.658677157042149e-05, "epoch": 0.31469164876685385, "percentage": 10.49, "elapsed_time": "7:25:21", "remaining_time": "2 days, 15:21:02", "throughput": 19241.3, "total_tokens": 514151808} +{"current_steps": 1530, "total_steps": 14493, "loss": 0.3439, "lr": 4.656656301892605e-05, "epoch": 0.3167619885613726, "percentage": 10.56, "elapsed_time": "7:27:02", "remaining_time": "2 days, 15:07:37", "throughput": 19294.3, "total_tokens": 517527936} +{"current_steps": 1540, "total_steps": 14493, "loss": 0.3387, "lr": 4.6546380743016465e-05, "epoch": 0.3188323283558914, 
"percentage": 10.63, "elapsed_time": "7:28:46", "remaining_time": "2 days, 14:54:37", "throughput": 19341.32, "total_tokens": 520788864} +{"current_steps": 1550, "total_steps": 14493, "loss": 0.3398, "lr": 4.652622468580193e-05, "epoch": 0.3209026681504102, "percentage": 10.69, "elapsed_time": "7:30:36", "remaining_time": "2 days, 14:42:42", "throughput": 19387.12, "total_tokens": 524157696} +{"current_steps": 1560, "total_steps": 14493, "loss": 0.3387, "lr": 4.650609479056392e-05, "epoch": 0.32297300794492895, "percentage": 10.76, "elapsed_time": "7:32:30", "remaining_time": "2 days, 14:31:31", "throughput": 19431.47, "total_tokens": 527582720} +{"current_steps": 1570, "total_steps": 14493, "loss": 0.3404, "lr": 4.648599100075556e-05, "epoch": 0.32504334773944776, "percentage": 10.83, "elapsed_time": "7:34:24", "remaining_time": "2 days, 14:20:23", "throughput": 19474.9, "total_tokens": 530982400} +{"current_steps": 1580, "total_steps": 14493, "loss": 0.3425, "lr": 4.6465913260000945e-05, "epoch": 0.3271136875339665, "percentage": 10.9, "elapsed_time": "7:36:13", "remaining_time": "2 days, 14:08:33", "throughput": 19520.5, "total_tokens": 534334848} +{"current_steps": 1590, "total_steps": 14493, "loss": 0.3386, "lr": 4.644586151209444e-05, "epoch": 0.3291840273284853, "percentage": 10.97, "elapsed_time": "7:38:03", "remaining_time": "2 days, 13:57:11", "throughput": 19568.27, "total_tokens": 537806208} +{"current_steps": 1600, "total_steps": 14493, "loss": 0.3367, "lr": 4.6425835701000084e-05, "epoch": 0.33125436712300405, "percentage": 11.04, "elapsed_time": "7:39:58", "remaining_time": "2 days, 13:46:30", "throughput": 19612.36, "total_tokens": 541266560} +{"current_steps": 1610, "total_steps": 14493, "loss": 0.3374, "lr": 4.640583577085084e-05, "epoch": 0.33332470691752286, "percentage": 11.11, "elapsed_time": "7:41:50", "remaining_time": "2 days, 13:35:36", "throughput": 19655.04, "total_tokens": 544651904} +{"current_steps": 1620, "total_steps": 14493, 
"loss": 0.3371, "lr": 4.638586166594806e-05, "epoch": 0.3353950467120416, "percentage": 11.18, "elapsed_time": "7:43:34", "remaining_time": "2 days, 13:23:39", "throughput": 19703.04, "total_tokens": 548023168} +{"current_steps": 1630, "total_steps": 14493, "loss": 0.3378, "lr": 4.6365913330760726e-05, "epoch": 0.3374653865065604, "percentage": 11.25, "elapsed_time": "7:45:23", "remaining_time": "2 days, 13:12:35", "throughput": 19748.88, "total_tokens": 551456000} +{"current_steps": 1640, "total_steps": 14493, "loss": 0.3393, "lr": 4.6345990709924855e-05, "epoch": 0.33953572630107914, "percentage": 11.32, "elapsed_time": "7:47:08", "remaining_time": "2 days, 13:01:05", "throughput": 19792.1, "total_tokens": 554744192} +{"current_steps": 1650, "total_steps": 14493, "loss": 0.3358, "lr": 4.632609374824284e-05, "epoch": 0.34160606609559796, "percentage": 11.38, "elapsed_time": "7:48:56", "remaining_time": "2 days, 12:50:02", "throughput": 19835.2, "total_tokens": 558088576} +{"current_steps": 1660, "total_steps": 14493, "loss": 0.3367, "lr": 4.630622239068285e-05, "epoch": 0.3436764058901167, "percentage": 11.45, "elapsed_time": "7:50:46", "remaining_time": "2 days, 12:39:23", "throughput": 19880.39, "total_tokens": 561546112} +{"current_steps": 1670, "total_steps": 14493, "loss": 0.337, "lr": 4.628637658237808e-05, "epoch": 0.3457467456846355, "percentage": 11.52, "elapsed_time": "7:52:34", "remaining_time": "2 days, 12:28:36", "throughput": 19923.78, "total_tokens": 564923392} +{"current_steps": 1680, "total_steps": 14493, "loss": 0.3339, "lr": 4.626655626862625e-05, "epoch": 0.34781708547915424, "percentage": 11.59, "elapsed_time": "7:54:18", "remaining_time": "2 days, 12:17:27", "throughput": 19969.28, "total_tokens": 568297088} +{"current_steps": 1690, "total_steps": 14493, "loss": 0.3351, "lr": 4.624676139488888e-05, "epoch": 0.34988742527367306, "percentage": 11.66, "elapsed_time": "7:56:10", "remaining_time": "2 days, 12:07:25", "throughput": 20012.17, 
"total_tokens": 571764480} +{"current_steps": 1700, "total_steps": 14493, "loss": 0.3368, "lr": 4.6226991906790686e-05, "epoch": 0.3519577650681918, "percentage": 11.73, "elapsed_time": "7:57:53", "remaining_time": "2 days, 11:56:17", "throughput": 20056.6, "total_tokens": 575095936} +{"current_steps": 1710, "total_steps": 14493, "loss": 0.3359, "lr": 4.620724775011897e-05, "epoch": 0.3540281048627106, "percentage": 11.8, "elapsed_time": "7:59:47", "remaining_time": "2 days, 11:46:39", "throughput": 20097.76, "total_tokens": 578565632} +{"current_steps": 1720, "total_steps": 14493, "loss": 0.3382, "lr": 4.618752887082297e-05, "epoch": 0.35609844465722934, "percentage": 11.87, "elapsed_time": "8:01:34", "remaining_time": "2 days, 11:36:11", "throughput": 20140.74, "total_tokens": 581947648} +{"current_steps": 1730, "total_steps": 14493, "loss": 0.3353, "lr": 4.616783521501325e-05, "epoch": 0.35816878445174816, "percentage": 11.94, "elapsed_time": "8:03:26", "remaining_time": "2 days, 11:26:33", "throughput": 20181.39, "total_tokens": 585389312} +{"current_steps": 1740, "total_steps": 14493, "loss": 0.3366, "lr": 4.614816672896108e-05, "epoch": 0.3602391242462669, "percentage": 12.01, "elapsed_time": "8:05:18", "remaining_time": "2 days, 11:16:59", "throughput": 20221.55, "total_tokens": 588824320} +{"current_steps": 1750, "total_steps": 14493, "loss": 0.3353, "lr": 4.612852335909782e-05, "epoch": 0.3623094640407857, "percentage": 12.07, "elapsed_time": "8:07:04", "remaining_time": "2 days, 11:06:40", "throughput": 20263.03, "total_tokens": 592167168} +{"current_steps": 1760, "total_steps": 14493, "loss": 0.3325, "lr": 4.6108905052014323e-05, "epoch": 0.3643798038353045, "percentage": 12.14, "elapsed_time": "8:08:54", "remaining_time": "2 days, 10:57:02", "throughput": 20302.53, "total_tokens": 595558528} +{"current_steps": 1770, "total_steps": 14493, "loss": 0.3335, "lr": 4.608931175446027e-05, "epoch": 0.36645014362982326, "percentage": 12.21, "elapsed_time": 
"8:10:41", "remaining_time": "2 days, 10:47:05", "throughput": 20343.97, "total_tokens": 598946816} +{"current_steps": 1780, "total_steps": 14493, "loss": 0.3377, "lr": 4.606974341334367e-05, "epoch": 0.368520483424342, "percentage": 12.28, "elapsed_time": "8:12:27", "remaining_time": "2 days, 10:37:09", "throughput": 20384.88, "total_tokens": 602313984} +{"current_steps": 1790, "total_steps": 14493, "loss": 0.3347, "lr": 4.605019997573011e-05, "epoch": 0.3705908232188608, "percentage": 12.35, "elapsed_time": "8:14:16", "remaining_time": "2 days, 10:27:38", "throughput": 20424.71, "total_tokens": 605717248} +{"current_steps": 1800, "total_steps": 14493, "loss": 0.3359, "lr": 4.603068138884229e-05, "epoch": 0.3726611630133796, "percentage": 12.42, "elapsed_time": "8:16:03", "remaining_time": "2 days, 10:18:00", "throughput": 20462.44, "total_tokens": 609029888} +{"current_steps": 1810, "total_steps": 14493, "loss": 0.3367, "lr": 4.6011187600059345e-05, "epoch": 0.37473150280789835, "percentage": 12.49, "elapsed_time": "8:17:49", "remaining_time": "2 days, 10:08:21", "throughput": 20501.4, "total_tokens": 612366976} +{"current_steps": 1820, "total_steps": 14493, "loss": 0.3333, "lr": 4.599171855691629e-05, "epoch": 0.3768018426024171, "percentage": 12.56, "elapsed_time": "8:19:35", "remaining_time": "2 days, 9:58:47", "throughput": 20539.91, "total_tokens": 615700352} +{"current_steps": 1830, "total_steps": 14493, "loss": 0.3343, "lr": 4.597227420710335e-05, "epoch": 0.3788721823969359, "percentage": 12.63, "elapsed_time": "8:21:27", "remaining_time": "2 days, 9:49:52", "throughput": 20575.48, "total_tokens": 619056384} +{"current_steps": 1840, "total_steps": 14493, "loss": 0.3335, "lr": 4.595285449846551e-05, "epoch": 0.3809425221914547, "percentage": 12.7, "elapsed_time": "8:23:09", "remaining_time": "2 days, 9:40:04", "throughput": 20615.55, "total_tokens": 622381056} +{"current_steps": 1850, "total_steps": 14493, "loss": 0.3336, "lr": 4.593345937900178e-05, 
"epoch": 0.38301286198597345, "percentage": 12.76, "elapsed_time": "8:25:03", "remaining_time": "2 days, 9:31:33", "throughput": 20651.32, "total_tokens": 625800320} +{"current_steps": 1860, "total_steps": 14493, "loss": 0.3367, "lr": 4.591408879686472e-05, "epoch": 0.3850832017804922, "percentage": 12.83, "elapsed_time": "8:26:48", "remaining_time": "2 days, 9:22:15", "throughput": 20689.44, "total_tokens": 629142656} +{"current_steps": 1870, "total_steps": 14493, "loss": 0.3328, "lr": 4.5894742700359775e-05, "epoch": 0.387153541575011, "percentage": 12.9, "elapsed_time": "8:28:36", "remaining_time": "2 days, 9:13:11", "throughput": 20727.18, "total_tokens": 632512384} +{"current_steps": 1880, "total_steps": 14493, "loss": 0.3345, "lr": 4.587542103794477e-05, "epoch": 0.3892238813695298, "percentage": 12.97, "elapsed_time": "8:30:22", "remaining_time": "2 days, 9:04:08", "throughput": 20764.89, "total_tokens": 635876480} +{"current_steps": 1890, "total_steps": 14493, "loss": 0.3316, "lr": 4.5856123758229247e-05, "epoch": 0.39129422116404855, "percentage": 13.04, "elapsed_time": "8:32:07", "remaining_time": "2 days, 8:54:57", "throughput": 20804.84, "total_tokens": 639276160} +{"current_steps": 1900, "total_steps": 14493, "loss": 0.333, "lr": 4.5836850809973993e-05, "epoch": 0.3933645609585673, "percentage": 13.11, "elapsed_time": "8:33:51", "remaining_time": "2 days, 8:45:45", "throughput": 20842.94, "total_tokens": 642610944} +{"current_steps": 1910, "total_steps": 14493, "loss": 0.3343, "lr": 4.5817602142090385e-05, "epoch": 0.3954349007530861, "percentage": 13.18, "elapsed_time": "8:35:42", "remaining_time": "2 days, 8:37:25", "throughput": 20878.37, "total_tokens": 646021120} +{"current_steps": 1920, "total_steps": 14493, "loss": 0.3327, "lr": 4.579837770363989e-05, "epoch": 0.3975052405476049, "percentage": 13.25, "elapsed_time": "8:37:29", "remaining_time": "2 days, 8:28:45", "throughput": 20914.83, "total_tokens": 649394304} +{"current_steps": 1930, 
"total_steps": 14493, "loss": 0.3325, "lr": 4.57791774438334e-05, "epoch": 0.39957558034212365, "percentage": 13.32, "elapsed_time": "8:39:10", "remaining_time": "2 days, 8:19:29", "throughput": 20952.21, "total_tokens": 652673024} +{"current_steps": 1940, "total_steps": 14493, "loss": 0.3318, "lr": 4.576000131203078e-05, "epoch": 0.4016459201366424, "percentage": 13.39, "elapsed_time": "8:41:00", "remaining_time": "2 days, 8:11:12", "throughput": 20986.79, "total_tokens": 656050944} +{"current_steps": 1950, "total_steps": 14493, "loss": 0.3314, "lr": 4.574084925774023e-05, "epoch": 0.4037162599311612, "percentage": 13.45, "elapsed_time": "8:42:35", "remaining_time": "2 days, 8:01:30", "throughput": 21027.25, "total_tokens": 659325952} +{"current_steps": 1960, "total_steps": 14493, "loss": 0.3289, "lr": 4.5721721230617795e-05, "epoch": 0.40578659972568, "percentage": 13.52, "elapsed_time": "8:44:26", "remaining_time": "2 days, 7:53:27", "throughput": 21063.58, "total_tokens": 662790784} +{"current_steps": 1970, "total_steps": 14493, "loss": 0.3323, "lr": 4.57026171804667e-05, "epoch": 0.40785693952019875, "percentage": 13.59, "elapsed_time": "8:46:16", "remaining_time": "2 days, 7:45:24", "throughput": 21099.16, "total_tokens": 666229376} +{"current_steps": 1980, "total_steps": 14493, "loss": 0.3312, "lr": 4.568353705723692e-05, "epoch": 0.4099272793147175, "percentage": 13.66, "elapsed_time": "8:48:15", "remaining_time": "2 days, 7:38:24", "throughput": 21133.01, "total_tokens": 669815040} +{"current_steps": 1990, "total_steps": 14493, "loss": 0.3294, "lr": 4.566448081102455e-05, "epoch": 0.4119976191092363, "percentage": 13.73, "elapsed_time": "8:50:11", "remaining_time": "2 days, 7:31:09", "throughput": 21164.14, "total_tokens": 673265920} +{"current_steps": 2000, "total_steps": 14493, "loss": 0.3317, "lr": 4.564544839207128e-05, "epoch": 0.4140679589037551, "percentage": 13.8, "elapsed_time": "8:51:58", "remaining_time": "2 days, 7:22:56", "throughput": 
21200.36, "total_tokens": 676675840} +{"current_steps": 2010, "total_steps": 14493, "loss": 0.332, "lr": 4.562643975076387e-05, "epoch": 0.41613829869827385, "percentage": 13.87, "elapsed_time": "8:53:43", "remaining_time": "2 days, 7:14:41", "throughput": 21234.34, "total_tokens": 680001792} +{"current_steps": 2020, "total_steps": 14493, "loss": 0.3273, "lr": 4.560745483763357e-05, "epoch": 0.4182086384927926, "percentage": 13.94, "elapsed_time": "8:55:30", "remaining_time": "2 days, 7:06:36", "throughput": 21267.02, "total_tokens": 683313664} +{"current_steps": 2030, "total_steps": 14493, "loss": 0.3284, "lr": 4.5588493603355595e-05, "epoch": 0.4202789782873114, "percentage": 14.01, "elapsed_time": "8:57:23", "remaining_time": "2 days, 6:59:13", "throughput": 21297.1, "total_tokens": 686682752} +{"current_steps": 2040, "total_steps": 14493, "loss": 0.3296, "lr": 4.556955599874859e-05, "epoch": 0.4223493180818302, "percentage": 14.08, "elapsed_time": "8:59:12", "remaining_time": "2 days, 6:51:30", "throughput": 21330.24, "total_tokens": 690079360} +{"current_steps": 2050, "total_steps": 14493, "loss": 0.3322, "lr": 4.555064197477409e-05, "epoch": 0.42441965787634894, "percentage": 14.14, "elapsed_time": "9:01:05", "remaining_time": "2 days, 6:44:19", "throughput": 21362.54, "total_tokens": 693553536} +{"current_steps": 2060, "total_steps": 14493, "loss": 0.3274, "lr": 4.5531751482536e-05, "epoch": 0.4264899976708677, "percentage": 14.21, "elapsed_time": "9:02:59", "remaining_time": "2 days, 6:37:12", "throughput": 21394.62, "total_tokens": 697029248} +{"current_steps": 2070, "total_steps": 14493, "loss": 0.3292, "lr": 4.5512884473280024e-05, "epoch": 0.4285603374653865, "percentage": 14.28, "elapsed_time": "9:04:44", "remaining_time": "2 days, 6:29:12", "throughput": 21427.48, "total_tokens": 700338688} +{"current_steps": 2080, "total_steps": 14493, "loss": 0.3281, "lr": 4.549404089839322e-05, "epoch": 0.4306306772599053, "percentage": 14.35, "elapsed_time": 
"9:06:31", "remaining_time": "2 days, 6:21:30", "throughput": 21459.43, "total_tokens": 703678976} +{"current_steps": 2090, "total_steps": 14493, "loss": 0.328, "lr": 4.547522070940335e-05, "epoch": 0.43270101705442404, "percentage": 14.42, "elapsed_time": "9:08:12", "remaining_time": "2 days, 6:13:20", "throughput": 21494.07, "total_tokens": 706999936} +{"current_steps": 2100, "total_steps": 14493, "loss": 0.3322, "lr": 4.545642385797848e-05, "epoch": 0.43477135684894286, "percentage": 14.49, "elapsed_time": "9:09:55", "remaining_time": "2 days, 6:05:22", "throughput": 21527.77, "total_tokens": 710328192} +{"current_steps": 2110, "total_steps": 14493, "loss": 0.3297, "lr": 4.543765029592637e-05, "epoch": 0.4368416966434616, "percentage": 14.56, "elapsed_time": "9:11:41", "remaining_time": "2 days, 5:57:42", "throughput": 21562.01, "total_tokens": 713730816} +{"current_steps": 2120, "total_steps": 14493, "loss": 0.3283, "lr": 4.541889997519403e-05, "epoch": 0.4389120364379804, "percentage": 14.63, "elapsed_time": "9:13:31", "remaining_time": "2 days, 5:50:33", "throughput": 21593.39, "total_tokens": 717149312} +{"current_steps": 2130, "total_steps": 14493, "loss": 0.3307, "lr": 4.5400172847867095e-05, "epoch": 0.44098237623249914, "percentage": 14.7, "elapsed_time": "9:15:13", "remaining_time": "2 days, 5:42:38", "throughput": 21627.32, "total_tokens": 720478080} +{"current_steps": 2140, "total_steps": 14493, "loss": 0.3296, "lr": 4.5381468866169466e-05, "epoch": 0.44305271602701796, "percentage": 14.77, "elapsed_time": "9:16:57", "remaining_time": "2 days, 5:34:58", "throughput": 21660.08, "total_tokens": 723818752} +{"current_steps": 2150, "total_steps": 14493, "loss": 0.3265, "lr": 4.5362787982462616e-05, "epoch": 0.4451230558215367, "percentage": 14.83, "elapsed_time": "9:18:38", "remaining_time": "2 days, 5:27:05", "throughput": 21694.74, "total_tokens": 727167104} +{"current_steps": 2160, "total_steps": 14493, "loss": 0.3264, "lr": 4.5344130149245275e-05, 
"epoch": 0.4471933956160555, "percentage": 14.9, "elapsed_time": "9:20:22", "remaining_time": "2 days, 5:19:33", "throughput": 21725.46, "total_tokens": 730456448} +{"current_steps": 2170, "total_steps": 14493, "loss": 0.3257, "lr": 4.5325495319152715e-05, "epoch": 0.44926373541057424, "percentage": 14.97, "elapsed_time": "9:22:08", "remaining_time": "2 days, 5:12:19", "throughput": 21755.88, "total_tokens": 733799808} +{"current_steps": 2180, "total_steps": 14493, "loss": 0.3278, "lr": 4.530688344495644e-05, "epoch": 0.45133407520509305, "percentage": 15.04, "elapsed_time": "9:23:57", "remaining_time": "2 days, 5:05:21", "throughput": 21786.63, "total_tokens": 737210496} +{"current_steps": 2190, "total_steps": 14493, "loss": 0.3276, "lr": 4.528829447956357e-05, "epoch": 0.4534044149996118, "percentage": 15.11, "elapsed_time": "9:25:42", "remaining_time": "2 days, 4:58:02", "throughput": 21818.07, "total_tokens": 740558720} +{"current_steps": 2200, "total_steps": 14493, "loss": 0.3274, "lr": 4.526972837601633e-05, "epoch": 0.4554747547941306, "percentage": 15.18, "elapsed_time": "9:27:29", "remaining_time": "2 days, 4:50:57", "throughput": 21847.19, "total_tokens": 743879040} +{"current_steps": 2210, "total_steps": 14493, "loss": 0.3299, "lr": 4.525118508749165e-05, "epoch": 0.45754509458864934, "percentage": 15.25, "elapsed_time": "9:29:23", "remaining_time": "2 days, 4:44:37", "throughput": 21875.81, "total_tokens": 747353472} +{"current_steps": 2220, "total_steps": 14493, "loss": 0.3278, "lr": 4.5232664567300546e-05, "epoch": 0.45961543438316815, "percentage": 15.32, "elapsed_time": "9:31:09", "remaining_time": "2 days, 4:37:33", "throughput": 21906.46, "total_tokens": 750718720} +{"current_steps": 2230, "total_steps": 14493, "loss": 0.3244, "lr": 4.521416676888773e-05, "epoch": 0.4616857741776869, "percentage": 15.39, "elapsed_time": "9:32:59", "remaining_time": "2 days, 4:30:57", "throughput": 21936.35, "total_tokens": 754164352} +{"current_steps": 2240, 
"total_steps": 14493, "loss": 0.324, "lr": 4.519569164583107e-05, "epoch": 0.4637561139722057, "percentage": 15.46, "elapsed_time": "9:34:46", "remaining_time": "2 days, 4:24:03", "throughput": 21966.16, "total_tokens": 757531776} +{"current_steps": 2250, "total_steps": 14493, "loss": 0.3291, "lr": 4.517723915184109e-05, "epoch": 0.4658264537667245, "percentage": 15.52, "elapsed_time": "9:36:41", "remaining_time": "2 days, 4:17:58", "throughput": 21992.58, "total_tokens": 760976256} +{"current_steps": 2260, "total_steps": 14493, "loss": 0.3268, "lr": 4.5158809240760506e-05, "epoch": 0.46789679356124325, "percentage": 15.59, "elapsed_time": "9:38:24", "remaining_time": "2 days, 4:10:50", "throughput": 22024.22, "total_tokens": 764344320} +{"current_steps": 2270, "total_steps": 14493, "loss": 0.3272, "lr": 4.514040186656375e-05, "epoch": 0.469967133355762, "percentage": 15.66, "elapsed_time": "9:40:08", "remaining_time": "2 days, 4:03:49", "throughput": 22053.92, "total_tokens": 767666560} +{"current_steps": 2280, "total_steps": 14493, "loss": 0.3235, "lr": 4.512201698335644e-05, "epoch": 0.4720374731502808, "percentage": 15.73, "elapsed_time": "9:41:53", "remaining_time": "2 days, 3:56:59", "throughput": 22081.62, "total_tokens": 770956160} +{"current_steps": 2290, "total_steps": 14493, "loss": 0.3264, "lr": 4.510365454537496e-05, "epoch": 0.4741078129447996, "percentage": 15.8, "elapsed_time": "9:43:36", "remaining_time": "2 days, 3:49:55", "throughput": 22111.11, "total_tokens": 774247680} +{"current_steps": 2300, "total_steps": 14493, "loss": 0.3265, "lr": 4.5085314506985945e-05, "epoch": 0.47617815273931835, "percentage": 15.87, "elapsed_time": "9:45:25", "remaining_time": "2 days, 3:43:31", "throughput": 22139.29, "total_tokens": 777657216} +{"current_steps": 2310, "total_steps": 14493, "loss": 0.3259, "lr": 4.50669968226858e-05, "epoch": 0.4782484925338371, "percentage": 15.94, "elapsed_time": "9:47:16", "remaining_time": "2 days, 3:37:16", "throughput": 
22166.21, "total_tokens": 781054592} +{"current_steps": 2320, "total_steps": 14493, "loss": 0.3253, "lr": 4.504870144710027e-05, "epoch": 0.48031883232835587, "percentage": 16.01, "elapsed_time": "9:49:02", "remaining_time": "2 days, 3:30:39", "throughput": 22195.26, "total_tokens": 784428544} +{"current_steps": 2330, "total_steps": 14493, "loss": 0.3269, "lr": 4.5030428334983884e-05, "epoch": 0.4823891721228747, "percentage": 16.08, "elapsed_time": "9:50:51", "remaining_time": "2 days, 3:24:23", "throughput": 22222.36, "total_tokens": 787817088} +{"current_steps": 2340, "total_steps": 14493, "loss": 0.3276, "lr": 4.501217744121959e-05, "epoch": 0.48445951191739345, "percentage": 16.15, "elapsed_time": "9:52:41", "remaining_time": "2 days, 3:18:10", "throughput": 22248.28, "total_tokens": 791175680} +{"current_steps": 2350, "total_steps": 14493, "loss": 0.3233, "lr": 4.499394872081821e-05, "epoch": 0.4865298517119122, "percentage": 16.21, "elapsed_time": "9:54:30", "remaining_time": "2 days, 3:11:56", "throughput": 22276.17, "total_tokens": 794597888} +{"current_steps": 2360, "total_steps": 14493, "loss": 0.3274, "lr": 4.4975742128918e-05, "epoch": 0.48860019150643097, "percentage": 16.28, "elapsed_time": "9:56:20", "remaining_time": "2 days, 3:05:51", "throughput": 22301.48, "total_tokens": 797959552} +{"current_steps": 2370, "total_steps": 14493, "loss": 0.3256, "lr": 4.495755762078418e-05, "epoch": 0.4906705313009498, "percentage": 16.35, "elapsed_time": "9:58:09", "remaining_time": "2 days, 2:59:42", "throughput": 22330.23, "total_tokens": 801424256} +{"current_steps": 2380, "total_steps": 14493, "loss": 0.3207, "lr": 4.49393951518085e-05, "epoch": 0.49274087109546855, "percentage": 16.42, "elapsed_time": "9:59:51", "remaining_time": "2 days, 2:52:58", "throughput": 22358.47, "total_tokens": 804716544} +{"current_steps": 2390, "total_steps": 14493, "loss": 0.3263, "lr": 4.4921254677508716e-05, "epoch": 0.4948112108899873, "percentage": 16.49, "elapsed_time": 
"10:01:46", "remaining_time": "2 days, 2:47:24", "throughput": 22382.28, "total_tokens": 808146176} +{"current_steps": 2400, "total_steps": 14493, "loss": 0.3219, "lr": 4.490313615352821e-05, "epoch": 0.49688155068450607, "percentage": 16.56, "elapsed_time": "10:03:32", "remaining_time": "2 days, 2:41:08", "throughput": 22409.39, "total_tokens": 811510272} +{"current_steps": 2410, "total_steps": 14493, "loss": 0.3234, "lr": 4.48850395356355e-05, "epoch": 0.4989518904790249, "percentage": 16.63, "elapsed_time": "10:05:17", "remaining_time": "2 days, 2:34:43", "throughput": 22437.86, "total_tokens": 814883456} +{"current_steps": 2420, "total_steps": 14493, "loss": 0.3227, "lr": 4.486696477972375e-05, "epoch": 0.5010222302735436, "percentage": 16.7, "elapsed_time": "10:07:09", "remaining_time": "2 days, 2:29:02", "throughput": 22462.87, "total_tokens": 818317312} +{"current_steps": 2430, "total_steps": 14493, "loss": 0.3243, "lr": 4.484891184181041e-05, "epoch": 0.5030925700680624, "percentage": 16.77, "elapsed_time": "10:08:52", "remaining_time": "2 days, 2:22:33", "throughput": 22492.24, "total_tokens": 821692928} +{"current_steps": 2440, "total_steps": 14493, "loss": 0.3232, "lr": 4.483088067803662e-05, "epoch": 0.5051629098625812, "percentage": 16.84, "elapsed_time": "10:10:38", "remaining_time": "2 days, 2:16:27", "throughput": 22517.87, "total_tokens": 825029888} +{"current_steps": 2450, "total_steps": 14493, "loss": 0.3216, "lr": 4.481287124466697e-05, "epoch": 0.5072332496570999, "percentage": 16.9, "elapsed_time": "10:12:26", "remaining_time": "2 days, 2:10:26", "throughput": 22543.46, "total_tokens": 828385792} +{"current_steps": 2460, "total_steps": 14493, "loss": 0.3243, "lr": 4.479488349808885e-05, "epoch": 0.5093035894516188, "percentage": 16.97, "elapsed_time": "10:14:12", "remaining_time": "2 days, 2:04:23", "throughput": 22569.62, "total_tokens": 831749888} +{"current_steps": 2470, "total_steps": 14493, "loss": 0.326, "lr": 4.4776917394812114e-05, 
"epoch": 0.5113739292461376, "percentage": 17.04, "elapsed_time": "10:16:01", "remaining_time": "2 days, 1:58:34", "throughput": 22595.63, "total_tokens": 835169792} +{"current_steps": 2480, "total_steps": 14493, "loss": 0.3249, "lr": 4.475897289146862e-05, "epoch": 0.5134442690406563, "percentage": 17.11, "elapsed_time": "10:17:47", "remaining_time": "2 days, 1:52:31", "throughput": 22621.94, "total_tokens": 838530304} +{"current_steps": 2490, "total_steps": 14493, "loss": 0.325, "lr": 4.4741049944811806e-05, "epoch": 0.5155146088351751, "percentage": 17.18, "elapsed_time": "10:19:38", "remaining_time": "2 days, 1:46:57", "throughput": 22647.9, "total_tokens": 842010240} +{"current_steps": 2500, "total_steps": 14493, "loss": 0.3222, "lr": 4.472314851171621e-05, "epoch": 0.5175849486296938, "percentage": 17.25, "elapsed_time": "10:21:29", "remaining_time": "2 days, 1:41:24", "throughput": 22671.72, "total_tokens": 845412224} +{"current_steps": 2510, "total_steps": 14493, "loss": 0.3215, "lr": 4.4705268549177084e-05, "epoch": 0.5196552884242126, "percentage": 17.32, "elapsed_time": "10:23:12", "remaining_time": "2 days, 1:35:14", "throughput": 22697.61, "total_tokens": 848714368} +{"current_steps": 2520, "total_steps": 14493, "loss": 0.3235, "lr": 4.468741001430989e-05, "epoch": 0.5217256282187314, "percentage": 17.39, "elapsed_time": "10:25:00", "remaining_time": "2 days, 1:29:33", "throughput": 22721.85, "total_tokens": 852089088} +{"current_steps": 2530, "total_steps": 14493, "loss": 0.3214, "lr": 4.466957286434997e-05, "epoch": 0.5237959680132501, "percentage": 17.46, "elapsed_time": "10:26:47", "remaining_time": "2 days, 1:23:47", "throughput": 22747.7, "total_tokens": 855493632} +{"current_steps": 2540, "total_steps": 14493, "loss": 0.3208, "lr": 4.4651757056652e-05, "epoch": 0.525866307807769, "percentage": 17.53, "elapsed_time": "10:28:32", "remaining_time": "2 days, 1:17:51", "throughput": 22773.28, "total_tokens": 858837120} +{"current_steps": 2550, 
"total_steps": 14493, "loss": 0.3231, "lr": 4.463396254868968e-05, "epoch": 0.5279366476022878, "percentage": 17.59, "elapsed_time": "10:30:20", "remaining_time": "2 days, 1:12:12", "throughput": 22797.51, "total_tokens": 862209664} +{"current_steps": 2560, "total_steps": 14493, "loss": 0.3234, "lr": 4.461618929805519e-05, "epoch": 0.5300069873968065, "percentage": 17.66, "elapsed_time": "10:32:09", "remaining_time": "2 days, 1:06:43", "throughput": 22822.64, "total_tokens": 865659392} +{"current_steps": 2570, "total_steps": 14493, "loss": 0.325, "lr": 4.459843726245888e-05, "epoch": 0.5320773271913253, "percentage": 17.73, "elapsed_time": "10:34:00", "remaining_time": "2 days, 1:01:20", "throughput": 22847.62, "total_tokens": 869132288} +{"current_steps": 2580, "total_steps": 14493, "loss": 0.322, "lr": 4.458070639972875e-05, "epoch": 0.534147666985844, "percentage": 17.8, "elapsed_time": "10:35:48", "remaining_time": "2 days, 0:55:50", "throughput": 22871.69, "total_tokens": 872530048} +{"current_steps": 2590, "total_steps": 14493, "loss": 0.3209, "lr": 4.456299666781007e-05, "epoch": 0.5362180067803628, "percentage": 17.87, "elapsed_time": "10:37:36", "remaining_time": "2 days, 0:50:15", "throughput": 22896.8, "total_tokens": 875943424} +{"current_steps": 2600, "total_steps": 14493, "loss": 0.3154, "lr": 4.4545308024764984e-05, "epoch": 0.5382883465748816, "percentage": 17.94, "elapsed_time": "10:39:22", "remaining_time": "2 days, 0:44:39", "throughput": 22920.96, "total_tokens": 879308928} +{"current_steps": 2610, "total_steps": 14493, "loss": 0.3231, "lr": 4.452764042877207e-05, "epoch": 0.5403586863694004, "percentage": 18.01, "elapsed_time": "10:41:17", "remaining_time": "2 days, 0:39:42", "throughput": 22943.19, "total_tokens": 882794112} +{"current_steps": 2620, "total_steps": 14493, "loss": 0.3225, "lr": 4.45099938381259e-05, "epoch": 0.5424290261639192, "percentage": 18.08, "elapsed_time": "10:43:08", "remaining_time": "2 days, 0:34:32", "throughput": 
22965.97, "total_tokens": 886231680} +{"current_steps": 2630, "total_steps": 14493, "loss": 0.3224, "lr": 4.449236821123667e-05, "epoch": 0.544499365958438, "percentage": 18.15, "elapsed_time": "10:44:56", "remaining_time": "2 days, 0:29:08", "throughput": 22989.1, "total_tokens": 889608064} +{"current_steps": 2640, "total_steps": 14493, "loss": 0.3218, "lr": 4.447476350662976e-05, "epoch": 0.5465697057529567, "percentage": 18.22, "elapsed_time": "10:46:46", "remaining_time": "2 days, 0:23:51", "throughput": 23012.0, "total_tokens": 893010048} +{"current_steps": 2650, "total_steps": 14493, "loss": 0.3223, "lr": 4.4457179682945346e-05, "epoch": 0.5486400455474755, "percentage": 18.28, "elapsed_time": "10:48:32", "remaining_time": "2 days, 0:18:23", "throughput": 23034.7, "total_tokens": 896342656} +{"current_steps": 2660, "total_steps": 14493, "loss": 0.3227, "lr": 4.443961669893798e-05, "epoch": 0.5507103853419942, "percentage": 18.35, "elapsed_time": "10:50:17", "remaining_time": "2 days, 0:12:49", "throughput": 23058.07, "total_tokens": 899669376} +{"current_steps": 2670, "total_steps": 14493, "loss": 0.3215, "lr": 4.4422074513476155e-05, "epoch": 0.552780725136513, "percentage": 18.42, "elapsed_time": "10:52:07", "remaining_time": "2 days, 0:07:42", "throughput": 23080.12, "total_tokens": 903077888} +{"current_steps": 2680, "total_steps": 14493, "loss": 0.3213, "lr": 4.4404553085541955e-05, "epoch": 0.5548510649310318, "percentage": 18.49, "elapsed_time": "10:53:48", "remaining_time": "2 days, 0:01:52", "throughput": 23105.59, "total_tokens": 906395904} +{"current_steps": 2690, "total_steps": 14493, "loss": 0.3223, "lr": 4.438705237423063e-05, "epoch": 0.5569214047255506, "percentage": 18.56, "elapsed_time": "10:55:30", "remaining_time": "1 day, 23:56:11", "throughput": 23130.34, "total_tokens": 909726080} +{"current_steps": 2700, "total_steps": 14493, "loss": 0.3214, "lr": 4.436957233875017e-05, "epoch": 0.5589917445200694, "percentage": 18.63, "elapsed_time": 
"10:57:22", "remaining_time": "1 day, 23:51:15", "throughput": 23151.93, "total_tokens": 913165440} +{"current_steps": 2710, "total_steps": 14493, "loss": 0.3204, "lr": 4.4352112938420956e-05, "epoch": 0.5610620843145881, "percentage": 18.7, "elapsed_time": "10:59:12", "remaining_time": "1 day, 23:46:14", "throughput": 23174.04, "total_tokens": 916596480} +{"current_steps": 2720, "total_steps": 14493, "loss": 0.3192, "lr": 4.433467413267529e-05, "epoch": 0.5631324241091069, "percentage": 18.77, "elapsed_time": "11:00:56", "remaining_time": "1 day, 23:40:44", "throughput": 23197.33, "total_tokens": 919918464} +{"current_steps": 2730, "total_steps": 14493, "loss": 0.3245, "lr": 4.431725588105708e-05, "epoch": 0.5652027639036257, "percentage": 18.84, "elapsed_time": "11:02:44", "remaining_time": "1 day, 23:35:36", "throughput": 23219.35, "total_tokens": 923304832} +{"current_steps": 2740, "total_steps": 14493, "loss": 0.3208, "lr": 4.4299858143221377e-05, "epoch": 0.5672731036981444, "percentage": 18.91, "elapsed_time": "11:04:25", "remaining_time": "1 day, 23:30:00", "throughput": 23242.25, "total_tokens": 926570624} +{"current_steps": 2750, "total_steps": 14493, "loss": 0.3174, "lr": 4.4282480878934065e-05, "epoch": 0.5693434434926632, "percentage": 18.97, "elapsed_time": "11:06:05", "remaining_time": "1 day, 23:24:20", "throughput": 23265.45, "total_tokens": 929816064} +{"current_steps": 2760, "total_steps": 14493, "loss": 0.319, "lr": 4.4265124048071346e-05, "epoch": 0.571413783287182, "percentage": 19.04, "elapsed_time": "11:07:52", "remaining_time": "1 day, 23:19:10", "throughput": 23287.21, "total_tokens": 933172608} +{"current_steps": 2770, "total_steps": 14493, "loss": 0.3207, "lr": 4.4247787610619477e-05, "epoch": 0.5734841230817008, "percentage": 19.11, "elapsed_time": "11:09:41", "remaining_time": "1 day, 23:14:11", "throughput": 23308.51, "total_tokens": 936561280} +{"current_steps": 2780, "total_steps": 14493, "loss": 0.3187, "lr": 4.42304715266743e-05, 
"epoch": 0.5755544628762196, "percentage": 19.18, "elapsed_time": "11:11:24", "remaining_time": "1 day, 23:08:52", "throughput": 23332.08, "total_tokens": 939926784} +{"current_steps": 2790, "total_steps": 14493, "loss": 0.3182, "lr": 4.421317575644092e-05, "epoch": 0.5776248026707383, "percentage": 19.25, "elapsed_time": "11:13:12", "remaining_time": "1 day, 23:03:51", "throughput": 23353.51, "total_tokens": 943307392} +{"current_steps": 2800, "total_steps": 14493, "loss": 0.3214, "lr": 4.419590026023325e-05, "epoch": 0.5796951424652571, "percentage": 19.32, "elapsed_time": "11:15:01", "remaining_time": "1 day, 22:58:58", "throughput": 23374.6, "total_tokens": 946712320} +{"current_steps": 2810, "total_steps": 14493, "loss": 0.3188, "lr": 4.417864499847368e-05, "epoch": 0.5817654822597759, "percentage": 19.39, "elapsed_time": "11:16:56", "remaining_time": "1 day, 22:54:29", "throughput": 23392.9, "total_tokens": 950137472} +{"current_steps": 2820, "total_steps": 14493, "loss": 0.3179, "lr": 4.4161409931692676e-05, "epoch": 0.5838358220542946, "percentage": 19.46, "elapsed_time": "11:18:41", "remaining_time": "1 day, 22:49:21", "throughput": 23413.84, "total_tokens": 953447168} +{"current_steps": 2830, "total_steps": 14493, "loss": 0.3185, "lr": 4.414419502052841e-05, "epoch": 0.5859061618488134, "percentage": 19.53, "elapsed_time": "11:20:31", "remaining_time": "1 day, 22:44:35", "throughput": 23434.64, "total_tokens": 956877696} +{"current_steps": 2840, "total_steps": 14493, "loss": 0.3217, "lr": 4.412700022572637e-05, "epoch": 0.5879765016433323, "percentage": 19.6, "elapsed_time": "11:22:20", "remaining_time": "1 day, 22:39:46", "throughput": 23455.41, "total_tokens": 960281088} +{"current_steps": 2850, "total_steps": 14493, "loss": 0.3176, "lr": 4.410982550813902e-05, "epoch": 0.590046841437851, "percentage": 19.66, "elapsed_time": "11:24:03", "remaining_time": "1 day, 22:34:32", "throughput": 23478.14, "total_tokens": 963620864} +{"current_steps": 2860, 
"total_steps": 14493, "loss": 0.3193, "lr": 4.409267082872535e-05, "epoch": 0.5921171812323698, "percentage": 19.73, "elapsed_time": "11:25:45", "remaining_time": "1 day, 22:29:17", "throughput": 23501.18, "total_tokens": 966960512} +{"current_steps": 2870, "total_steps": 14493, "loss": 0.3194, "lr": 4.407553614855059e-05, "epoch": 0.5941875210268885, "percentage": 19.8, "elapsed_time": "11:27:27", "remaining_time": "1 day, 22:24:05", "throughput": 23523.34, "total_tokens": 970279040} +{"current_steps": 2880, "total_steps": 14493, "loss": 0.3179, "lr": 4.405842142878579e-05, "epoch": 0.5962578608214073, "percentage": 19.87, "elapsed_time": "11:29:16", "remaining_time": "1 day, 22:19:19", "throughput": 23544.44, "total_tokens": 973706752} +{"current_steps": 2890, "total_steps": 14493, "loss": 0.3179, "lr": 4.404132663070745e-05, "epoch": 0.5983282006159261, "percentage": 19.94, "elapsed_time": "11:30:55", "remaining_time": "1 day, 22:14:00", "throughput": 23566.91, "total_tokens": 976987008} +{"current_steps": 2900, "total_steps": 14493, "loss": 0.3207, "lr": 4.402425171569716e-05, "epoch": 0.6003985404104448, "percentage": 20.01, "elapsed_time": "11:32:40", "remaining_time": "1 day, 22:09:00", "throughput": 23589.3, "total_tokens": 980378752} +{"current_steps": 2910, "total_steps": 14493, "loss": 0.3198, "lr": 4.400719664524127e-05, "epoch": 0.6024688802049636, "percentage": 20.08, "elapsed_time": "11:34:31", "remaining_time": "1 day, 22:04:29", "throughput": 23608.22, "total_tokens": 983788288} +{"current_steps": 2920, "total_steps": 14493, "loss": 0.32, "lr": 4.399016138093044e-05, "epoch": 0.6045392199994825, "percentage": 20.15, "elapsed_time": "11:36:13", "remaining_time": "1 day, 21:59:23", "throughput": 23630.91, "total_tokens": 987144704} +{"current_steps": 2930, "total_steps": 14493, "loss": 0.3175, "lr": 4.397314588445937e-05, "epoch": 0.6066095597940012, "percentage": 20.22, "elapsed_time": "11:38:02", "remaining_time": "1 day, 21:54:46", "throughput": 
23649.33, "total_tokens": 990495232} +{"current_steps": 2940, "total_steps": 14493, "loss": 0.315, "lr": 4.395615011762637e-05, "epoch": 0.60867989958852, "percentage": 20.29, "elapsed_time": "11:39:50", "remaining_time": "1 day, 21:50:07", "throughput": 23669.72, "total_tokens": 993914752} +{"current_steps": 2950, "total_steps": 14493, "loss": 0.318, "lr": 4.3939174042333057e-05, "epoch": 0.6107502393830387, "percentage": 20.35, "elapsed_time": "11:41:32", "remaining_time": "1 day, 21:45:02", "throughput": 23692.04, "total_tokens": 997253376} +{"current_steps": 2960, "total_steps": 14493, "loss": 0.3195, "lr": 4.3922217620583904e-05, "epoch": 0.6128205791775575, "percentage": 20.42, "elapsed_time": "11:43:22", "remaining_time": "1 day, 21:40:32", "throughput": 23711.21, "total_tokens": 1000667776} +{"current_steps": 2970, "total_steps": 14493, "loss": 0.319, "lr": 4.3905280814486025e-05, "epoch": 0.6148909189720763, "percentage": 20.49, "elapsed_time": "11:45:09", "remaining_time": "1 day, 21:35:50", "throughput": 23733.03, "total_tokens": 1004124544} +{"current_steps": 2980, "total_steps": 14493, "loss": 0.3169, "lr": 4.388836358624867e-05, "epoch": 0.616961258766595, "percentage": 20.56, "elapsed_time": "11:47:01", "remaining_time": "1 day, 21:31:30", "throughput": 23751.29, "total_tokens": 1007554304} +{"current_steps": 2990, "total_steps": 14493, "loss": 0.3176, "lr": 4.3871465898182976e-05, "epoch": 0.6190315985611139, "percentage": 20.63, "elapsed_time": "11:48:47", "remaining_time": "1 day, 21:26:49", "throughput": 23770.32, "total_tokens": 1010888064} +{"current_steps": 3000, "total_steps": 14493, "loss": 0.3143, "lr": 4.385458771270156e-05, "epoch": 0.6211019383556327, "percentage": 20.7, "elapsed_time": "11:50:32", "remaining_time": "1 day, 21:22:03", "throughput": 23790.86, "total_tokens": 1014255744} +{"current_steps": 3010, "total_steps": 14493, "loss": 0.3145, "lr": 4.3837728992318205e-05, "epoch": 0.6231722781501514, "percentage": 20.77, 
"elapsed_time": "11:52:21", "remaining_time": "1 day, 21:17:34", "throughput": 23809.31, "total_tokens": 1017633920} +{"current_steps": 3020, "total_steps": 14493, "loss": 0.3147, "lr": 4.382088969964746e-05, "epoch": 0.6252426179446702, "percentage": 20.84, "elapsed_time": "11:54:07", "remaining_time": "1 day, 21:12:57", "throughput": 23828.86, "total_tokens": 1021006976} +{"current_steps": 3030, "total_steps": 14493, "loss": 0.3149, "lr": 4.380406979740436e-05, "epoch": 0.6273129577391889, "percentage": 20.91, "elapsed_time": "11:55:53", "remaining_time": "1 day, 21:08:18", "throughput": 23848.45, "total_tokens": 1024365696} +{"current_steps": 3040, "total_steps": 14493, "loss": 0.316, "lr": 4.3787269248403994e-05, "epoch": 0.6293832975337077, "percentage": 20.98, "elapsed_time": "11:57:33", "remaining_time": "1 day, 21:03:19", "throughput": 23869.92, "total_tokens": 1027672320} +{"current_steps": 3050, "total_steps": 14493, "loss": 0.3165, "lr": 4.377048801556126e-05, "epoch": 0.6314536373282265, "percentage": 21.04, "elapsed_time": "11:59:17", "remaining_time": "1 day, 20:58:38", "throughput": 23888.82, "total_tokens": 1030982400} +{"current_steps": 3060, "total_steps": 14493, "loss": 0.3196, "lr": 4.3753726061890446e-05, "epoch": 0.6335239771227452, "percentage": 21.11, "elapsed_time": "12:01:13", "remaining_time": "1 day, 20:54:42", "throughput": 23904.85, "total_tokens": 1034450176} +{"current_steps": 3070, "total_steps": 14493, "loss": 0.3161, "lr": 4.373698335050488e-05, "epoch": 0.6355943169172641, "percentage": 21.18, "elapsed_time": "12:02:59", "remaining_time": "1 day, 20:50:07", "throughput": 23923.41, "total_tokens": 1037777536} +{"current_steps": 3080, "total_steps": 14493, "loss": 0.3154, "lr": 4.372025984461667e-05, "epoch": 0.6376646567117829, "percentage": 21.25, "elapsed_time": "12:04:47", "remaining_time": "1 day, 20:45:42", "throughput": 23942.15, "total_tokens": 1041178368} +{"current_steps": 3090, "total_steps": 14493, "loss": 0.3162, "lr": 
4.370355550753629e-05, "epoch": 0.6397349965063016, "percentage": 21.32, "elapsed_time": "12:06:34", "remaining_time": "1 day, 20:41:17", "throughput": 23959.36, "total_tokens": 1044502784} +{"current_steps": 3100, "total_steps": 14493, "loss": 0.3172, "lr": 4.368687030267226e-05, "epoch": 0.6418053363008204, "percentage": 21.39, "elapsed_time": "12:08:22", "remaining_time": "1 day, 20:36:52", "throughput": 23978.71, "total_tokens": 1047922688} +{"current_steps": 3110, "total_steps": 14493, "loss": 0.3147, "lr": 4.367020419353081e-05, "epoch": 0.6438756760953391, "percentage": 21.46, "elapsed_time": "12:10:08", "remaining_time": "1 day, 20:32:23", "throughput": 23997.21, "total_tokens": 1051274496} +{"current_steps": 3120, "total_steps": 14493, "loss": 0.3137, "lr": 4.365355714371558e-05, "epoch": 0.6459460158898579, "percentage": 21.53, "elapsed_time": "12:11:56", "remaining_time": "1 day, 20:28:04", "throughput": 24015.51, "total_tokens": 1054681472} +{"current_steps": 3130, "total_steps": 14493, "loss": 0.318, "lr": 4.3636929116927235e-05, "epoch": 0.6480163556843767, "percentage": 21.6, "elapsed_time": "12:13:43", "remaining_time": "1 day, 20:23:39", "throughput": 24034.4, "total_tokens": 1058071680} +{"current_steps": 3140, "total_steps": 14493, "loss": 0.3148, "lr": 4.362032007696314e-05, "epoch": 0.6500866954788955, "percentage": 21.67, "elapsed_time": "12:15:25", "remaining_time": "1 day, 20:19:01", "throughput": 24055.22, "total_tokens": 1061453824} +{"current_steps": 3150, "total_steps": 14493, "loss": 0.3164, "lr": 4.360372998771707e-05, "epoch": 0.6521570352734143, "percentage": 21.73, "elapsed_time": "12:17:13", "remaining_time": "1 day, 20:14:41", "throughput": 24072.85, "total_tokens": 1064819456} +{"current_steps": 3160, "total_steps": 14493, "loss": 0.3168, "lr": 4.358715881317884e-05, "epoch": 0.654227375067933, "percentage": 21.8, "elapsed_time": "12:18:58", "remaining_time": "1 day, 20:10:14", "throughput": 24091.42, "total_tokens": 1068173312} 
+{"current_steps": 3170, "total_steps": 14493, "loss": 0.3171, "lr": 4.357060651743399e-05, "epoch": 0.6562977148624518, "percentage": 21.87, "elapsed_time": "12:20:42", "remaining_time": "1 day, 20:05:46", "throughput": 24109.36, "total_tokens": 1071492096} +{"current_steps": 3180, "total_steps": 14493, "loss": 0.3139, "lr": 4.3554073064663454e-05, "epoch": 0.6583680546569706, "percentage": 21.94, "elapsed_time": "12:22:36", "remaining_time": "1 day, 20:01:52", "throughput": 24124.87, "total_tokens": 1074921984} +{"current_steps": 3190, "total_steps": 14493, "loss": 0.3128, "lr": 4.353755841914325e-05, "epoch": 0.6604383944514893, "percentage": 22.01, "elapsed_time": "12:24:20", "remaining_time": "1 day, 19:57:24", "throughput": 24143.57, "total_tokens": 1078270080} +{"current_steps": 3200, "total_steps": 14493, "loss": 0.3173, "lr": 4.3521062545244116e-05, "epoch": 0.6625087342460081, "percentage": 22.08, "elapsed_time": "12:26:13", "remaining_time": "1 day, 19:53:27", "throughput": 24159.56, "total_tokens": 1081703808} +{"current_steps": 3210, "total_steps": 14493, "loss": 0.3167, "lr": 4.350458540743126e-05, "epoch": 0.6645790740405269, "percentage": 22.15, "elapsed_time": "12:28:00", "remaining_time": "1 day, 19:49:12", "throughput": 24177.66, "total_tokens": 1085105920} +{"current_steps": 3220, "total_steps": 14493, "loss": 0.3171, "lr": 4.3488126970263955e-05, "epoch": 0.6666494138350457, "percentage": 22.22, "elapsed_time": "12:29:47", "remaining_time": "1 day, 19:44:59", "throughput": 24194.66, "total_tokens": 1088463232} +{"current_steps": 3230, "total_steps": 14493, "loss": 0.3143, "lr": 4.347168719839527e-05, "epoch": 0.6687197536295645, "percentage": 22.29, "elapsed_time": "12:31:38", "remaining_time": "1 day, 19:40:58", "throughput": 24210.73, "total_tokens": 1091865472} +{"current_steps": 3240, "total_steps": 14493, "loss": 0.3168, "lr": 4.345526605657173e-05, "epoch": 0.6707900934240832, "percentage": 22.36, "elapsed_time": "12:33:27", 
"remaining_time": "1 day, 19:36:51", "throughput": 24227.58, "total_tokens": 1095259776} +{"current_steps": 3250, "total_steps": 14493, "loss": 0.3163, "lr": 4.343886350963304e-05, "epoch": 0.672860433218602, "percentage": 22.42, "elapsed_time": "12:35:13", "remaining_time": "1 day, 19:32:38", "throughput": 24245.79, "total_tokens": 1098671104} +{"current_steps": 3260, "total_steps": 14493, "loss": 0.3134, "lr": 4.3422479522511697e-05, "epoch": 0.6749307730131208, "percentage": 22.49, "elapsed_time": "12:37:07", "remaining_time": "1 day, 19:28:48", "throughput": 24261.67, "total_tokens": 1102139904} +{"current_steps": 3270, "total_steps": 14493, "loss": 0.3158, "lr": 4.340611406023272e-05, "epoch": 0.6770011128076395, "percentage": 22.56, "elapsed_time": "12:38:51", "remaining_time": "1 day, 19:24:27", "throughput": 24279.52, "total_tokens": 1105474816} +{"current_steps": 3280, "total_steps": 14493, "loss": 0.3113, "lr": 4.338976708791336e-05, "epoch": 0.6790714526021583, "percentage": 22.63, "elapsed_time": "12:40:38", "remaining_time": "1 day, 19:20:18", "throughput": 24297.3, "total_tokens": 1108886400} +{"current_steps": 3290, "total_steps": 14493, "loss": 0.3141, "lr": 4.337343857076272e-05, "epoch": 0.6811417923966772, "percentage": 22.7, "elapsed_time": "12:42:25", "remaining_time": "1 day, 19:16:11", "throughput": 24314.03, "total_tokens": 1112259200} +{"current_steps": 3300, "total_steps": 14493, "loss": 0.3197, "lr": 4.33571284740815e-05, "epoch": 0.6832121321911959, "percentage": 22.77, "elapsed_time": "12:44:13", "remaining_time": "1 day, 19:12:06", "throughput": 24330.4, "total_tokens": 1115632768} +{"current_steps": 3310, "total_steps": 14493, "loss": 0.3161, "lr": 4.3340836763261675e-05, "epoch": 0.6852824719857147, "percentage": 22.84, "elapsed_time": "12:45:56", "remaining_time": "1 day, 19:07:47", "throughput": 24349.58, "total_tokens": 1119030784} +{"current_steps": 3320, "total_steps": 14493, "loss": 0.3147, "lr": 4.332456340378618e-05, "epoch": 
0.6873528117802334, "percentage": 22.91, "elapsed_time": "12:47:48", "remaining_time": "1 day, 19:03:57", "throughput": 24365.68, "total_tokens": 1122492672} +{"current_steps": 3330, "total_steps": 14493, "loss": 0.3134, "lr": 4.3308308361228586e-05, "epoch": 0.6894231515747522, "percentage": 22.98, "elapsed_time": "12:49:33", "remaining_time": "1 day, 18:59:44", "throughput": 24383.64, "total_tokens": 1125872512} +{"current_steps": 3340, "total_steps": 14493, "loss": 0.3175, "lr": 4.329207160125282e-05, "epoch": 0.691493491369271, "percentage": 23.05, "elapsed_time": "12:51:14", "remaining_time": "1 day, 18:55:19", "throughput": 24402.01, "total_tokens": 1129183872} +{"current_steps": 3350, "total_steps": 14493, "loss": 0.3151, "lr": 4.327585308961287e-05, "epoch": 0.6935638311637897, "percentage": 23.11, "elapsed_time": "12:53:02", "remaining_time": "1 day, 18:51:20", "throughput": 24419.18, "total_tokens": 1132618624} +{"current_steps": 3360, "total_steps": 14493, "loss": 0.3125, "lr": 4.325965279215243e-05, "epoch": 0.6956341709583085, "percentage": 23.18, "elapsed_time": "12:54:53", "remaining_time": "1 day, 18:47:30", "throughput": 24434.27, "total_tokens": 1136031744} +{"current_steps": 3370, "total_steps": 14493, "loss": 0.314, "lr": 4.3243470674804686e-05, "epoch": 0.6977045107528274, "percentage": 23.25, "elapsed_time": "12:56:43", "remaining_time": "1 day, 18:43:40", "throughput": 24450.08, "total_tokens": 1139469440} +{"current_steps": 3380, "total_steps": 14493, "loss": 0.3141, "lr": 4.3227306703591904e-05, "epoch": 0.6997748505473461, "percentage": 23.32, "elapsed_time": "12:58:28", "remaining_time": "1 day, 18:39:32", "throughput": 24466.67, "total_tokens": 1142805760} +{"current_steps": 3390, "total_steps": 14493, "loss": 0.3126, "lr": 4.32111608446252e-05, "epoch": 0.7018451903418649, "percentage": 23.39, "elapsed_time": "13:00:16", "remaining_time": "1 day, 18:35:34", "throughput": 24482.21, "total_tokens": 1146174464} +{"current_steps": 3400, 
"total_steps": 14493, "loss": 0.3159, "lr": 4.319503306410426e-05, "epoch": 0.7039155301363836, "percentage": 23.46, "elapsed_time": "13:02:10", "remaining_time": "1 day, 18:31:58", "throughput": 24496.75, "total_tokens": 1149648256} +{"current_steps": 3410, "total_steps": 14493, "loss": 0.3128, "lr": 4.317892332831699e-05, "epoch": 0.7059858699309024, "percentage": 23.53, "elapsed_time": "13:03:57", "remaining_time": "1 day, 18:27:59", "throughput": 24513.07, "total_tokens": 1153039872} +{"current_steps": 3420, "total_steps": 14493, "loss": 0.3134, "lr": 4.316283160363922e-05, "epoch": 0.7080562097254212, "percentage": 23.6, "elapsed_time": "13:05:39", "remaining_time": "1 day, 18:23:43", "throughput": 24531.74, "total_tokens": 1156408192} +{"current_steps": 3430, "total_steps": 14493, "loss": 0.3146, "lr": 4.314675785653447e-05, "epoch": 0.7101265495199399, "percentage": 23.67, "elapsed_time": "13:07:28", "remaining_time": "1 day, 18:19:52", "throughput": 24547.34, "total_tokens": 1159814400} +{"current_steps": 3440, "total_steps": 14493, "loss": 0.3113, "lr": 4.3130702053553606e-05, "epoch": 0.7121968893144587, "percentage": 23.74, "elapsed_time": "13:09:15", "remaining_time": "1 day, 18:15:58", "throughput": 24563.12, "total_tokens": 1163208192} +{"current_steps": 3450, "total_steps": 14493, "loss": 0.3115, "lr": 4.3114664161334546e-05, "epoch": 0.7142672291089776, "percentage": 23.8, "elapsed_time": "13:11:04", "remaining_time": "1 day, 18:12:08", "throughput": 24578.57, "total_tokens": 1166619264} +{"current_steps": 3460, "total_steps": 14493, "loss": 0.3098, "lr": 4.3098644146601984e-05, "epoch": 0.7163375689034963, "percentage": 23.87, "elapsed_time": "13:12:47", "remaining_time": "1 day, 18:08:01", "throughput": 24597.18, "total_tokens": 1170036480} +{"current_steps": 3470, "total_steps": 14493, "loss": 0.3152, "lr": 4.30826419761671e-05, "epoch": 0.7184079086980151, "percentage": 23.94, "elapsed_time": "13:14:37", "remaining_time": "1 day, 18:04:14", 
"throughput": 24612.03, "total_tokens": 1173433344} +{"current_steps": 3480, "total_steps": 14493, "loss": 0.3129, "lr": 4.30666576169273e-05, "epoch": 0.7204782484925338, "percentage": 24.01, "elapsed_time": "13:16:26", "remaining_time": "1 day, 18:00:27", "throughput": 24627.41, "total_tokens": 1176857984} +{"current_steps": 3490, "total_steps": 14493, "loss": 0.3119, "lr": 4.305069103586585e-05, "epoch": 0.7225485882870526, "percentage": 24.08, "elapsed_time": "13:18:15", "remaining_time": "1 day, 17:56:41", "throughput": 24641.51, "total_tokens": 1180218240} +{"current_steps": 3500, "total_steps": 14493, "loss": 0.3094, "lr": 4.303474220005164e-05, "epoch": 0.7246189280815714, "percentage": 24.15, "elapsed_time": "13:20:01", "remaining_time": "1 day, 17:52:45", "throughput": 24656.82, "total_tokens": 1183562624} +{"current_steps": 3510, "total_steps": 14493, "loss": 0.3115, "lr": 4.3018811076638944e-05, "epoch": 0.7266892678760901, "percentage": 24.22, "elapsed_time": "13:21:52", "remaining_time": "1 day, 17:49:05", "throughput": 24670.23, "total_tokens": 1186935296} +{"current_steps": 3520, "total_steps": 14493, "loss": 0.3144, "lr": 4.300289763286704e-05, "epoch": 0.728759607670609, "percentage": 24.29, "elapsed_time": "13:23:37", "remaining_time": "1 day, 17:45:10", "throughput": 24686.88, "total_tokens": 1190348800} +{"current_steps": 3530, "total_steps": 14493, "loss": 0.3164, "lr": 4.298700183606e-05, "epoch": 0.7308299474651277, "percentage": 24.36, "elapsed_time": "13:25:26", "remaining_time": "1 day, 17:41:24", "throughput": 24701.71, "total_tokens": 1193735168} +{"current_steps": 3540, "total_steps": 14493, "loss": 0.3146, "lr": 4.297112365362637e-05, "epoch": 0.7329002872596465, "percentage": 24.43, "elapsed_time": "13:27:14", "remaining_time": "1 day, 17:37:39", "throughput": 24716.65, "total_tokens": 1197136640} +{"current_steps": 3550, "total_steps": 14493, "loss": 0.3154, "lr": 4.295526305305891e-05, "epoch": 0.7349706270541653, "percentage": 
24.49, "elapsed_time": "13:28:57", "remaining_time": "1 day, 17:33:39", "throughput": 24733.01, "total_tokens": 1200481920} +{"current_steps": 3560, "total_steps": 14493, "loss": 0.3166, "lr": 4.293942000193429e-05, "epoch": 0.737040966848684, "percentage": 24.56, "elapsed_time": "13:30:50", "remaining_time": "1 day, 17:30:08", "throughput": 24746.74, "total_tokens": 1203940096} +{"current_steps": 3570, "total_steps": 14493, "loss": 0.3101, "lr": 4.2923594467912866e-05, "epoch": 0.7391113066432028, "percentage": 24.63, "elapsed_time": "13:32:31", "remaining_time": "1 day, 17:26:04", "throughput": 24762.44, "total_tokens": 1207217152} +{"current_steps": 3580, "total_steps": 14493, "loss": 0.3151, "lr": 4.290778641873832e-05, "epoch": 0.7411816464377216, "percentage": 24.7, "elapsed_time": "13:34:18", "remaining_time": "1 day, 17:22:15", "throughput": 24777.48, "total_tokens": 1210582784} +{"current_steps": 3590, "total_steps": 14493, "loss": 0.3115, "lr": 4.2891995822237455e-05, "epoch": 0.7432519862322403, "percentage": 24.77, "elapsed_time": "13:36:03", "remaining_time": "1 day, 17:18:25", "throughput": 24793.4, "total_tokens": 1213982464} +{"current_steps": 3600, "total_steps": 14493, "loss": 0.3082, "lr": 4.28762226463199e-05, "epoch": 0.7453223260267592, "percentage": 24.84, "elapsed_time": "13:37:54", "remaining_time": "1 day, 17:14:50", "throughput": 24807.82, "total_tokens": 1217424896} +{"current_steps": 3610, "total_steps": 14493, "loss": 0.3099, "lr": 4.286046685897781e-05, "epoch": 0.747392665821278, "percentage": 24.91, "elapsed_time": "13:39:44", "remaining_time": "1 day, 17:11:14", "throughput": 24821.58, "total_tokens": 1220827264} +{"current_steps": 3620, "total_steps": 14493, "loss": 0.3114, "lr": 4.284472842828562e-05, "epoch": 0.7494630056157967, "percentage": 24.98, "elapsed_time": "13:41:34", "remaining_time": "1 day, 17:07:39", "throughput": 24835.65, "total_tokens": 1224251392} +{"current_steps": 3630, "total_steps": 14493, "loss": 0.3137, 
"lr": 4.282900732239977e-05, "epoch": 0.7515333454103155, "percentage": 25.05, "elapsed_time": "13:43:23", "remaining_time": "1 day, 17:04:02", "throughput": 24849.27, "total_tokens": 1227640064} +{"current_steps": 3640, "total_steps": 14493, "loss": 0.3141, "lr": 4.281330350955845e-05, "epoch": 0.7536036852048342, "percentage": 25.12, "elapsed_time": "13:45:16", "remaining_time": "1 day, 17:00:38", "throughput": 24861.39, "total_tokens": 1231051776} +{"current_steps": 3650, "total_steps": 14493, "loss": 0.3106, "lr": 4.279761695808125e-05, "epoch": 0.755674024999353, "percentage": 25.18, "elapsed_time": "13:46:59", "remaining_time": "1 day, 16:56:44", "throughput": 24876.78, "total_tokens": 1234376192} +{"current_steps": 3660, "total_steps": 14493, "loss": 0.3134, "lr": 4.278194763636904e-05, "epoch": 0.7577443647938717, "percentage": 25.25, "elapsed_time": "13:48:59", "remaining_time": "1 day, 16:53:40", "throughput": 24887.38, "total_tokens": 1237884032} +{"current_steps": 3670, "total_steps": 14493, "loss": 0.3151, "lr": 4.276629551290354e-05, "epoch": 0.7598147045883906, "percentage": 25.32, "elapsed_time": "13:50:50", "remaining_time": "1 day, 16:50:10", "throughput": 24901.54, "total_tokens": 1241348992} +{"current_steps": 3680, "total_steps": 14493, "loss": 0.3126, "lr": 4.2750660556247175e-05, "epoch": 0.7618850443829094, "percentage": 25.39, "elapsed_time": "13:52:32", "remaining_time": "1 day, 16:46:16", "throughput": 24917.48, "total_tokens": 1244691968} +{"current_steps": 3690, "total_steps": 14493, "loss": 0.3122, "lr": 4.273504273504274e-05, "epoch": 0.7639553841774281, "percentage": 25.46, "elapsed_time": "13:54:16", "remaining_time": "1 day, 16:42:28", "throughput": 24933.52, "total_tokens": 1248095616} +{"current_steps": 3700, "total_steps": 14493, "loss": 0.3113, "lr": 4.271944201801317e-05, "epoch": 0.7660257239719469, "percentage": 25.53, "elapsed_time": "13:56:03", "remaining_time": "1 day, 16:38:47", "throughput": 24949.27, "total_tokens": 
1251534848} +{"current_steps": 3710, "total_steps": 14493, "loss": 0.3108, "lr": 4.270385837396127e-05, "epoch": 0.7680960637664657, "percentage": 25.6, "elapsed_time": "13:57:43", "remaining_time": "1 day, 16:34:50", "throughput": 24965.46, "total_tokens": 1254860416} +{"current_steps": 3720, "total_steps": 14493, "loss": 0.3118, "lr": 4.268829177176945e-05, "epoch": 0.7701664035609844, "percentage": 25.67, "elapsed_time": "13:59:31", "remaining_time": "1 day, 16:31:14", "throughput": 24979.04, "total_tokens": 1258231552} +{"current_steps": 3730, "total_steps": 14493, "loss": 0.3103, "lr": 4.2672742180399455e-05, "epoch": 0.7722367433555032, "percentage": 25.74, "elapsed_time": "14:01:18", "remaining_time": "1 day, 16:27:35", "throughput": 24993.03, "total_tokens": 1261600768} +{"current_steps": 3740, "total_steps": 14493, "loss": 0.3107, "lr": 4.265720956889213e-05, "epoch": 0.774307083150022, "percentage": 25.81, "elapsed_time": "14:03:03", "remaining_time": "1 day, 16:23:53", "throughput": 25008.26, "total_tokens": 1264996352} +{"current_steps": 3750, "total_steps": 14493, "loss": 0.3097, "lr": 4.2641693906367113e-05, "epoch": 0.7763774229445408, "percentage": 25.87, "elapsed_time": "14:04:48", "remaining_time": "1 day, 16:20:13", "throughput": 25023.11, "total_tokens": 1268393600} +{"current_steps": 3760, "total_steps": 14493, "loss": 0.3134, "lr": 4.2626195162022646e-05, "epoch": 0.7784477627390596, "percentage": 25.94, "elapsed_time": "14:06:39", "remaining_time": "1 day, 16:16:47", "throughput": 25035.65, "total_tokens": 1271794688} +{"current_steps": 3770, "total_steps": 14493, "loss": 0.3093, "lr": 4.2610713305135255e-05, "epoch": 0.7805181025335783, "percentage": 26.01, "elapsed_time": "14:08:21", "remaining_time": "1 day, 16:13:00", "throughput": 25050.14, "total_tokens": 1275100544} +{"current_steps": 3780, "total_steps": 14493, "loss": 0.3088, "lr": 4.2595248305059546e-05, "epoch": 0.7825884423280971, "percentage": 26.08, "elapsed_time": "14:10:10", 
"remaining_time": "1 day, 16:09:29", "throughput": 25063.83, "total_tokens": 1278513024} +{"current_steps": 3790, "total_steps": 14493, "loss": 0.3085, "lr": 4.2579800131227916e-05, "epoch": 0.7846587821226159, "percentage": 26.15, "elapsed_time": "14:11:58", "remaining_time": "1 day, 16:05:59", "throughput": 25077.22, "total_tokens": 1281909120} +{"current_steps": 3800, "total_steps": 14493, "loss": 0.3094, "lr": 4.256436875315028e-05, "epoch": 0.7867291219171346, "percentage": 26.22, "elapsed_time": "14:13:44", "remaining_time": "1 day, 16:02:23", "throughput": 25091.19, "total_tokens": 1285287168} +{"current_steps": 3810, "total_steps": 14493, "loss": 0.3121, "lr": 4.2548954140413895e-05, "epoch": 0.7887994617116534, "percentage": 26.29, "elapsed_time": "14:15:31", "remaining_time": "1 day, 15:58:50", "throughput": 25104.81, "total_tokens": 1288668672} +{"current_steps": 3820, "total_steps": 14493, "loss": 0.3105, "lr": 4.253355626268302e-05, "epoch": 0.7908698015061723, "percentage": 26.36, "elapsed_time": "14:17:22", "remaining_time": "1 day, 15:55:30", "throughput": 25117.78, "total_tokens": 1292128768} +{"current_steps": 3830, "total_steps": 14493, "loss": 0.3098, "lr": 4.2518175089698716e-05, "epoch": 0.792940141300691, "percentage": 26.43, "elapsed_time": "14:19:13", "remaining_time": "1 day, 15:52:07", "throughput": 25129.62, "total_tokens": 1295512448} +{"current_steps": 3840, "total_steps": 14493, "loss": 0.3104, "lr": 4.25028105912786e-05, "epoch": 0.7950104810952098, "percentage": 26.5, "elapsed_time": "14:21:01", "remaining_time": "1 day, 15:48:39", "throughput": 25142.99, "total_tokens": 1298913536} +{"current_steps": 3850, "total_steps": 14493, "loss": 0.3118, "lr": 4.2487462737316565e-05, "epoch": 0.7970808208897285, "percentage": 26.56, "elapsed_time": "14:22:47", "remaining_time": "1 day, 15:45:07", "throughput": 25156.6, "total_tokens": 1302298624} +{"current_steps": 3860, "total_steps": 14493, "loss": 0.3118, "lr": 4.2472131497782555e-05, 
"epoch": 0.7991511606842473, "percentage": 26.63, "elapsed_time": "14:24:36", "remaining_time": "1 day, 15:41:42", "throughput": 25169.48, "total_tokens": 1305702016} +{"current_steps": 3870, "total_steps": 14493, "loss": 0.3117, "lr": 4.245681684272231e-05, "epoch": 0.8012215004787661, "percentage": 26.7, "elapsed_time": "14:26:25", "remaining_time": "1 day, 15:38:18", "throughput": 25182.29, "total_tokens": 1309120640} +{"current_steps": 3880, "total_steps": 14493, "loss": 0.3087, "lr": 4.244151874225712e-05, "epoch": 0.8032918402732848, "percentage": 26.77, "elapsed_time": "14:28:12", "remaining_time": "1 day, 15:34:47", "throughput": 25196.01, "total_tokens": 1312511488} +{"current_steps": 3890, "total_steps": 14493, "loss": 0.3107, "lr": 4.2426237166583596e-05, "epoch": 0.8053621800678036, "percentage": 26.84, "elapsed_time": "14:29:58", "remaining_time": "1 day, 15:31:18", "throughput": 25209.0, "total_tokens": 1315877760} +{"current_steps": 3900, "total_steps": 14493, "loss": 0.3092, "lr": 4.241097208597339e-05, "epoch": 0.8074325198623225, "percentage": 26.91, "elapsed_time": "14:31:47", "remaining_time": "1 day, 15:27:54", "throughput": 25222.0, "total_tokens": 1319293952} +{"current_steps": 3910, "total_steps": 14493, "loss": 0.3077, "lr": 4.2395723470773005e-05, "epoch": 0.8095028596568412, "percentage": 26.98, "elapsed_time": "14:33:28", "remaining_time": "1 day, 15:24:11", "throughput": 25236.21, "total_tokens": 1322589056} +{"current_steps": 3920, "total_steps": 14493, "loss": 0.3082, "lr": 4.238049129140347e-05, "epoch": 0.81157319945136, "percentage": 27.05, "elapsed_time": "14:35:11", "remaining_time": "1 day, 15:20:34", "throughput": 25250.37, "total_tokens": 1325942016} +{"current_steps": 3930, "total_steps": 14493, "loss": 0.3077, "lr": 4.236527551836022e-05, "epoch": 0.8136435392458787, "percentage": 27.12, "elapsed_time": "14:36:56", "remaining_time": "1 day, 15:17:00", "throughput": 25263.72, "total_tokens": 1329278720} +{"current_steps": 
3940, "total_steps": 14493, "loss": 0.3058, "lr": 4.235007612221274e-05, "epoch": 0.8157138790403975, "percentage": 27.19, "elapsed_time": "14:38:43", "remaining_time": "1 day, 15:13:36", "throughput": 25276.31, "total_tokens": 1332659072} +{"current_steps": 3950, "total_steps": 14493, "loss": 0.3104, "lr": 4.2334893073604386e-05, "epoch": 0.8177842188349163, "percentage": 27.25, "elapsed_time": "14:40:27", "remaining_time": "1 day, 15:10:02", "throughput": 25290.3, "total_tokens": 1336021760} +{"current_steps": 3960, "total_steps": 14493, "loss": 0.307, "lr": 4.231972634325214e-05, "epoch": 0.819854558629435, "percentage": 27.32, "elapsed_time": "14:42:13", "remaining_time": "1 day, 15:06:34", "throughput": 25302.24, "total_tokens": 1339329408} +{"current_steps": 3970, "total_steps": 14493, "loss": 0.3101, "lr": 4.230457590194635e-05, "epoch": 0.8219248984239539, "percentage": 27.39, "elapsed_time": "14:44:02", "remaining_time": "1 day, 15:03:15", "throughput": 25314.18, "total_tokens": 1342726784} +{"current_steps": 3980, "total_steps": 14493, "loss": 0.3143, "lr": 4.228944172055053e-05, "epoch": 0.8239952382184726, "percentage": 27.46, "elapsed_time": "14:45:55", "remaining_time": "1 day, 15:00:09", "throughput": 25325.02, "total_tokens": 1346175744} +{"current_steps": 3990, "total_steps": 14493, "loss": 0.3096, "lr": 4.22743237700011e-05, "epoch": 0.8260655780129914, "percentage": 27.53, "elapsed_time": "14:47:39", "remaining_time": "1 day, 14:56:36", "throughput": 25339.58, "total_tokens": 1349573376} +{"current_steps": 4000, "total_steps": 14493, "loss": 0.3085, "lr": 4.225922202130716e-05, "epoch": 0.8281359178075102, "percentage": 27.6, "elapsed_time": "14:49:24", "remaining_time": "1 day, 14:53:08", "throughput": 25352.3, "total_tokens": 1352910080} +{"current_steps": 4010, "total_steps": 14493, "loss": 0.3112, "lr": 4.224413644555024e-05, "epoch": 0.8302062576020289, "percentage": 27.67, "elapsed_time": "14:51:11", "remaining_time": "1 day, 14:49:44", 
"throughput": 25363.9, "total_tokens": 1356233856} +{"current_steps": 4020, "total_steps": 14493, "loss": 0.3066, "lr": 4.222906701388411e-05, "epoch": 0.8322765973965477, "percentage": 27.74, "elapsed_time": "14:52:55", "remaining_time": "1 day, 14:46:16", "throughput": 25377.85, "total_tokens": 1359629568} +{"current_steps": 4030, "total_steps": 14493, "loss": 0.3108, "lr": 4.2214013697534466e-05, "epoch": 0.8343469371910665, "percentage": 27.81, "elapsed_time": "14:54:51", "remaining_time": "1 day, 14:43:18", "throughput": 25387.2, "total_tokens": 1363077376} +{"current_steps": 4040, "total_steps": 14493, "loss": 0.3045, "lr": 4.219897646779882e-05, "epoch": 0.8364172769855852, "percentage": 27.88, "elapsed_time": "14:56:31", "remaining_time": "1 day, 14:39:38", "throughput": 25402.11, "total_tokens": 1366411392} +{"current_steps": 4050, "total_steps": 14493, "loss": 0.3063, "lr": 4.2183955296046145e-05, "epoch": 0.8384876167801041, "percentage": 27.94, "elapsed_time": "14:58:13", "remaining_time": "1 day, 14:36:04", "throughput": 25415.52, "total_tokens": 1369720576} +{"current_steps": 4060, "total_steps": 14493, "loss": 0.3101, "lr": 4.2168950153716746e-05, "epoch": 0.8405579565746228, "percentage": 28.01, "elapsed_time": "14:59:59", "remaining_time": "1 day, 14:32:43", "throughput": 25427.45, "total_tokens": 1373076864} +{"current_steps": 4070, "total_steps": 14493, "loss": 0.3068, "lr": 4.215396101232197e-05, "epoch": 0.8426282963691416, "percentage": 28.08, "elapsed_time": "15:01:44", "remaining_time": "1 day, 14:29:17", "throughput": 25440.33, "total_tokens": 1376433536} +{"current_steps": 4080, "total_steps": 14493, "loss": 0.3086, "lr": 4.213898784344398e-05, "epoch": 0.8446986361636604, "percentage": 28.15, "elapsed_time": "15:03:37", "remaining_time": "1 day, 14:26:14", "throughput": 25450.56, "total_tokens": 1379871104} +{"current_steps": 4090, "total_steps": 14493, "loss": 0.3085, "lr": 4.21240306187356e-05, "epoch": 0.8467689759581791, "percentage": 
28.22, "elapsed_time": "15:05:21", "remaining_time": "1 day, 14:22:47", "throughput": 25463.62, "total_tokens": 1383213696} +{"current_steps": 4100, "total_steps": 14493, "loss": 0.3092, "lr": 4.2109089309919967e-05, "epoch": 0.8488393157526979, "percentage": 28.29, "elapsed_time": "15:07:09", "remaining_time": "1 day, 14:19:31", "throughput": 25475.22, "total_tokens": 1386592384} +{"current_steps": 4110, "total_steps": 14493, "loss": 0.3076, "lr": 4.2094163888790445e-05, "epoch": 0.8509096555472166, "percentage": 28.36, "elapsed_time": "15:08:57", "remaining_time": "1 day, 14:16:16", "throughput": 25485.98, "total_tokens": 1389933952} +{"current_steps": 4120, "total_steps": 14493, "loss": 0.3082, "lr": 4.2079254327210294e-05, "epoch": 0.8529799953417354, "percentage": 28.43, "elapsed_time": "15:10:46", "remaining_time": "1 day, 14:13:03", "throughput": 25498.25, "total_tokens": 1393383680} +{"current_steps": 4130, "total_steps": 14493, "loss": 0.3081, "lr": 4.206436059711249e-05, "epoch": 0.8550503351362543, "percentage": 28.5, "elapsed_time": "15:12:27", "remaining_time": "1 day, 14:09:31", "throughput": 25512.07, "total_tokens": 1396716032} +{"current_steps": 4140, "total_steps": 14493, "loss": 0.3111, "lr": 4.20494826704995e-05, "epoch": 0.857120674930773, "percentage": 28.57, "elapsed_time": "15:14:11", "remaining_time": "1 day, 14:06:09", "throughput": 25524.19, "total_tokens": 1400045824} +{"current_steps": 4150, "total_steps": 14493, "loss": 0.3069, "lr": 4.203462051944307e-05, "epoch": 0.8591910147252918, "percentage": 28.63, "elapsed_time": "15:15:58", "remaining_time": "1 day, 14:02:52", "throughput": 25535.74, "total_tokens": 1403403520} +{"current_steps": 4160, "total_steps": 14493, "loss": 0.3071, "lr": 4.201977411608398e-05, "epoch": 0.8612613545198106, "percentage": 28.7, "elapsed_time": "15:17:49", "remaining_time": "1 day, 13:59:46", "throughput": 25546.76, "total_tokens": 1406847488} +{"current_steps": 4170, "total_steps": 14493, "loss": 0.309, 
"lr": 4.200494343263185e-05, "epoch": 0.8633316943143293, "percentage": 28.77, "elapsed_time": "15:19:36", "remaining_time": "1 day, 13:56:31", "throughput": 25559.4, "total_tokens": 1410273408} +{"current_steps": 4180, "total_steps": 14493, "loss": 0.3069, "lr": 4.1990128441364914e-05, "epoch": 0.8654020341088481, "percentage": 28.84, "elapsed_time": "15:21:16", "remaining_time": "1 day, 13:53:00", "throughput": 25573.51, "total_tokens": 1413619328} +{"current_steps": 4190, "total_steps": 14493, "loss": 0.3084, "lr": 4.197532911462977e-05, "epoch": 0.8674723739033668, "percentage": 28.91, "elapsed_time": "15:23:01", "remaining_time": "1 day, 13:49:40", "throughput": 25584.96, "total_tokens": 1416935168} +{"current_steps": 4200, "total_steps": 14493, "loss": 0.3097, "lr": 4.196054542484125e-05, "epoch": 0.8695427136978857, "percentage": 28.98, "elapsed_time": "15:24:52", "remaining_time": "1 day, 13:46:36", "throughput": 25595.48, "total_tokens": 1420354944} +{"current_steps": 4210, "total_steps": 14493, "loss": 0.3082, "lr": 4.1945777344482084e-05, "epoch": 0.8716130534924045, "percentage": 29.05, "elapsed_time": "15:26:37", "remaining_time": "1 day, 13:43:16", "throughput": 25607.57, "total_tokens": 1423704448} +{"current_steps": 4220, "total_steps": 14493, "loss": 0.3079, "lr": 4.19310248461028e-05, "epoch": 0.8736833932869232, "percentage": 29.12, "elapsed_time": "15:28:20", "remaining_time": "1 day, 13:39:55", "throughput": 25619.35, "total_tokens": 1427019392} +{"current_steps": 4230, "total_steps": 14493, "loss": 0.3077, "lr": 4.1916287902321405e-05, "epoch": 0.875753733081442, "percentage": 29.19, "elapsed_time": "15:30:09", "remaining_time": "1 day, 13:36:47", "throughput": 25630.68, "total_tokens": 1430434048} +{"current_steps": 4240, "total_steps": 14493, "loss": 0.3069, "lr": 4.190156648582328e-05, "epoch": 0.8778240728759608, "percentage": 29.26, "elapsed_time": "15:31:54", "remaining_time": "1 day, 13:33:30", "throughput": 25642.54, "total_tokens": 
1433792640} +{"current_steps": 4250, "total_steps": 14493, "loss": 0.3089, "lr": 4.188686056936087e-05, "epoch": 0.8798944126704795, "percentage": 29.32, "elapsed_time": "15:33:38", "remaining_time": "1 day, 13:30:10", "throughput": 25654.77, "total_tokens": 1437136640} +{"current_steps": 4260, "total_steps": 14493, "loss": 0.3084, "lr": 4.187217012575352e-05, "epoch": 0.8819647524649983, "percentage": 29.39, "elapsed_time": "15:35:22", "remaining_time": "1 day, 13:26:51", "throughput": 25668.11, "total_tokens": 1440549248} +{"current_steps": 4270, "total_steps": 14493, "loss": 0.3055, "lr": 4.185749512788727e-05, "epoch": 0.884035092259517, "percentage": 29.46, "elapsed_time": "15:37:08", "remaining_time": "1 day, 13:23:40", "throughput": 25681.04, "total_tokens": 1444018048} +{"current_steps": 4280, "total_steps": 14493, "loss": 0.3069, "lr": 4.184283554871462e-05, "epoch": 0.8861054320540359, "percentage": 29.53, "elapsed_time": "15:38:54", "remaining_time": "1 day, 13:20:25", "throughput": 25691.61, "total_tokens": 1447319296} +{"current_steps": 4290, "total_steps": 14493, "loss": 0.3089, "lr": 4.1828191361254344e-05, "epoch": 0.8881757718485547, "percentage": 29.6, "elapsed_time": "15:40:43", "remaining_time": "1 day, 13:17:20", "throughput": 25702.3, "total_tokens": 1450720896} +{"current_steps": 4300, "total_steps": 14493, "loss": 0.308, "lr": 4.181356253859127e-05, "epoch": 0.8902461116430734, "percentage": 29.67, "elapsed_time": "15:42:28", "remaining_time": "1 day, 13:14:06", "throughput": 25713.63, "total_tokens": 1454071680} +{"current_steps": 4310, "total_steps": 14493, "loss": 0.3084, "lr": 4.179894905387606e-05, "epoch": 0.8923164514375922, "percentage": 29.74, "elapsed_time": "15:44:15", "remaining_time": "1 day, 13:10:55", "throughput": 25724.91, "total_tokens": 1457447040} +{"current_steps": 4320, "total_steps": 14493, "loss": 0.3082, "lr": 4.178435088032502e-05, "epoch": 0.894386791232111, "percentage": 29.81, "elapsed_time": "15:46:01", 
"remaining_time": "1 day, 13:07:45", "throughput": 25735.75, "total_tokens": 1460804480} +{"current_steps": 4330, "total_steps": 14493, "loss": 0.3079, "lr": 4.176976799121989e-05, "epoch": 0.8964571310266297, "percentage": 29.88, "elapsed_time": "15:47:48", "remaining_time": "1 day, 13:04:37", "throughput": 25746.32, "total_tokens": 1464162688} +{"current_steps": 4340, "total_steps": 14493, "loss": 0.3071, "lr": 4.1755200359907657e-05, "epoch": 0.8985274708211485, "percentage": 29.95, "elapsed_time": "15:49:32", "remaining_time": "1 day, 13:01:21", "throughput": 25758.3, "total_tokens": 1467518848} +{"current_steps": 4350, "total_steps": 14493, "loss": 0.3076, "lr": 4.174064795980028e-05, "epoch": 0.9005978106156673, "percentage": 30.01, "elapsed_time": "15:51:17", "remaining_time": "1 day, 12:58:09", "throughput": 25769.15, "total_tokens": 1470841472} +{"current_steps": 4360, "total_steps": 14493, "loss": 0.3096, "lr": 4.17261107643746e-05, "epoch": 0.9026681504101861, "percentage": 30.08, "elapsed_time": "15:53:03", "remaining_time": "1 day, 12:54:59", "throughput": 25779.69, "total_tokens": 1474180864} +{"current_steps": 4370, "total_steps": 14493, "loss": 0.3074, "lr": 4.171158874717204e-05, "epoch": 0.9047384902047049, "percentage": 30.15, "elapsed_time": "15:54:52", "remaining_time": "1 day, 12:51:55", "throughput": 25791.07, "total_tokens": 1477625856} +{"current_steps": 4380, "total_steps": 14493, "loss": 0.3078, "lr": 4.169708188179844e-05, "epoch": 0.9068088299992236, "percentage": 30.22, "elapsed_time": "15:56:37", "remaining_time": "1 day, 12:48:45", "throughput": 25802.36, "total_tokens": 1480995840} +{"current_steps": 4390, "total_steps": 14493, "loss": 0.3043, "lr": 4.1682590141923846e-05, "epoch": 0.9088791697937424, "percentage": 30.29, "elapsed_time": "15:58:29", "remaining_time": "1 day, 12:45:51", "throughput": 25811.75, "total_tokens": 1484428800} +{"current_steps": 4400, "total_steps": 14493, "loss": 0.3079, "lr": 4.1668113501282335e-05, 
"epoch": 0.9109495095882612, "percentage": 30.36, "elapsed_time": "16:00:12", "remaining_time": "1 day, 12:42:35", "throughput": 25822.94, "total_tokens": 1487730816} +{"current_steps": 4410, "total_steps": 14493, "loss": 0.3057, "lr": 4.165365193367178e-05, "epoch": 0.9130198493827799, "percentage": 30.43, "elapsed_time": "16:01:54", "remaining_time": "1 day, 12:39:18", "throughput": 25836.56, "total_tokens": 1491143808} +{"current_steps": 4420, "total_steps": 14493, "loss": 0.3072, "lr": 4.163920541295369e-05, "epoch": 0.9150901891772987, "percentage": 30.5, "elapsed_time": "16:03:36", "remaining_time": "1 day, 12:36:01", "throughput": 25847.84, "total_tokens": 1494428800} +{"current_steps": 4430, "total_steps": 14493, "loss": 0.3066, "lr": 4.1624773913052946e-05, "epoch": 0.9171605289718175, "percentage": 30.57, "elapsed_time": "16:05:22", "remaining_time": "1 day, 12:32:54", "throughput": 25858.97, "total_tokens": 1497816576} +{"current_steps": 4440, "total_steps": 14493, "loss": 0.31, "lr": 4.161035740795769e-05, "epoch": 0.9192308687663363, "percentage": 30.64, "elapsed_time": "16:07:10", "remaining_time": "1 day, 12:29:51", "throughput": 25869.06, "total_tokens": 1501193984} +{"current_steps": 4450, "total_steps": 14493, "loss": 0.305, "lr": 4.1595955871719055e-05, "epoch": 0.9213012085608551, "percentage": 30.7, "elapsed_time": "16:08:55", "remaining_time": "1 day, 12:26:43", "throughput": 25879.82, "total_tokens": 1504532864} +{"current_steps": 4460, "total_steps": 14493, "loss": 0.3093, "lr": 4.158156927845101e-05, "epoch": 0.9233715483553738, "percentage": 30.77, "elapsed_time": "16:10:42", "remaining_time": "1 day, 12:23:38", "throughput": 25890.42, "total_tokens": 1507910144} +{"current_steps": 4470, "total_steps": 14493, "loss": 0.3073, "lr": 4.156719760233016e-05, "epoch": 0.9254418881498926, "percentage": 30.84, "elapsed_time": "16:12:27", "remaining_time": "1 day, 12:20:32", "throughput": 25902.37, "total_tokens": 1511348096} +{"current_steps": 
4480, "total_steps": 14493, "loss": 0.3056, "lr": 4.155284081759552e-05, "epoch": 0.9275122279444113, "percentage": 30.91, "elapsed_time": "16:14:10", "remaining_time": "1 day, 12:17:18", "throughput": 25914.19, "total_tokens": 1514685952} +{"current_steps": 4490, "total_steps": 14493, "loss": 0.3111, "lr": 4.1538498898548356e-05, "epoch": 0.9295825677389301, "percentage": 30.98, "elapsed_time": "16:15:51", "remaining_time": "1 day, 12:14:02", "throughput": 25926.5, "total_tokens": 1518026624} +{"current_steps": 4500, "total_steps": 14493, "loss": 0.307, "lr": 4.1524171819552e-05, "epoch": 0.931652907533449, "percentage": 31.05, "elapsed_time": "16:17:37", "remaining_time": "1 day, 12:10:58", "throughput": 25937.12, "total_tokens": 1521403392} +{"current_steps": 4510, "total_steps": 14493, "loss": 0.3062, "lr": 4.15098595550316e-05, "epoch": 0.9337232473279677, "percentage": 31.12, "elapsed_time": "16:19:23", "remaining_time": "1 day, 12:07:55", "throughput": 25948.34, "total_tokens": 1524825472} +{"current_steps": 4520, "total_steps": 14493, "loss": 0.3068, "lr": 4.1495562079474e-05, "epoch": 0.9357935871224865, "percentage": 31.19, "elapsed_time": "16:21:08", "remaining_time": "1 day, 12:04:47", "throughput": 25959.12, "total_tokens": 1528168320} +{"current_steps": 4530, "total_steps": 14493, "loss": 0.31, "lr": 4.148127936742749e-05, "epoch": 0.9378639269170053, "percentage": 31.26, "elapsed_time": "16:22:56", "remaining_time": "1 day, 12:01:48", "throughput": 25969.82, "total_tokens": 1531603200} +{"current_steps": 4540, "total_steps": 14493, "loss": 0.3038, "lr": 4.146701139350166e-05, "epoch": 0.939934266711524, "percentage": 31.33, "elapsed_time": "16:24:40", "remaining_time": "1 day, 11:58:41", "throughput": 25980.38, "total_tokens": 1534936448} +{"current_steps": 4550, "total_steps": 14493, "loss": 0.3051, "lr": 4.1452758132367196e-05, "epoch": 0.9420046065060428, "percentage": 31.39, "elapsed_time": "16:26:32", "remaining_time": "1 day, 11:55:52", 
"throughput": 25989.09, "total_tokens": 1538370432} +{"current_steps": 4560, "total_steps": 14493, "loss": 0.305, "lr": 4.1438519558755656e-05, "epoch": 0.9440749463005615, "percentage": 31.46, "elapsed_time": "16:28:22", "remaining_time": "1 day, 11:52:58", "throughput": 25999.06, "total_tokens": 1541817728} +{"current_steps": 4570, "total_steps": 14493, "loss": 0.3036, "lr": 4.1424295647459336e-05, "epoch": 0.9461452860950803, "percentage": 31.53, "elapsed_time": "16:30:04", "remaining_time": "1 day, 11:49:46", "throughput": 26010.09, "total_tokens": 1545106176} +{"current_steps": 4580, "total_steps": 14493, "loss": 0.3049, "lr": 4.141008637333106e-05, "epoch": 0.9482156258895992, "percentage": 31.6, "elapsed_time": "16:31:45", "remaining_time": "1 day, 11:46:33", "throughput": 26021.86, "total_tokens": 1548432512} +{"current_steps": 4590, "total_steps": 14493, "loss": 0.3033, "lr": 4.1395891711283974e-05, "epoch": 0.9502859656841179, "percentage": 31.67, "elapsed_time": "16:33:33", "remaining_time": "1 day, 11:43:36", "throughput": 26031.86, "total_tokens": 1551842560} +{"current_steps": 4600, "total_steps": 14493, "loss": 0.303, "lr": 4.1381711636291395e-05, "epoch": 0.9523563054786367, "percentage": 31.74, "elapsed_time": "16:35:23", "remaining_time": "1 day, 11:40:44", "throughput": 26041.34, "total_tokens": 1555285376} +{"current_steps": 4610, "total_steps": 14493, "loss": 0.3031, "lr": 4.1367546123386604e-05, "epoch": 0.9544266452731555, "percentage": 31.81, "elapsed_time": "16:37:10", "remaining_time": "1 day, 11:37:46", "throughput": 26052.1, "total_tokens": 1558718976} +{"current_steps": 4620, "total_steps": 14493, "loss": 0.3083, "lr": 4.1353395147662673e-05, "epoch": 0.9564969850676742, "percentage": 31.88, "elapsed_time": "16:39:02", "remaining_time": "1 day, 11:34:58", "throughput": 26060.32, "total_tokens": 1562130688} +{"current_steps": 4630, "total_steps": 14493, "loss": 0.3074, "lr": 4.133925868427225e-05, "epoch": 0.958567324862193, 
"percentage": 31.95, "elapsed_time": "16:40:51", "remaining_time": "1 day, 11:32:04", "throughput": 26069.22, "total_tokens": 1565501184} +{"current_steps": 4640, "total_steps": 14493, "loss": 0.3061, "lr": 4.132513670842744e-05, "epoch": 0.9606376646567117, "percentage": 32.02, "elapsed_time": "16:42:39", "remaining_time": "1 day, 11:29:08", "throughput": 26078.13, "total_tokens": 1568853632} +{"current_steps": 4650, "total_steps": 14493, "loss": 0.303, "lr": 4.1311029195399534e-05, "epoch": 0.9627080044512305, "percentage": 32.08, "elapsed_time": "16:44:21", "remaining_time": "1 day, 11:26:00", "throughput": 26089.26, "total_tokens": 1572182400} +{"current_steps": 4660, "total_steps": 14493, "loss": 0.3056, "lr": 4.129693612051892e-05, "epoch": 0.9647783442457494, "percentage": 32.15, "elapsed_time": "16:46:06", "remaining_time": "1 day, 11:22:58", "throughput": 26099.39, "total_tokens": 1575522432} +{"current_steps": 4670, "total_steps": 14493, "loss": 0.3038, "lr": 4.1282857459174826e-05, "epoch": 0.9668486840402681, "percentage": 32.22, "elapsed_time": "16:47:51", "remaining_time": "1 day, 11:19:56", "throughput": 26109.42, "total_tokens": 1578871296} +{"current_steps": 4680, "total_steps": 14493, "loss": 0.304, "lr": 4.1268793186815184e-05, "epoch": 0.9689190238347869, "percentage": 32.29, "elapsed_time": "16:49:34", "remaining_time": "1 day, 11:16:52", "throughput": 26119.62, "total_tokens": 1582190848} +{"current_steps": 4690, "total_steps": 14493, "loss": 0.3045, "lr": 4.1254743278946456e-05, "epoch": 0.9709893636293057, "percentage": 32.36, "elapsed_time": "16:51:21", "remaining_time": "1 day, 11:13:56", "throughput": 26129.82, "total_tokens": 1585605120} +{"current_steps": 4700, "total_steps": 14493, "loss": 0.3047, "lr": 4.1240707711133394e-05, "epoch": 0.9730597034238244, "percentage": 32.43, "elapsed_time": "16:53:12", "remaining_time": "1 day, 11:11:08", "throughput": 26139.23, "total_tokens": 1589077760} +{"current_steps": 4710, "total_steps": 
14493, "loss": 0.3069, "lr": 4.122668645899893e-05, "epoch": 0.9751300432183432, "percentage": 32.5, "elapsed_time": "16:54:55", "remaining_time": "1 day, 11:08:03", "throughput": 26150.78, "total_tokens": 1592452864} +{"current_steps": 4720, "total_steps": 14493, "loss": 0.3045, "lr": 4.1212679498223975e-05, "epoch": 0.9772003830128619, "percentage": 32.57, "elapsed_time": "16:56:44", "remaining_time": "1 day, 11:05:13", "throughput": 26159.64, "total_tokens": 1595861888} +{"current_steps": 4730, "total_steps": 14493, "loss": 0.3059, "lr": 4.1198686804547215e-05, "epoch": 0.9792707228073808, "percentage": 32.64, "elapsed_time": "16:58:28", "remaining_time": "1 day, 11:02:11", "throughput": 26171.03, "total_tokens": 1599276544} +{"current_steps": 4740, "total_steps": 14493, "loss": 0.3059, "lr": 4.118470835376499e-05, "epoch": 0.9813410626018996, "percentage": 32.71, "elapsed_time": "17:00:13", "remaining_time": "1 day, 10:59:11", "throughput": 26181.3, "total_tokens": 1602641280} +{"current_steps": 4750, "total_steps": 14493, "loss": 0.305, "lr": 4.117074412173107e-05, "epoch": 0.9834114023964183, "percentage": 32.77, "elapsed_time": "17:01:57", "remaining_time": "1 day, 10:56:12", "throughput": 26191.86, "total_tokens": 1606030592} +{"current_steps": 4760, "total_steps": 14493, "loss": 0.3034, "lr": 4.115679408435648e-05, "epoch": 0.9854817421909371, "percentage": 32.84, "elapsed_time": "17:03:44", "remaining_time": "1 day, 10:53:16", "throughput": 26202.23, "total_tokens": 1609446912} +{"current_steps": 4770, "total_steps": 14493, "loss": 0.3049, "lr": 4.114285821760937e-05, "epoch": 0.9875520819854559, "percentage": 32.91, "elapsed_time": "17:05:29", "remaining_time": "1 day, 10:50:19", "throughput": 26211.27, "total_tokens": 1612765440} +{"current_steps": 4780, "total_steps": 14493, "loss": 0.3038, "lr": 4.11289364975148e-05, "epoch": 0.9896224217799746, "percentage": 32.98, "elapsed_time": "17:07:12", "remaining_time": "1 day, 10:47:16", "throughput": 
26222.05, "total_tokens": 1616118656} +{"current_steps": 4790, "total_steps": 14493, "loss": 0.3055, "lr": 4.111502890015456e-05, "epoch": 0.9916927615744934, "percentage": 33.05, "elapsed_time": "17:08:52", "remaining_time": "1 day, 10:44:10", "throughput": 26233.61, "total_tokens": 1619471872} +{"current_steps": 4800, "total_steps": 14493, "loss": 0.3057, "lr": 4.1101135401667056e-05, "epoch": 0.9937631013690121, "percentage": 33.12, "elapsed_time": "17:10:37", "remaining_time": "1 day, 10:41:13", "throughput": 26242.7, "total_tokens": 1622792320} +{"current_steps": 4810, "total_steps": 14493, "loss": 0.3055, "lr": 4.108725597824708e-05, "epoch": 0.995833441163531, "percentage": 33.19, "elapsed_time": "17:12:26", "remaining_time": "1 day, 10:38:23", "throughput": 26251.93, "total_tokens": 1626203392} +{"current_steps": 4820, "total_steps": 14493, "loss": 0.305, "lr": 4.107339060614564e-05, "epoch": 0.9979037809580498, "percentage": 33.26, "elapsed_time": "17:14:13", "remaining_time": "1 day, 10:35:31", "throughput": 26260.55, "total_tokens": 1629555712} +{"current_steps": 4830, "total_steps": 14493, "loss": 0.306, "lr": 4.1059539261669825e-05, "epoch": 0.9999741207525685, "percentage": 33.33, "elapsed_time": "17:15:52", "remaining_time": "1 day, 10:32:23", "throughput": 26271.16, "total_tokens": 1632819840} +{"current_steps": 4840, "total_steps": 14493, "loss": 0.2968, "lr": 4.104570192118262e-05, "epoch": 1.001863305815067, "percentage": 33.4, "elapsed_time": "17:17:23", "remaining_time": "1 day, 10:28:59", "throughput": 26281.04, "total_tokens": 1635824512} +{"current_steps": 4850, "total_steps": 14493, "loss": 0.2899, "lr": 4.1031878561102714e-05, "epoch": 1.0039336456095858, "percentage": 33.46, "elapsed_time": "17:19:10", "remaining_time": "1 day, 10:26:07", "throughput": 26290.4, "total_tokens": 1639210112} +{"current_steps": 4860, "total_steps": 14493, "loss": 0.288, "lr": 4.1018069157904385e-05, "epoch": 1.0060039854041045, "percentage": 33.53, 
"elapsed_time": "17:20:59", "remaining_time": "1 day, 10:23:21", "throughput": 26298.57, "total_tokens": 1642606976} +{"current_steps": 4870, "total_steps": 14493, "loss": 0.2933, "lr": 4.100427368811727e-05, "epoch": 1.0080743251986233, "percentage": 33.6, "elapsed_time": "17:22:43", "remaining_time": "1 day, 10:20:24", "throughput": 26309.0, "total_tokens": 1645988992} +{"current_steps": 4880, "total_steps": 14493, "loss": 0.293, "lr": 4.099049212832622e-05, "epoch": 1.010144664993142, "percentage": 33.67, "elapsed_time": "17:24:28", "remaining_time": "1 day, 10:17:29", "throughput": 26319.17, "total_tokens": 1649388160} +{"current_steps": 4890, "total_steps": 14493, "loss": 0.2927, "lr": 4.0976724455171155e-05, "epoch": 1.0122150047876608, "percentage": 33.74, "elapsed_time": "17:26:15", "remaining_time": "1 day, 10:14:38", "throughput": 26329.2, "total_tokens": 1652821888} +{"current_steps": 4900, "total_steps": 14493, "loss": 0.2887, "lr": 4.096297064534688e-05, "epoch": 1.0142853445821796, "percentage": 33.81, "elapsed_time": "17:27:59", "remaining_time": "1 day, 10:11:43", "throughput": 26339.0, "total_tokens": 1656188288} +{"current_steps": 4910, "total_steps": 14493, "loss": 0.2922, "lr": 4.0949230675602904e-05, "epoch": 1.0163556843766983, "percentage": 33.88, "elapsed_time": "17:29:42", "remaining_time": "1 day, 10:08:45", "throughput": 26350.52, "total_tokens": 1659630208} +{"current_steps": 4920, "total_steps": 14493, "loss": 0.2897, "lr": 4.09355045227433e-05, "epoch": 1.018426024171217, "percentage": 33.95, "elapsed_time": "17:31:26", "remaining_time": "1 day, 10:05:49", "throughput": 26360.03, "total_tokens": 1662959872} +{"current_steps": 4930, "total_steps": 14493, "loss": 0.2888, "lr": 4.092179216362654e-05, "epoch": 1.0204963639657358, "percentage": 34.02, "elapsed_time": "17:33:15", "remaining_time": "1 day, 10:03:04", "throughput": 26369.01, "total_tokens": 1666406784} +{"current_steps": 4940, "total_steps": 14493, "loss": 0.2882, "lr": 
4.090809357516532e-05, "epoch": 1.0225667037602546, "percentage": 34.09, "elapsed_time": "17:34:58", "remaining_time": "1 day, 10:00:07", "throughput": 26379.26, "total_tokens": 1669769984} +{"current_steps": 4950, "total_steps": 14493, "loss": 0.2927, "lr": 4.089440873432638e-05, "epoch": 1.0246370435547734, "percentage": 34.15, "elapsed_time": "17:36:44", "remaining_time": "1 day, 9:57:16", "throughput": 26388.35, "total_tokens": 1673144448} +{"current_steps": 4960, "total_steps": 14493, "loss": 0.2896, "lr": 4.088073761813037e-05, "epoch": 1.0267073833492921, "percentage": 34.22, "elapsed_time": "17:38:27", "remaining_time": "1 day, 9:54:20", "throughput": 26398.7, "total_tokens": 1676526464} +{"current_steps": 4970, "total_steps": 14493, "loss": 0.2895, "lr": 4.086708020365172e-05, "epoch": 1.0287777231438109, "percentage": 34.29, "elapsed_time": "17:40:13", "remaining_time": "1 day, 9:51:29", "throughput": 26408.45, "total_tokens": 1679932288} +{"current_steps": 4980, "total_steps": 14493, "loss": 0.2927, "lr": 4.0853436468018354e-05, "epoch": 1.0308480629383296, "percentage": 34.36, "elapsed_time": "17:42:02", "remaining_time": "1 day, 9:48:45", "throughput": 26415.77, "total_tokens": 1683285888} +{"current_steps": 4990, "total_steps": 14493, "loss": 0.2907, "lr": 4.0839806388411686e-05, "epoch": 1.0329184027328486, "percentage": 34.43, "elapsed_time": "17:43:49", "remaining_time": "1 day, 9:45:58", "throughput": 26424.43, "total_tokens": 1686668800} +{"current_steps": 5000, "total_steps": 14493, "loss": 0.2887, "lr": 4.0826189942066346e-05, "epoch": 1.0349887425273674, "percentage": 34.5, "elapsed_time": "17:45:41", "remaining_time": "1 day, 9:43:19", "throughput": 26431.74, "total_tokens": 1690081792} +{"current_steps": 5000, "total_steps": 14493, "eval_loss": 0.26896825432777405, "epoch": 1.0349887425273674, "percentage": 34.5, "elapsed_time": "17:45:43", "remaining_time": "1 day, 9:43:23", "throughput": 26430.68, "total_tokens": 1690081792} 
+{"current_steps": 5010, "total_steps": 14493, "loss": 0.2892, "lr": 4.081258710627008e-05, "epoch": 1.0370590823218861, "percentage": 34.57, "elapsed_time": "17:48:00", "remaining_time": "1 day, 9:41:31", "throughput": 26427.45, "total_tokens": 1693477504} +{"current_steps": 5020, "total_steps": 14493, "loss": 0.2884, "lr": 4.0798997858363557e-05, "epoch": 1.039129422116405, "percentage": 34.64, "elapsed_time": "17:49:49", "remaining_time": "1 day, 9:38:48", "throughput": 26435.13, "total_tokens": 1696851840} +{"current_steps": 5030, "total_steps": 14493, "loss": 0.2887, "lr": 4.078542217574024e-05, "epoch": 1.0411997619109237, "percentage": 34.71, "elapsed_time": "17:51:41", "remaining_time": "1 day, 9:36:10", "throughput": 26443.24, "total_tokens": 1700326784} +{"current_steps": 5040, "total_steps": 14493, "loss": 0.2935, "lr": 4.0771860035846196e-05, "epoch": 1.0432701017054424, "percentage": 34.78, "elapsed_time": "17:53:19", "remaining_time": "1 day, 9:33:07", "throughput": 26453.99, "total_tokens": 1703628672} +{"current_steps": 5050, "total_steps": 14493, "loss": 0.2898, "lr": 4.0758311416179965e-05, "epoch": 1.0453404414999612, "percentage": 34.84, "elapsed_time": "17:55:08", "remaining_time": "1 day, 9:30:25", "throughput": 26461.67, "total_tokens": 1707013376} +{"current_steps": 5060, "total_steps": 14493, "loss": 0.2902, "lr": 4.0744776294292386e-05, "epoch": 1.04741078129448, "percentage": 34.91, "elapsed_time": "17:56:56", "remaining_time": "1 day, 9:27:40", "throughput": 26470.01, "total_tokens": 1710405376} +{"current_steps": 5070, "total_steps": 14493, "loss": 0.2888, "lr": 4.073125464778646e-05, "epoch": 1.0494811210889987, "percentage": 34.98, "elapsed_time": "17:58:40", "remaining_time": "1 day, 9:24:47", "throughput": 26479.3, "total_tokens": 1713741568} +{"current_steps": 5080, "total_steps": 14493, "loss": 0.2917, "lr": 4.071774645431717e-05, "epoch": 1.0515514608835175, "percentage": 35.05, "elapsed_time": "18:00:23", "remaining_time": "1 
day, 9:21:54", "throughput": 26488.36, "total_tokens": 1717066752} +{"current_steps": 5090, "total_steps": 14493, "loss": 0.2924, "lr": 4.070425169159135e-05, "epoch": 1.0536218006780362, "percentage": 35.12, "elapsed_time": "18:02:14", "remaining_time": "1 day, 9:19:17", "throughput": 26495.1, "total_tokens": 1720455552} +{"current_steps": 5100, "total_steps": 14493, "loss": 0.2864, "lr": 4.069077033736751e-05, "epoch": 1.055692140472555, "percentage": 35.19, "elapsed_time": "18:04:06", "remaining_time": "1 day, 9:16:39", "throughput": 26502.37, "total_tokens": 1723874432} +{"current_steps": 5110, "total_steps": 14493, "loss": 0.2926, "lr": 4.06773023694557e-05, "epoch": 1.0577624802670738, "percentage": 35.26, "elapsed_time": "18:05:45", "remaining_time": "1 day, 9:13:40", "throughput": 26512.71, "total_tokens": 1727180928} +{"current_steps": 5120, "total_steps": 14493, "loss": 0.2911, "lr": 4.066384776571732e-05, "epoch": 1.0598328200615925, "percentage": 35.33, "elapsed_time": "18:07:27", "remaining_time": "1 day, 9:10:46", "throughput": 26522.26, "total_tokens": 1730512128} +{"current_steps": 5130, "total_steps": 14493, "loss": 0.288, "lr": 4.065040650406504e-05, "epoch": 1.0619031598561113, "percentage": 35.4, "elapsed_time": "18:09:07", "remaining_time": "1 day, 9:07:49", "throughput": 26532.25, "total_tokens": 1733822976} +{"current_steps": 5140, "total_steps": 14493, "loss": 0.2901, "lr": 4.0636978562462576e-05, "epoch": 1.0639734996506303, "percentage": 35.47, "elapsed_time": "18:10:50", "remaining_time": "1 day, 9:04:56", "throughput": 26543.1, "total_tokens": 1737252736} +{"current_steps": 5150, "total_steps": 14493, "loss": 0.2906, "lr": 4.062356391892456e-05, "epoch": 1.066043839445149, "percentage": 35.53, "elapsed_time": "18:12:34", "remaining_time": "1 day, 9:02:06", "throughput": 26551.55, "total_tokens": 1740562432} +{"current_steps": 5160, "total_steps": 14493, "loss": 0.2895, "lr": 4.0610162551516395e-05, "epoch": 1.0681141792396678, 
"percentage": 35.6, "elapsed_time": "18:14:15", "remaining_time": "1 day, 8:59:13", "throughput": 26560.85, "total_tokens": 1743877504} +{"current_steps": 5170, "total_steps": 14493, "loss": 0.2872, "lr": 4.059677443835412e-05, "epoch": 1.0701845190341865, "percentage": 35.67, "elapsed_time": "18:16:00", "remaining_time": "1 day, 8:56:25", "throughput": 26570.2, "total_tokens": 1747274112} +{"current_steps": 5180, "total_steps": 14493, "loss": 0.2915, "lr": 4.058339955760423e-05, "epoch": 1.0722548588287053, "percentage": 35.74, "elapsed_time": "18:17:50", "remaining_time": "1 day, 8:53:47", "throughput": 26578.17, "total_tokens": 1750723968} +{"current_steps": 5190, "total_steps": 14493, "loss": 0.2914, "lr": 4.0570037887483535e-05, "epoch": 1.074325198623224, "percentage": 35.81, "elapsed_time": "18:19:42", "remaining_time": "1 day, 8:51:13", "throughput": 26585.71, "total_tokens": 1754199680} +{"current_steps": 5200, "total_steps": 14493, "loss": 0.2917, "lr": 4.0556689406259025e-05, "epoch": 1.0763955384177428, "percentage": 35.88, "elapsed_time": "18:21:24", "remaining_time": "1 day, 8:48:20", "throughput": 26595.39, "total_tokens": 1757548672} +{"current_steps": 5210, "total_steps": 14493, "loss": 0.2896, "lr": 4.054335409224771e-05, "epoch": 1.0784658782122616, "percentage": 35.95, "elapsed_time": "18:23:08", "remaining_time": "1 day, 8:45:32", "throughput": 26605.12, "total_tokens": 1760961536} +{"current_steps": 5220, "total_steps": 14493, "loss": 0.2895, "lr": 4.053003192381646e-05, "epoch": 1.0805362180067803, "percentage": 36.02, "elapsed_time": "18:25:03", "remaining_time": "1 day, 8:43:03", "throughput": 26611.62, "total_tokens": 1764435072} +{"current_steps": 5230, "total_steps": 14493, "loss": 0.2894, "lr": 4.051672287938189e-05, "epoch": 1.082606557801299, "percentage": 36.09, "elapsed_time": "18:26:55", "remaining_time": "1 day, 8:40:30", "throughput": 26618.74, "total_tokens": 1767894144} +{"current_steps": 5240, "total_steps": 14493, "loss": 
0.2907, "lr": 4.050342693741019e-05, "epoch": 1.0846768975958179, "percentage": 36.16, "elapsed_time": "18:28:44", "remaining_time": "1 day, 8:37:50", "throughput": 26627.05, "total_tokens": 1771339136} +{"current_steps": 5250, "total_steps": 14493, "loss": 0.2884, "lr": 4.049014407641699e-05, "epoch": 1.0867472373903366, "percentage": 36.22, "elapsed_time": "18:30:32", "remaining_time": "1 day, 8:35:11", "throughput": 26634.75, "total_tokens": 1774742144} +{"current_steps": 5260, "total_steps": 14493, "loss": 0.2886, "lr": 4.047687427496717e-05, "epoch": 1.0888175771848554, "percentage": 36.29, "elapsed_time": "18:32:24", "remaining_time": "1 day, 8:32:39", "throughput": 26642.06, "total_tokens": 1778224256} +{"current_steps": 5270, "total_steps": 14493, "loss": 0.2906, "lr": 4.046361751167479e-05, "epoch": 1.0908879169793742, "percentage": 36.36, "elapsed_time": "18:34:16", "remaining_time": "1 day, 8:30:05", "throughput": 26649.1, "total_tokens": 1781669120} +{"current_steps": 5280, "total_steps": 14493, "loss": 0.2883, "lr": 4.045037376520292e-05, "epoch": 1.0929582567738931, "percentage": 36.43, "elapsed_time": "18:35:56", "remaining_time": "1 day, 8:27:11", "throughput": 26658.04, "total_tokens": 1784930688} +{"current_steps": 5290, "total_steps": 14493, "loss": 0.2906, "lr": 4.043714301426344e-05, "epoch": 1.095028596568412, "percentage": 36.5, "elapsed_time": "18:37:42", "remaining_time": "1 day, 8:24:28", "throughput": 26666.21, "total_tokens": 1788306944} +{"current_steps": 5300, "total_steps": 14493, "loss": 0.2895, "lr": 4.042392523761696e-05, "epoch": 1.0970989363629307, "percentage": 36.57, "elapsed_time": "18:39:33", "remaining_time": "1 day, 8:21:54", "throughput": 26673.95, "total_tokens": 1791783808} +{"current_steps": 5310, "total_steps": 14493, "loss": 0.288, "lr": 4.041072041407267e-05, "epoch": 1.0991692761574494, "percentage": 36.64, "elapsed_time": "18:41:18", "remaining_time": "1 day, 8:19:10", "throughput": 26682.38, "total_tokens": 
1795151744} +{"current_steps": 5320, "total_steps": 14493, "loss": 0.2882, "lr": 4.039752852248815e-05, "epoch": 1.1012396159519682, "percentage": 36.71, "elapsed_time": "18:43:04", "remaining_time": "1 day, 8:16:26", "throughput": 26691.42, "total_tokens": 1798574592} +{"current_steps": 5330, "total_steps": 14493, "loss": 0.2923, "lr": 4.0384349541769286e-05, "epoch": 1.103309955746487, "percentage": 36.78, "elapsed_time": "18:44:53", "remaining_time": "1 day, 8:13:50", "throughput": 26699.36, "total_tokens": 1802032000} +{"current_steps": 5340, "total_steps": 14493, "loss": 0.2892, "lr": 4.037118345087011e-05, "epoch": 1.1053802955410057, "percentage": 36.85, "elapsed_time": "18:46:44", "remaining_time": "1 day, 8:11:16", "throughput": 26706.59, "total_tokens": 1805480576} +{"current_steps": 5350, "total_steps": 14493, "loss": 0.2893, "lr": 4.0358030228792636e-05, "epoch": 1.1074506353355245, "percentage": 36.91, "elapsed_time": "18:48:30", "remaining_time": "1 day, 8:08:35", "throughput": 26714.72, "total_tokens": 1808875008} +{"current_steps": 5360, "total_steps": 14493, "loss": 0.2902, "lr": 4.034488985458673e-05, "epoch": 1.1095209751300432, "percentage": 36.98, "elapsed_time": "18:50:20", "remaining_time": "1 day, 8:05:59", "throughput": 26721.84, "total_tokens": 1812278016} +{"current_steps": 5370, "total_steps": 14493, "loss": 0.2911, "lr": 4.033176230735001e-05, "epoch": 1.111591314924562, "percentage": 37.05, "elapsed_time": "18:51:57", "remaining_time": "1 day, 8:03:04", "throughput": 26731.8, "total_tokens": 1815562112} +{"current_steps": 5380, "total_steps": 14493, "loss": 0.2893, "lr": 4.0318647566227626e-05, "epoch": 1.1136616547190807, "percentage": 37.12, "elapsed_time": "18:53:46", "remaining_time": "1 day, 8:00:27", "throughput": 26739.11, "total_tokens": 1818960256} +{"current_steps": 5390, "total_steps": 14493, "loss": 0.291, "lr": 4.0305545610412205e-05, "epoch": 1.1157319945135995, "percentage": 37.19, "elapsed_time": "18:55:34", 
"remaining_time": "1 day, 7:57:50", "throughput": 26746.81, "total_tokens": 1822387072} +{"current_steps": 5400, "total_steps": 14493, "loss": 0.2896, "lr": 4.029245641914365e-05, "epoch": 1.1178023343081183, "percentage": 37.26, "elapsed_time": "18:57:24", "remaining_time": "1 day, 7:55:15", "throughput": 26754.27, "total_tokens": 1825826816} +{"current_steps": 5410, "total_steps": 14493, "loss": 0.2885, "lr": 4.027937997170904e-05, "epoch": 1.119872674102637, "percentage": 37.33, "elapsed_time": "18:59:09", "remaining_time": "1 day, 7:52:33", "throughput": 26761.86, "total_tokens": 1829152896} +{"current_steps": 5420, "total_steps": 14493, "loss": 0.2894, "lr": 4.026631624744247e-05, "epoch": 1.1219430138971558, "percentage": 37.4, "elapsed_time": "19:00:57", "remaining_time": "1 day, 7:49:57", "throughput": 26768.97, "total_tokens": 1832540800} +{"current_steps": 5430, "total_steps": 14493, "loss": 0.2886, "lr": 4.025326522572493e-05, "epoch": 1.1240133536916745, "percentage": 37.47, "elapsed_time": "19:02:46", "remaining_time": "1 day, 7:47:21", "throughput": 26776.44, "total_tokens": 1835961216} +{"current_steps": 5440, "total_steps": 14493, "loss": 0.2897, "lr": 4.024022688598415e-05, "epoch": 1.1260836934861933, "percentage": 37.54, "elapsed_time": "19:04:37", "remaining_time": "1 day, 7:44:50", "throughput": 26783.81, "total_tokens": 1839450752} +{"current_steps": 5450, "total_steps": 14493, "loss": 0.2889, "lr": 4.0227201207694494e-05, "epoch": 1.1281540332807123, "percentage": 37.6, "elapsed_time": "19:06:24", "remaining_time": "1 day, 7:42:11", "throughput": 26791.83, "total_tokens": 1842865792} +{"current_steps": 5460, "total_steps": 14493, "loss": 0.2935, "lr": 4.021418817037677e-05, "epoch": 1.130224373075231, "percentage": 37.67, "elapsed_time": "19:08:11", "remaining_time": "1 day, 7:39:33", "throughput": 26799.66, "total_tokens": 1846264192} +{"current_steps": 5470, "total_steps": 14493, "loss": 0.2882, "lr": 4.0201187753598174e-05, "epoch": 
1.1322947128697498, "percentage": 37.74, "elapsed_time": "19:09:58", "remaining_time": "1 day, 7:36:55", "throughput": 26806.78, "total_tokens": 1849626368} +{"current_steps": 5480, "total_steps": 14493, "loss": 0.2919, "lr": 4.018819993697208e-05, "epoch": 1.1343650526642686, "percentage": 37.81, "elapsed_time": "19:11:49", "remaining_time": "1 day, 7:34:24", "throughput": 26813.25, "total_tokens": 1853044480} +{"current_steps": 5490, "total_steps": 14493, "loss": 0.289, "lr": 4.017522470015793e-05, "epoch": 1.1364353924587873, "percentage": 37.88, "elapsed_time": "19:13:35", "remaining_time": "1 day, 7:31:45", "throughput": 26820.67, "total_tokens": 1856402560} +{"current_steps": 5500, "total_steps": 14493, "loss": 0.289, "lr": 4.0162262022861144e-05, "epoch": 1.138505732253306, "percentage": 37.95, "elapsed_time": "19:15:23", "remaining_time": "1 day, 7:29:10", "throughput": 26828.1, "total_tokens": 1859828736} +{"current_steps": 5510, "total_steps": 14493, "loss": 0.2898, "lr": 4.0149311884832906e-05, "epoch": 1.1405760720478249, "percentage": 38.02, "elapsed_time": "19:17:11", "remaining_time": "1 day, 7:26:34", "throughput": 26835.36, "total_tokens": 1863216768} +{"current_steps": 5520, "total_steps": 14493, "loss": 0.2907, "lr": 4.0136374265870116e-05, "epoch": 1.1426464118423436, "percentage": 38.09, "elapsed_time": "19:18:58", "remaining_time": "1 day, 7:23:57", "throughput": 26843.0, "total_tokens": 1866617216} +{"current_steps": 5530, "total_steps": 14493, "loss": 0.2907, "lr": 4.0123449145815174e-05, "epoch": 1.1447167516368624, "percentage": 38.16, "elapsed_time": "19:20:42", "remaining_time": "1 day, 7:21:16", "throughput": 26850.87, "total_tokens": 1869972224} +{"current_steps": 5540, "total_steps": 14493, "loss": 0.2903, "lr": 4.011053650455592e-05, "epoch": 1.1467870914313811, "percentage": 38.23, "elapsed_time": "19:22:30", "remaining_time": "1 day, 7:18:40", "throughput": 26857.67, "total_tokens": 1873328000} +{"current_steps": 5550, 
"total_steps": 14493, "loss": 0.2917, "lr": 4.0097636322025466e-05, "epoch": 1.1488574312259, "percentage": 38.29, "elapsed_time": "19:24:19", "remaining_time": "1 day, 7:16:08", "throughput": 26864.3, "total_tokens": 1876735488} +{"current_steps": 5560, "total_steps": 14493, "loss": 0.2886, "lr": 4.008474857820206e-05, "epoch": 1.1509277710204187, "percentage": 38.36, "elapsed_time": "19:26:09", "remaining_time": "1 day, 7:13:36", "throughput": 26871.57, "total_tokens": 1880177152} +{"current_steps": 5570, "total_steps": 14493, "loss": 0.2899, "lr": 4.007187325310899e-05, "epoch": 1.1529981108149374, "percentage": 38.43, "elapsed_time": "19:27:52", "remaining_time": "1 day, 7:10:54", "throughput": 26879.67, "total_tokens": 1883531008} +{"current_steps": 5580, "total_steps": 14493, "loss": 0.2881, "lr": 4.00590103268144e-05, "epoch": 1.1550684506094564, "percentage": 38.5, "elapsed_time": "19:29:36", "remaining_time": "1 day, 7:08:13", "throughput": 26887.91, "total_tokens": 1886892288} +{"current_steps": 5590, "total_steps": 14493, "loss": 0.2874, "lr": 4.004615977943124e-05, "epoch": 1.157138790403975, "percentage": 38.57, "elapsed_time": "19:31:20", "remaining_time": "1 day, 7:05:33", "throughput": 26896.41, "total_tokens": 1890303872} +{"current_steps": 5600, "total_steps": 14493, "loss": 0.2916, "lr": 4.0033321591117025e-05, "epoch": 1.159209130198494, "percentage": 38.64, "elapsed_time": "19:33:07", "remaining_time": "1 day, 7:02:57", "throughput": 26903.62, "total_tokens": 1893665536} +{"current_steps": 5610, "total_steps": 14493, "loss": 0.2896, "lr": 4.002049574207381e-05, "epoch": 1.1612794699930127, "percentage": 38.71, "elapsed_time": "19:35:03", "remaining_time": "1 day, 7:00:36", "throughput": 26908.86, "total_tokens": 1897158400} +{"current_steps": 5620, "total_steps": 14493, "loss": 0.29, "lr": 4.000768221254803e-05, "epoch": 1.1633498097875314, "percentage": 38.78, "elapsed_time": "19:36:49", "remaining_time": "1 day, 6:58:00", "throughput": 
26915.9, "total_tokens": 1900530304} +{"current_steps": 5630, "total_steps": 14493, "loss": 0.2873, "lr": 3.999488098283034e-05, "epoch": 1.1654201495820502, "percentage": 38.85, "elapsed_time": "19:38:33", "remaining_time": "1 day, 6:55:19", "throughput": 26924.62, "total_tokens": 1903925376} +{"current_steps": 5640, "total_steps": 14493, "loss": 0.2892, "lr": 3.9982092033255506e-05, "epoch": 1.167490489376569, "percentage": 38.92, "elapsed_time": "19:40:20", "remaining_time": "1 day, 6:52:45", "throughput": 26931.44, "total_tokens": 1907303168} +{"current_steps": 5650, "total_steps": 14493, "loss": 0.2875, "lr": 3.996931534420232e-05, "epoch": 1.1695608291710877, "percentage": 38.98, "elapsed_time": "19:42:07", "remaining_time": "1 day, 6:50:11", "throughput": 26938.51, "total_tokens": 1910690688} +{"current_steps": 5660, "total_steps": 14493, "loss": 0.2883, "lr": 3.995655089609339e-05, "epoch": 1.1716311689656065, "percentage": 39.05, "elapsed_time": "19:43:55", "remaining_time": "1 day, 6:47:38", "throughput": 26945.13, "total_tokens": 1914066176} +{"current_steps": 5670, "total_steps": 14493, "loss": 0.2891, "lr": 3.994379866939511e-05, "epoch": 1.1737015087601252, "percentage": 39.12, "elapsed_time": "19:45:41", "remaining_time": "1 day, 6:45:02", "throughput": 26952.16, "total_tokens": 1917419904} +{"current_steps": 5680, "total_steps": 14493, "loss": 0.2895, "lr": 3.993105864461745e-05, "epoch": 1.175771848554644, "percentage": 39.19, "elapsed_time": "19:47:25", "remaining_time": "1 day, 6:42:23", "throughput": 26959.65, "total_tokens": 1920761728} +{"current_steps": 5690, "total_steps": 14493, "loss": 0.2894, "lr": 3.9918330802313866e-05, "epoch": 1.1778421883491628, "percentage": 39.26, "elapsed_time": "19:49:18", "remaining_time": "1 day, 6:39:58", "throughput": 26966.04, "total_tokens": 1924248064} +{"current_steps": 5700, "total_steps": 14493, "loss": 0.2903, "lr": 3.9905615123081206e-05, "epoch": 1.1799125281436815, "percentage": 39.33, 
"elapsed_time": "19:51:05", "remaining_time": "1 day, 6:37:24", "throughput": 26972.3, "total_tokens": 1927586560} +{"current_steps": 5710, "total_steps": 14493, "loss": 0.2926, "lr": 3.989291158755953e-05, "epoch": 1.1819828679382003, "percentage": 39.4, "elapsed_time": "19:52:49", "remaining_time": "1 day, 6:34:46", "throughput": 26979.64, "total_tokens": 1930914688} +{"current_steps": 5720, "total_steps": 14493, "loss": 0.2906, "lr": 3.988022017643201e-05, "epoch": 1.184053207732719, "percentage": 39.47, "elapsed_time": "19:54:35", "remaining_time": "1 day, 6:32:12", "throughput": 26986.82, "total_tokens": 1934305152} +{"current_steps": 5730, "total_steps": 14493, "loss": 0.2846, "lr": 3.9867540870424826e-05, "epoch": 1.186123547527238, "percentage": 39.54, "elapsed_time": "19:56:28", "remaining_time": "1 day, 6:29:47", "throughput": 26992.89, "total_tokens": 1937769472} +{"current_steps": 5740, "total_steps": 14493, "loss": 0.2911, "lr": 3.985487365030702e-05, "epoch": 1.1881938873217566, "percentage": 39.61, "elapsed_time": "19:58:14", "remaining_time": "1 day, 6:27:13", "throughput": 26999.92, "total_tokens": 1941151872} +{"current_steps": 5750, "total_steps": 14493, "loss": 0.2893, "lr": 3.984221849689036e-05, "epoch": 1.1902642271162756, "percentage": 39.67, "elapsed_time": "19:59:59", "remaining_time": "1 day, 6:24:37", "throughput": 27006.72, "total_tokens": 1944483584} +{"current_steps": 5760, "total_steps": 14493, "loss": 0.289, "lr": 3.982957539102927e-05, "epoch": 1.1923345669107943, "percentage": 39.74, "elapsed_time": "20:01:55", "remaining_time": "1 day, 6:22:18", "throughput": 27012.73, "total_tokens": 1948048000} +{"current_steps": 5770, "total_steps": 14493, "loss": 0.2884, "lr": 3.981694431362065e-05, "epoch": 1.194404906705313, "percentage": 39.81, "elapsed_time": "20:03:45", "remaining_time": "1 day, 6:19:49", "throughput": 27019.16, "total_tokens": 1951478400} +{"current_steps": 5780, "total_steps": 14493, "loss": 0.29, "lr": 
3.9804325245603786e-05, "epoch": 1.1964752464998318, "percentage": 39.88, "elapsed_time": "20:05:26", "remaining_time": "1 day, 6:17:08", "throughput": 27027.31, "total_tokens": 1954799488} +{"current_steps": 5790, "total_steps": 14493, "loss": 0.2869, "lr": 3.9791718167960226e-05, "epoch": 1.1985455862943506, "percentage": 39.95, "elapsed_time": "20:07:13", "remaining_time": "1 day, 6:14:36", "throughput": 27034.15, "total_tokens": 1958191360} +{"current_steps": 5800, "total_steps": 14493, "loss": 0.2865, "lr": 3.9779123061713665e-05, "epoch": 1.2006159260888694, "percentage": 40.02, "elapsed_time": "20:09:01", "remaining_time": "1 day, 6:12:04", "throughput": 27039.82, "total_tokens": 1961502592} +{"current_steps": 5810, "total_steps": 14493, "loss": 0.2906, "lr": 3.976653990792979e-05, "epoch": 1.2026862658833881, "percentage": 40.09, "elapsed_time": "20:10:47", "remaining_time": "1 day, 6:09:30", "throughput": 27047.29, "total_tokens": 1964909824} +{"current_steps": 5820, "total_steps": 14493, "loss": 0.2854, "lr": 3.9753968687716206e-05, "epoch": 1.2047566056779069, "percentage": 40.16, "elapsed_time": "20:12:36", "remaining_time": "1 day, 6:07:02", "throughput": 27053.68, "total_tokens": 1968342400} +{"current_steps": 5830, "total_steps": 14493, "loss": 0.287, "lr": 3.974140938222232e-05, "epoch": 1.2068269454724256, "percentage": 40.23, "elapsed_time": "20:14:25", "remaining_time": "1 day, 6:04:33", "throughput": 27060.19, "total_tokens": 1971762048} +{"current_steps": 5840, "total_steps": 14493, "loss": 0.2882, "lr": 3.972886197263915e-05, "epoch": 1.2088972852669444, "percentage": 40.3, "elapsed_time": "20:16:10", "remaining_time": "1 day, 6:01:59", "throughput": 27067.48, "total_tokens": 1975136256} +{"current_steps": 5850, "total_steps": 14493, "loss": 0.2878, "lr": 3.97163264401993e-05, "epoch": 1.2109676250614632, "percentage": 40.36, "elapsed_time": "20:17:59", "remaining_time": "1 day, 5:59:30", "throughput": 27074.49, "total_tokens": 1978589696} 
+{"current_steps": 5860, "total_steps": 14493, "loss": 0.2898, "lr": 3.970380276617677e-05, "epoch": 1.213037964855982, "percentage": 40.43, "elapsed_time": "20:19:47", "remaining_time": "1 day, 5:57:01", "throughput": 27081.01, "total_tokens": 1982003968} +{"current_steps": 5870, "total_steps": 14493, "loss": 0.287, "lr": 3.96912909318869e-05, "epoch": 1.2151083046505007, "percentage": 40.5, "elapsed_time": "20:21:40", "remaining_time": "1 day, 5:54:37", "throughput": 27086.56, "total_tokens": 1985454208} +{"current_steps": 5880, "total_steps": 14493, "loss": 0.2908, "lr": 3.96787909186862e-05, "epoch": 1.2171786444450194, "percentage": 40.57, "elapsed_time": "20:23:32", "remaining_time": "1 day, 5:52:14", "throughput": 27091.18, "total_tokens": 1988840576} +{"current_steps": 5890, "total_steps": 14493, "loss": 0.2865, "lr": 3.9666302707972244e-05, "epoch": 1.2192489842395382, "percentage": 40.64, "elapsed_time": "20:25:19", "remaining_time": "1 day, 5:49:43", "throughput": 27098.45, "total_tokens": 1992259712} +{"current_steps": 5900, "total_steps": 14493, "loss": 0.2875, "lr": 3.965382628118358e-05, "epoch": 1.2213193240340572, "percentage": 40.71, "elapsed_time": "20:27:05", "remaining_time": "1 day, 5:47:11", "throughput": 27104.96, "total_tokens": 1995614720} +{"current_steps": 5910, "total_steps": 14493, "loss": 0.2885, "lr": 3.964136161979959e-05, "epoch": 1.223389663828576, "percentage": 40.78, "elapsed_time": "20:28:56", "remaining_time": "1 day, 5:44:46", "throughput": 27110.51, "total_tokens": 1999035776} +{"current_steps": 5920, "total_steps": 14493, "loss": 0.2943, "lr": 3.9628908705340406e-05, "epoch": 1.2254600036230947, "percentage": 40.85, "elapsed_time": "20:30:46", "remaining_time": "1 day, 5:42:20", "throughput": 27115.35, "total_tokens": 2002382848} +{"current_steps": 5930, "total_steps": 14493, "loss": 0.2864, "lr": 3.961646751936673e-05, "epoch": 1.2275303434176135, "percentage": 40.92, "elapsed_time": "20:32:31", "remaining_time": "1 day, 
5:39:46", "throughput": 27122.77, "total_tokens": 2005756288} +{"current_steps": 5940, "total_steps": 14493, "loss": 0.2878, "lr": 3.960403804347979e-05, "epoch": 1.2296006832121322, "percentage": 40.99, "elapsed_time": "20:34:14", "remaining_time": "1 day, 5:37:11", "throughput": 27130.19, "total_tokens": 2009116800} +{"current_steps": 5950, "total_steps": 14493, "loss": 0.288, "lr": 3.959162025932119e-05, "epoch": 1.231671023006651, "percentage": 41.05, "elapsed_time": "20:36:03", "remaining_time": "1 day, 5:34:44", "throughput": 27135.97, "total_tokens": 2012503808} +{"current_steps": 5960, "total_steps": 14493, "loss": 0.2886, "lr": 3.95792141485728e-05, "epoch": 1.2337413628011697, "percentage": 41.12, "elapsed_time": "20:37:46", "remaining_time": "1 day, 5:32:08", "throughput": 27143.11, "total_tokens": 2015831040} +{"current_steps": 5970, "total_steps": 14493, "loss": 0.2902, "lr": 3.956681969295664e-05, "epoch": 1.2358117025956885, "percentage": 41.19, "elapsed_time": "20:39:36", "remaining_time": "1 day, 5:29:42", "throughput": 27148.55, "total_tokens": 2019204992} +{"current_steps": 5980, "total_steps": 14493, "loss": 0.2894, "lr": 3.955443687423479e-05, "epoch": 1.2378820423902073, "percentage": 41.26, "elapsed_time": "20:41:18", "remaining_time": "1 day, 5:27:06", "throughput": 27155.83, "total_tokens": 2022528256} +{"current_steps": 5990, "total_steps": 14493, "loss": 0.2878, "lr": 3.954206567420924e-05, "epoch": 1.239952382184726, "percentage": 41.33, "elapsed_time": "20:43:04", "remaining_time": "1 day, 5:24:35", "throughput": 27162.08, "total_tokens": 2025880960} +{"current_steps": 6000, "total_steps": 14493, "loss": 0.2891, "lr": 3.952970607472179e-05, "epoch": 1.2420227219792448, "percentage": 41.4, "elapsed_time": "20:44:55", "remaining_time": "1 day, 5:22:11", "throughput": 27168.1, "total_tokens": 2029334400} +{"current_steps": 6010, "total_steps": 14493, "loss": 0.2897, "lr": 3.951735805765399e-05, "epoch": 1.2440930617737636, "percentage": 
41.47, "elapsed_time": "20:46:39", "remaining_time": "1 day, 5:19:38", "throughput": 27175.64, "total_tokens": 2032724096} +{"current_steps": 6020, "total_steps": 14493, "loss": 0.2903, "lr": 3.950502160492692e-05, "epoch": 1.2461634015682823, "percentage": 41.54, "elapsed_time": "20:48:28", "remaining_time": "1 day, 5:17:11", "throughput": 27181.64, "total_tokens": 2036135168} +{"current_steps": 6030, "total_steps": 14493, "loss": 0.2896, "lr": 3.9492696698501205e-05, "epoch": 1.248233741362801, "percentage": 41.61, "elapsed_time": "20:50:18", "remaining_time": "1 day, 5:14:46", "throughput": 27187.72, "total_tokens": 2039575936} +{"current_steps": 6040, "total_steps": 14493, "loss": 0.2853, "lr": 3.9480383320376784e-05, "epoch": 1.2503040811573198, "percentage": 41.68, "elapsed_time": "20:51:58", "remaining_time": "1 day, 5:12:07", "throughput": 27196.09, "total_tokens": 2042916096} +{"current_steps": 6050, "total_steps": 14493, "loss": 0.2886, "lr": 3.94680814525929e-05, "epoch": 1.2523744209518388, "percentage": 41.74, "elapsed_time": "20:53:47", "remaining_time": "1 day, 5:09:42", "throughput": 27201.86, "total_tokens": 2046331392} +{"current_steps": 6060, "total_steps": 14493, "loss": 0.2914, "lr": 3.945579107722792e-05, "epoch": 1.2544447607463576, "percentage": 41.81, "elapsed_time": "20:55:32", "remaining_time": "1 day, 5:07:11", "throughput": 27208.61, "total_tokens": 2049691520} +{"current_steps": 6070, "total_steps": 14493, "loss": 0.2885, "lr": 3.9443512176399276e-05, "epoch": 1.2565151005408763, "percentage": 41.88, "elapsed_time": "20:57:23", "remaining_time": "1 day, 5:04:48", "throughput": 27214.38, "total_tokens": 2053149696} +{"current_steps": 6080, "total_steps": 14493, "loss": 0.287, "lr": 3.9431244732263307e-05, "epoch": 1.258585440335395, "percentage": 41.95, "elapsed_time": "20:59:04", "remaining_time": "1 day, 5:02:12", "throughput": 27222.12, "total_tokens": 2056494464} +{"current_steps": 6090, "total_steps": 14493, "loss": 0.2927, "lr": 
3.941898872701519e-05, "epoch": 1.2606557801299139, "percentage": 42.02, "elapsed_time": "21:00:49", "remaining_time": "1 day, 4:59:41", "throughput": 27228.89, "total_tokens": 2059862912} +{"current_steps": 6100, "total_steps": 14493, "loss": 0.2889, "lr": 3.940674414288882e-05, "epoch": 1.2627261199244326, "percentage": 42.09, "elapsed_time": "21:02:35", "remaining_time": "1 day, 4:57:12", "throughput": 27235.58, "total_tokens": 2063249152} +{"current_steps": 6110, "total_steps": 14493, "loss": 0.2865, "lr": 3.939451096215668e-05, "epoch": 1.2647964597189514, "percentage": 42.16, "elapsed_time": "21:04:23", "remaining_time": "1 day, 4:54:45", "throughput": 27241.28, "total_tokens": 2066608512} +{"current_steps": 6120, "total_steps": 14493, "loss": 0.2886, "lr": 3.938228916712978e-05, "epoch": 1.2668667995134701, "percentage": 42.23, "elapsed_time": "21:06:13", "remaining_time": "1 day, 4:52:21", "throughput": 27247.45, "total_tokens": 2070074624} +{"current_steps": 6130, "total_steps": 14493, "loss": 0.2857, "lr": 3.937007874015748e-05, "epoch": 1.268937139307989, "percentage": 42.3, "elapsed_time": "21:08:00", "remaining_time": "1 day, 4:49:55", "throughput": 27253.56, "total_tokens": 2073476736} +{"current_steps": 6140, "total_steps": 14493, "loss": 0.2911, "lr": 3.935787966362748e-05, "epoch": 1.2710074791025077, "percentage": 42.37, "elapsed_time": "21:09:51", "remaining_time": "1 day, 4:47:32", "throughput": 27258.82, "total_tokens": 2076882816} +{"current_steps": 6150, "total_steps": 14493, "loss": 0.2895, "lr": 3.9345691919965595e-05, "epoch": 1.2730778188970264, "percentage": 42.43, "elapsed_time": "21:11:39", "remaining_time": "1 day, 4:45:06", "throughput": 27264.91, "total_tokens": 2080291840} +{"current_steps": 6160, "total_steps": 14493, "loss": 0.2866, "lr": 3.9333515491635764e-05, "epoch": 1.2751481586915452, "percentage": 42.5, "elapsed_time": "21:13:25", "remaining_time": "1 day, 4:42:38", "throughput": 27271.14, "total_tokens": 2083665408} 
+{"current_steps": 6170, "total_steps": 14493, "loss": 0.2889, "lr": 3.932135036113987e-05, "epoch": 1.277218498486064, "percentage": 42.57, "elapsed_time": "21:15:09", "remaining_time": "1 day, 4:40:07", "throughput": 27277.57, "total_tokens": 2086993664} +{"current_steps": 6180, "total_steps": 14493, "loss": 0.2908, "lr": 3.930919651101764e-05, "epoch": 1.279288838280583, "percentage": 42.64, "elapsed_time": "21:16:53", "remaining_time": "1 day, 4:37:35", "throughput": 27284.16, "total_tokens": 2090328320} +{"current_steps": 6190, "total_steps": 14493, "loss": 0.2895, "lr": 3.9297053923846576e-05, "epoch": 1.2813591780751015, "percentage": 42.71, "elapsed_time": "21:18:40", "remaining_time": "1 day, 4:35:09", "throughput": 27290.12, "total_tokens": 2093710208} +{"current_steps": 6200, "total_steps": 14493, "loss": 0.2931, "lr": 3.928492258224183e-05, "epoch": 1.2834295178696205, "percentage": 42.78, "elapsed_time": "21:20:24", "remaining_time": "1 day, 4:32:39", "throughput": 27296.59, "total_tokens": 2097054464} +{"current_steps": 6210, "total_steps": 14493, "loss": 0.2871, "lr": 3.927280246885609e-05, "epoch": 1.285499857664139, "percentage": 42.85, "elapsed_time": "21:22:06", "remaining_time": "1 day, 4:30:05", "throughput": 27303.56, "total_tokens": 2100360576} +{"current_steps": 6220, "total_steps": 14493, "loss": 0.2824, "lr": 3.9260693566379486e-05, "epoch": 1.287570197458658, "percentage": 42.92, "elapsed_time": "21:23:50", "remaining_time": "1 day, 4:27:35", "throughput": 27310.93, "total_tokens": 2103774080} +{"current_steps": 6230, "total_steps": 14493, "loss": 0.2894, "lr": 3.924859585753948e-05, "epoch": 1.2896405372531767, "percentage": 42.99, "elapsed_time": "21:25:38", "remaining_time": "1 day, 4:25:10", "throughput": 27316.6, "total_tokens": 2107152896} +{"current_steps": 6240, "total_steps": 14493, "loss": 0.2885, "lr": 3.923650932510079e-05, "epoch": 1.2917108770476955, "percentage": 43.06, "elapsed_time": "21:27:20", "remaining_time": "1 day, 
4:22:38", "throughput": 27323.86, "total_tokens": 2110510720} +{"current_steps": 6250, "total_steps": 14493, "loss": 0.2863, "lr": 3.9224433951865215e-05, "epoch": 1.2937812168422143, "percentage": 43.12, "elapsed_time": "21:29:01", "remaining_time": "1 day, 4:20:04", "throughput": 27331.14, "total_tokens": 2113832320} +{"current_steps": 6260, "total_steps": 14493, "loss": 0.2881, "lr": 3.921236972067165e-05, "epoch": 1.295851556636733, "percentage": 43.19, "elapsed_time": "21:30:50", "remaining_time": "1 day, 4:17:40", "throughput": 27335.68, "total_tokens": 2117153408} +{"current_steps": 6270, "total_steps": 14493, "loss": 0.2879, "lr": 3.920031661439585e-05, "epoch": 1.2979218964312518, "percentage": 43.26, "elapsed_time": "21:32:34", "remaining_time": "1 day, 4:15:11", "throughput": 27341.8, "total_tokens": 2120477056} +{"current_steps": 6280, "total_steps": 14493, "loss": 0.2882, "lr": 3.918827461595045e-05, "epoch": 1.2999922362257705, "percentage": 43.33, "elapsed_time": "21:34:24", "remaining_time": "1 day, 4:12:49", "throughput": 27347.15, "total_tokens": 2123899392} +{"current_steps": 6290, "total_steps": 14493, "loss": 0.2881, "lr": 3.9176243708284746e-05, "epoch": 1.3020625760202893, "percentage": 43.4, "elapsed_time": "21:36:05", "remaining_time": "1 day, 4:10:17", "throughput": 27354.13, "total_tokens": 2127218560} +{"current_steps": 6300, "total_steps": 14493, "loss": 0.2883, "lr": 3.9164223874384715e-05, "epoch": 1.304132915814808, "percentage": 43.47, "elapsed_time": "21:37:50", "remaining_time": "1 day, 4:07:48", "throughput": 27360.78, "total_tokens": 2130584576} +{"current_steps": 6310, "total_steps": 14493, "loss": 0.2863, "lr": 3.91522150972728e-05, "epoch": 1.3062032556093268, "percentage": 43.54, "elapsed_time": "21:39:30", "remaining_time": "1 day, 4:05:14", "throughput": 27368.21, "total_tokens": 2133907840} +{"current_steps": 6320, "total_steps": 14493, "loss": 0.2874, "lr": 3.9140217360007896e-05, "epoch": 1.3082735954038456, 
"percentage": 43.61, "elapsed_time": "21:41:11", "remaining_time": "1 day, 4:02:41", "throughput": 27376.17, "total_tokens": 2137293184} +{"current_steps": 6330, "total_steps": 14493, "loss": 0.2885, "lr": 3.912823064568521e-05, "epoch": 1.3103439351983646, "percentage": 43.68, "elapsed_time": "21:42:56", "remaining_time": "1 day, 4:00:13", "throughput": 27382.46, "total_tokens": 2140651520} +{"current_steps": 6340, "total_steps": 14493, "loss": 0.2876, "lr": 3.9116254937436155e-05, "epoch": 1.312414274992883, "percentage": 43.75, "elapsed_time": "21:44:36", "remaining_time": "1 day, 3:57:40", "throughput": 27390.25, "total_tokens": 2144019840} +{"current_steps": 6350, "total_steps": 14493, "loss": 0.2905, "lr": 3.910429021842825e-05, "epoch": 1.314484614787402, "percentage": 43.81, "elapsed_time": "21:46:21", "remaining_time": "1 day, 3:55:14", "throughput": 27396.62, "total_tokens": 2147402112} +{"current_steps": 6360, "total_steps": 14493, "loss": 0.2879, "lr": 3.9092336471865084e-05, "epoch": 1.3165549545819206, "percentage": 43.88, "elapsed_time": "21:48:08", "remaining_time": "1 day, 3:52:48", "throughput": 27402.33, "total_tokens": 2150759680} +{"current_steps": 6370, "total_steps": 14493, "loss": 0.2906, "lr": 3.908039368098611e-05, "epoch": 1.3186252943764396, "percentage": 43.95, "elapsed_time": "21:49:54", "remaining_time": "1 day, 3:50:23", "throughput": 27408.29, "total_tokens": 2154144128} +{"current_steps": 6380, "total_steps": 14493, "loss": 0.2886, "lr": 3.9068461829066633e-05, "epoch": 1.3206956341709584, "percentage": 44.02, "elapsed_time": "21:51:41", "remaining_time": "1 day, 3:47:58", "throughput": 27414.45, "total_tokens": 2157545088} +{"current_steps": 6390, "total_steps": 14493, "loss": 0.2863, "lr": 3.9056540899417656e-05, "epoch": 1.3227659739654771, "percentage": 44.09, "elapsed_time": "21:53:28", "remaining_time": "1 day, 3:45:34", "throughput": 27420.16, "total_tokens": 2160939136} +{"current_steps": 6400, "total_steps": 14493, "loss": 
0.2876, "lr": 3.904463087538585e-05, "epoch": 1.3248363137599959, "percentage": 44.16, "elapsed_time": "21:55:12", "remaining_time": "1 day, 3:43:06", "throughput": 27426.02, "total_tokens": 2164247424} +{"current_steps": 6410, "total_steps": 14493, "loss": 0.2879, "lr": 3.903273174035336e-05, "epoch": 1.3269066535545146, "percentage": 44.23, "elapsed_time": "21:56:53", "remaining_time": "1 day, 3:40:35", "throughput": 27432.38, "total_tokens": 2167530496} +{"current_steps": 6420, "total_steps": 14493, "loss": 0.2866, "lr": 3.902084347773779e-05, "epoch": 1.3289769933490334, "percentage": 44.3, "elapsed_time": "21:58:37", "remaining_time": "1 day, 3:38:08", "throughput": 27438.31, "total_tokens": 2170852224} +{"current_steps": 6430, "total_steps": 14493, "loss": 0.2874, "lr": 3.900896607099207e-05, "epoch": 1.3310473331435522, "percentage": 44.37, "elapsed_time": "22:00:18", "remaining_time": "1 day, 3:35:36", "throughput": 27445.94, "total_tokens": 2174220032} +{"current_steps": 6440, "total_steps": 14493, "loss": 0.2905, "lr": 3.899709950360437e-05, "epoch": 1.333117672938071, "percentage": 44.44, "elapsed_time": "22:02:02", "remaining_time": "1 day, 3:33:09", "throughput": 27452.19, "total_tokens": 2177565568} +{"current_steps": 6450, "total_steps": 14493, "loss": 0.2865, "lr": 3.8985243759097997e-05, "epoch": 1.3351880127325897, "percentage": 44.5, "elapsed_time": "22:03:52", "remaining_time": "1 day, 3:30:50", "throughput": 27457.25, "total_tokens": 2180990336} +{"current_steps": 6460, "total_steps": 14493, "loss": 0.2885, "lr": 3.897339882103129e-05, "epoch": 1.3372583525271085, "percentage": 44.57, "elapsed_time": "22:05:45", "remaining_time": "1 day, 3:28:34", "throughput": 27461.46, "total_tokens": 2184441216} +{"current_steps": 6470, "total_steps": 14493, "loss": 0.2863, "lr": 3.8961564672997544e-05, "epoch": 1.3393286923216272, "percentage": 44.64, "elapsed_time": "22:07:30", "remaining_time": "1 day, 3:26:08", "throughput": 27467.33, "total_tokens": 
2187777152} +{"current_steps": 6480, "total_steps": 14493, "loss": 0.2855, "lr": 3.8949741298624924e-05, "epoch": 1.3413990321161462, "percentage": 44.71, "elapsed_time": "22:09:16", "remaining_time": "1 day, 3:23:44", "throughput": 27473.45, "total_tokens": 2191181952} +{"current_steps": 6490, "total_steps": 14493, "loss": 0.2874, "lr": 3.8937928681576305e-05, "epoch": 1.3434693719106647, "percentage": 44.78, "elapsed_time": "22:10:56", "remaining_time": "1 day, 3:21:13", "throughput": 27480.71, "total_tokens": 2194514048} +{"current_steps": 6500, "total_steps": 14493, "loss": 0.2926, "lr": 3.8926126805549276e-05, "epoch": 1.3455397117051837, "percentage": 44.85, "elapsed_time": "22:12:45", "remaining_time": "1 day, 3:18:53", "throughput": 27486.1, "total_tokens": 2197950464} +{"current_steps": 6510, "total_steps": 14493, "loss": 0.2862, "lr": 3.891433565427596e-05, "epoch": 1.3476100514997023, "percentage": 44.92, "elapsed_time": "22:14:34", "remaining_time": "1 day, 3:16:32", "throughput": 27491.26, "total_tokens": 2201345280} +{"current_steps": 6520, "total_steps": 14493, "loss": 0.289, "lr": 3.8902555211522964e-05, "epoch": 1.3496803912942212, "percentage": 44.99, "elapsed_time": "22:16:19", "remaining_time": "1 day, 3:14:07", "throughput": 27496.91, "total_tokens": 2204683648} +{"current_steps": 6530, "total_steps": 14493, "loss": 0.2866, "lr": 3.889078546109127e-05, "epoch": 1.35175073108874, "percentage": 45.06, "elapsed_time": "22:18:06", "remaining_time": "1 day, 3:11:45", "throughput": 27502.72, "total_tokens": 2208105856} +{"current_steps": 6540, "total_steps": 14493, "loss": 0.2872, "lr": 3.887902638681616e-05, "epoch": 1.3538210708832588, "percentage": 45.13, "elapsed_time": "22:20:00", "remaining_time": "1 day, 3:09:31", "throughput": 27506.87, "total_tokens": 2211558656} +{"current_steps": 6550, "total_steps": 14493, "loss": 0.2849, "lr": 3.886727797256707e-05, "epoch": 1.3558914106777775, "percentage": 45.19, "elapsed_time": "22:21:42", 
"remaining_time": "1 day, 3:07:03", "throughput": 27513.83, "total_tokens": 2214941824} +{"current_steps": 6560, "total_steps": 14493, "loss": 0.2877, "lr": 3.88555402022476e-05, "epoch": 1.3579617504722963, "percentage": 45.26, "elapsed_time": "22:23:26", "remaining_time": "1 day, 3:04:37", "throughput": 27520.05, "total_tokens": 2218303744} +{"current_steps": 6570, "total_steps": 14493, "loss": 0.2875, "lr": 3.884381305979528e-05, "epoch": 1.360032090266815, "percentage": 45.33, "elapsed_time": "22:25:12", "remaining_time": "1 day, 3:02:14", "throughput": 27525.34, "total_tokens": 2221651328} +{"current_steps": 6580, "total_steps": 14493, "loss": 0.288, "lr": 3.883209652918163e-05, "epoch": 1.3621024300613338, "percentage": 45.4, "elapsed_time": "22:26:58", "remaining_time": "1 day, 2:59:50", "throughput": 27531.52, "total_tokens": 2225043200} +{"current_steps": 6590, "total_steps": 14493, "loss": 0.2842, "lr": 3.8820390594411935e-05, "epoch": 1.3641727698558526, "percentage": 45.47, "elapsed_time": "22:28:42", "remaining_time": "1 day, 2:57:26", "throughput": 27538.23, "total_tokens": 2228475648} +{"current_steps": 6600, "total_steps": 14493, "loss": 0.2897, "lr": 3.880869523952524e-05, "epoch": 1.3662431096503713, "percentage": 45.54, "elapsed_time": "22:30:32", "remaining_time": "1 day, 2:55:07", "throughput": 27543.65, "total_tokens": 2231924224} +{"current_steps": 6610, "total_steps": 14493, "loss": 0.2911, "lr": 3.879701044859422e-05, "epoch": 1.36831344944489, "percentage": 45.61, "elapsed_time": "22:32:17", "remaining_time": "1 day, 2:52:43", "throughput": 27549.94, "total_tokens": 2235340544} +{"current_steps": 6620, "total_steps": 14493, "loss": 0.2852, "lr": 3.87853362057251e-05, "epoch": 1.3703837892394088, "percentage": 45.68, "elapsed_time": "22:34:12", "remaining_time": "1 day, 2:50:31", "throughput": 27553.69, "total_tokens": 2238794496} +{"current_steps": 6630, "total_steps": 14493, "loss": 0.2873, "lr": 3.8773672495057576e-05, "epoch": 
1.3724541290339278, "percentage": 45.75, "elapsed_time": "22:35:53", "remaining_time": "1 day, 2:48:02", "throughput": 27560.24, "total_tokens": 2242110208} +{"current_steps": 6640, "total_steps": 14493, "loss": 0.2869, "lr": 3.8762019300764674e-05, "epoch": 1.3745244688284464, "percentage": 45.82, "elapsed_time": "22:37:38", "remaining_time": "1 day, 2:45:39", "throughput": 27565.97, "total_tokens": 2245491328} +{"current_steps": 6650, "total_steps": 14493, "loss": 0.2843, "lr": 3.875037660705273e-05, "epoch": 1.3765948086229653, "percentage": 45.88, "elapsed_time": "22:39:28", "remaining_time": "1 day, 2:43:22", "throughput": 27570.74, "total_tokens": 2248913920} +{"current_steps": 6660, "total_steps": 14493, "loss": 0.2868, "lr": 3.873874439816127e-05, "epoch": 1.3786651484174839, "percentage": 45.95, "elapsed_time": "22:41:10", "remaining_time": "1 day, 2:40:55", "throughput": 27577.3, "total_tokens": 2252260096} +{"current_steps": 6670, "total_steps": 14493, "loss": 0.2861, "lr": 3.872712265836289e-05, "epoch": 1.3807354882120029, "percentage": 46.02, "elapsed_time": "22:42:55", "remaining_time": "1 day, 2:38:31", "throughput": 27582.91, "total_tokens": 2255596160} +{"current_steps": 6680, "total_steps": 14493, "loss": 0.2876, "lr": 3.8715511371963225e-05, "epoch": 1.3828058280065216, "percentage": 46.09, "elapsed_time": "22:44:40", "remaining_time": "1 day, 2:36:08", "throughput": 27588.41, "total_tokens": 2258961280} +{"current_steps": 6690, "total_steps": 14493, "loss": 0.2856, "lr": 3.87039105233008e-05, "epoch": 1.3848761678010404, "percentage": 46.16, "elapsed_time": "22:46:26", "remaining_time": "1 day, 2:33:46", "throughput": 27593.51, "total_tokens": 2262307840} +{"current_steps": 6700, "total_steps": 14493, "loss": 0.2863, "lr": 3.8692320096746975e-05, "epoch": 1.3869465075955592, "percentage": 46.23, "elapsed_time": "22:48:14", "remaining_time": "1 day, 2:31:26", "throughput": 27598.89, "total_tokens": 2265717376} +{"current_steps": 6710, 
"total_steps": 14493, "loss": 0.2896, "lr": 3.868074007670589e-05, "epoch": 1.389016847390078, "percentage": 46.3, "elapsed_time": "22:49:59", "remaining_time": "1 day, 2:29:03", "throughput": 27605.07, "total_tokens": 2269119104} +{"current_steps": 6720, "total_steps": 14493, "loss": 0.2868, "lr": 3.866917044761428e-05, "epoch": 1.3910871871845967, "percentage": 46.37, "elapsed_time": "22:51:46", "remaining_time": "1 day, 2:26:44", "throughput": 27610.57, "total_tokens": 2272542336} +{"current_steps": 6730, "total_steps": 14493, "loss": 0.2895, "lr": 3.8657611193941486e-05, "epoch": 1.3931575269791154, "percentage": 46.44, "elapsed_time": "22:53:34", "remaining_time": "1 day, 2:24:24", "throughput": 27615.51, "total_tokens": 2275928064} +{"current_steps": 6740, "total_steps": 14493, "loss": 0.286, "lr": 3.8646062300189315e-05, "epoch": 1.3952278667736342, "percentage": 46.51, "elapsed_time": "22:55:25", "remaining_time": "1 day, 2:22:08", "throughput": 27619.81, "total_tokens": 2279326720} +{"current_steps": 6750, "total_steps": 14493, "loss": 0.2856, "lr": 3.8634523750891984e-05, "epoch": 1.397298206568153, "percentage": 46.57, "elapsed_time": "22:57:14", "remaining_time": "1 day, 2:19:50", "throughput": 27625.15, "total_tokens": 2282779136} +{"current_steps": 6760, "total_steps": 14493, "loss": 0.2859, "lr": 3.862299553061597e-05, "epoch": 1.3993685463626717, "percentage": 46.64, "elapsed_time": "22:59:00", "remaining_time": "1 day, 2:17:29", "throughput": 27630.68, "total_tokens": 2286169344} +{"current_steps": 6770, "total_steps": 14493, "loss": 0.284, "lr": 3.861147762396e-05, "epoch": 1.4014388861571905, "percentage": 46.71, "elapsed_time": "23:00:47", "remaining_time": "1 day, 2:15:10", "throughput": 27636.02, "total_tokens": 2289581184} +{"current_steps": 6780, "total_steps": 14493, "loss": 0.2847, "lr": 3.859997001555494e-05, "epoch": 1.4035092259517095, "percentage": 46.78, "elapsed_time": "23:02:38", "remaining_time": "1 day, 2:12:54", "throughput": 
27639.94, "total_tokens": 2292971520} +{"current_steps": 6790, "total_steps": 14493, "loss": 0.2863, "lr": 3.8588472690063676e-05, "epoch": 1.405579565746228, "percentage": 46.85, "elapsed_time": "23:04:20", "remaining_time": "1 day, 2:10:29", "throughput": 27646.0, "total_tokens": 2296303616} +{"current_steps": 6800, "total_steps": 14493, "loss": 0.29, "lr": 3.857698563218106e-05, "epoch": 1.407649905540747, "percentage": 46.92, "elapsed_time": "23:06:06", "remaining_time": "1 day, 2:08:07", "throughput": 27651.79, "total_tokens": 2299693696} +{"current_steps": 6810, "total_steps": 14493, "loss": 0.2856, "lr": 3.8565508826633836e-05, "epoch": 1.4097202453352655, "percentage": 46.99, "elapsed_time": "23:07:58", "remaining_time": "1 day, 2:05:54", "throughput": 27656.27, "total_tokens": 2303167360} +{"current_steps": 6820, "total_steps": 14493, "loss": 0.2871, "lr": 3.855404225818049e-05, "epoch": 1.4117905851297845, "percentage": 47.06, "elapsed_time": "23:09:46", "remaining_time": "1 day, 2:03:36", "throughput": 27661.12, "total_tokens": 2306568192} +{"current_steps": 6830, "total_steps": 14493, "loss": 0.2877, "lr": 3.8542585911611286e-05, "epoch": 1.4138609249243033, "percentage": 47.13, "elapsed_time": "23:11:29", "remaining_time": "1 day, 2:01:12", "throughput": 27666.82, "total_tokens": 2309892864} +{"current_steps": 6840, "total_steps": 14493, "loss": 0.288, "lr": 3.853113977174803e-05, "epoch": 1.415931264718822, "percentage": 47.2, "elapsed_time": "23:13:17", "remaining_time": "1 day, 1:58:53", "throughput": 27671.67, "total_tokens": 2313276288} +{"current_steps": 6850, "total_steps": 14493, "loss": 0.2898, "lr": 3.851970382344411e-05, "epoch": 1.4180016045133408, "percentage": 47.26, "elapsed_time": "23:15:06", "remaining_time": "1 day, 1:56:37", "throughput": 27676.21, "total_tokens": 2316691968} +{"current_steps": 6860, "total_steps": 14493, "loss": 0.2874, "lr": 3.850827805158433e-05, "epoch": 1.4200719443078595, "percentage": 47.33, "elapsed_time": 
"23:16:51", "remaining_time": "1 day, 1:54:15", "throughput": 27681.45, "total_tokens": 2320029312} +{"current_steps": 6870, "total_steps": 14493, "loss": 0.2876, "lr": 3.8496862441084896e-05, "epoch": 1.4221422841023783, "percentage": 47.4, "elapsed_time": "23:18:39", "remaining_time": "1 day, 1:51:57", "throughput": 27686.34, "total_tokens": 2323414016} +{"current_steps": 6880, "total_steps": 14493, "loss": 0.2856, "lr": 3.848545697689328e-05, "epoch": 1.424212623896897, "percentage": 47.47, "elapsed_time": "23:20:22", "remaining_time": "1 day, 1:49:34", "throughput": 27692.22, "total_tokens": 2326781952} +{"current_steps": 6890, "total_steps": 14493, "loss": 0.2877, "lr": 3.8474061643988136e-05, "epoch": 1.4262829636914158, "percentage": 47.54, "elapsed_time": "23:22:05", "remaining_time": "1 day, 1:47:10", "throughput": 27698.47, "total_tokens": 2330136704} +{"current_steps": 6900, "total_steps": 14493, "loss": 0.2872, "lr": 3.846267642737925e-05, "epoch": 1.4283533034859346, "percentage": 47.61, "elapsed_time": "23:23:54", "remaining_time": "1 day, 1:44:54", "throughput": 27703.05, "total_tokens": 2333553664} +{"current_steps": 6910, "total_steps": 14493, "loss": 0.2856, "lr": 3.8451301312107455e-05, "epoch": 1.4304236432804533, "percentage": 47.68, "elapsed_time": "23:25:38", "remaining_time": "1 day, 1:42:32", "throughput": 27708.31, "total_tokens": 2336878336} +{"current_steps": 6920, "total_steps": 14493, "loss": 0.2879, "lr": 3.843993628324451e-05, "epoch": 1.432493983074972, "percentage": 47.75, "elapsed_time": "23:27:22", "remaining_time": "1 day, 1:40:11", "throughput": 27714.03, "total_tokens": 2340255104} +{"current_steps": 6930, "total_steps": 14493, "loss": 0.2866, "lr": 3.8428581325893034e-05, "epoch": 1.434564322869491, "percentage": 47.82, "elapsed_time": "23:29:17", "remaining_time": "1 day, 1:38:00", "throughput": 27717.45, "total_tokens": 2343707392} +{"current_steps": 6940, "total_steps": 14493, "loss": 0.2833, "lr": 3.8417236425186484e-05, 
"epoch": 1.4366346626640096, "percentage": 47.89, "elapsed_time": "23:31:00", "remaining_time": "1 day, 1:35:38", "throughput": 27722.82, "total_tokens": 2347040512} +{"current_steps": 6950, "total_steps": 14493, "loss": 0.2869, "lr": 3.840590156628895e-05, "epoch": 1.4387050024585286, "percentage": 47.95, "elapsed_time": "23:32:52", "remaining_time": "1 day, 1:33:25", "throughput": 27727.3, "total_tokens": 2350506240} +{"current_steps": 6960, "total_steps": 14493, "loss": 0.2859, "lr": 3.8394576734395205e-05, "epoch": 1.4407753422530472, "percentage": 48.02, "elapsed_time": "23:34:33", "remaining_time": "1 day, 1:31:00", "throughput": 27733.59, "total_tokens": 2353833856} +{"current_steps": 6970, "total_steps": 14493, "loss": 0.2883, "lr": 3.838326191473054e-05, "epoch": 1.4428456820475661, "percentage": 48.09, "elapsed_time": "23:36:19", "remaining_time": "1 day, 1:28:41", "throughput": 27738.28, "total_tokens": 2357184512} +{"current_steps": 6980, "total_steps": 14493, "loss": 0.2866, "lr": 3.837195709255069e-05, "epoch": 1.444916021842085, "percentage": 48.16, "elapsed_time": "23:38:00", "remaining_time": "1 day, 1:26:17", "throughput": 27743.67, "total_tokens": 2360454016} +{"current_steps": 6990, "total_steps": 14493, "loss": 0.29, "lr": 3.8360662253141796e-05, "epoch": 1.4469863616366037, "percentage": 48.23, "elapsed_time": "23:39:38", "remaining_time": "1 day, 1:23:50", "throughput": 27749.65, "total_tokens": 2363682432} +{"current_steps": 7000, "total_steps": 14493, "loss": 0.2866, "lr": 3.834937738182029e-05, "epoch": 1.4490567014311224, "percentage": 48.3, "elapsed_time": "23:41:23", "remaining_time": "1 day, 1:21:29", "throughput": 27754.78, "total_tokens": 2367023744} +{"current_steps": 7010, "total_steps": 14493, "loss": 0.2843, "lr": 3.833810246393281e-05, "epoch": 1.4511270412256412, "percentage": 48.37, "elapsed_time": "23:43:13", "remaining_time": "1 day, 1:19:15", "throughput": 27759.39, "total_tokens": 2370481024} +{"current_steps": 7020, 
"total_steps": 14493, "loss": 0.2834, "lr": 3.832683748485616e-05, "epoch": 1.45319738102016, "percentage": 48.44, "elapsed_time": "23:45:02", "remaining_time": "1 day, 1:16:59", "throughput": 27763.4, "total_tokens": 2373833984} +{"current_steps": 7030, "total_steps": 14493, "loss": 0.2847, "lr": 3.8315582429997184e-05, "epoch": 1.4552677208146787, "percentage": 48.51, "elapsed_time": "23:46:41", "remaining_time": "1 day, 1:14:34", "throughput": 27769.79, "total_tokens": 2377144448} +{"current_steps": 7040, "total_steps": 14493, "loss": 0.2873, "lr": 3.830433728479272e-05, "epoch": 1.4573380606091975, "percentage": 48.58, "elapsed_time": "23:48:33", "remaining_time": "1 day, 1:12:21", "throughput": 27773.22, "total_tokens": 2380534016} +{"current_steps": 7050, "total_steps": 14493, "loss": 0.2861, "lr": 3.829310203470948e-05, "epoch": 1.4594084004037162, "percentage": 48.64, "elapsed_time": "23:50:16", "remaining_time": "1 day, 1:10:00", "throughput": 27779.26, "total_tokens": 2383916288} +{"current_steps": 7060, "total_steps": 14493, "loss": 0.2863, "lr": 3.828187666524403e-05, "epoch": 1.461478740198235, "percentage": 48.71, "elapsed_time": "23:51:57", "remaining_time": "1 day, 1:07:36", "throughput": 27785.37, "total_tokens": 2387239040} +{"current_steps": 7070, "total_steps": 14493, "loss": 0.2864, "lr": 3.827066116192266e-05, "epoch": 1.4635490799927537, "percentage": 48.78, "elapsed_time": "23:53:46", "remaining_time": "1 day, 1:05:21", "throughput": 27789.44, "total_tokens": 2390615296} +{"current_steps": 7080, "total_steps": 14493, "loss": 0.2891, "lr": 3.825945551030135e-05, "epoch": 1.4656194197872725, "percentage": 48.85, "elapsed_time": "23:55:29", "remaining_time": "1 day, 1:03:00", "throughput": 27795.09, "total_tokens": 2393984000} +{"current_steps": 7090, "total_steps": 14493, "loss": 0.2863, "lr": 3.824825969596561e-05, "epoch": 1.4676897595817913, "percentage": 48.92, "elapsed_time": "23:57:13", "remaining_time": "1 day, 1:00:40", "throughput": 
27800.79, "total_tokens": 2397350528} +{"current_steps": 7100, "total_steps": 14493, "loss": 0.289, "lr": 3.823707370453054e-05, "epoch": 1.4697600993763102, "percentage": 48.99, "elapsed_time": "23:58:56", "remaining_time": "1 day, 0:58:19", "throughput": 27806.69, "total_tokens": 2400721920} +{"current_steps": 7110, "total_steps": 14493, "loss": 0.2862, "lr": 3.8225897521640614e-05, "epoch": 1.4718304391708288, "percentage": 49.06, "elapsed_time": "1 day, 0:00:46", "remaining_time": "1 day, 0:56:05", "throughput": 27811.04, "total_tokens": 2404158720} +{"current_steps": 7120, "total_steps": 14493, "loss": 0.2849, "lr": 3.8214731132969675e-05, "epoch": 1.4739007789653478, "percentage": 49.13, "elapsed_time": "1 day, 0:02:28", "remaining_time": "1 day, 0:53:43", "throughput": 27816.86, "total_tokens": 2407505024} +{"current_steps": 7130, "total_steps": 14493, "loss": 0.2872, "lr": 3.820357452422084e-05, "epoch": 1.4759711187598665, "percentage": 49.2, "elapsed_time": "1 day, 0:04:16", "remaining_time": "1 day, 0:51:28", "throughput": 27821.8, "total_tokens": 2410936448} +{"current_steps": 7140, "total_steps": 14493, "loss": 0.2856, "lr": 3.8192427681126445e-05, "epoch": 1.4780414585543853, "percentage": 49.27, "elapsed_time": "1 day, 0:06:03", "remaining_time": "1 day, 0:49:12", "throughput": 27826.75, "total_tokens": 2414352000} +{"current_steps": 7150, "total_steps": 14493, "loss": 0.2842, "lr": 3.818129058944793e-05, "epoch": 1.480111798348904, "percentage": 49.33, "elapsed_time": "1 day, 0:07:51", "remaining_time": "1 day, 0:46:55", "throughput": 27830.83, "total_tokens": 2417692800} +{"current_steps": 7160, "total_steps": 14493, "loss": 0.2859, "lr": 3.817016323497578e-05, "epoch": 1.4821821381434228, "percentage": 49.4, "elapsed_time": "1 day, 0:09:37", "remaining_time": "1 day, 0:44:39", "throughput": 27835.7, "total_tokens": 2421087744} +{"current_steps": 7170, "total_steps": 14493, "loss": 0.2872, "lr": 3.8159045603529455e-05, "epoch": 1.4842524779379416, 
"percentage": 49.47, "elapsed_time": "1 day, 0:11:22", "remaining_time": "1 day, 0:42:20", "throughput": 27840.18, "total_tokens": 2424389120} +{"current_steps": 7180, "total_steps": 14493, "loss": 0.2848, "lr": 3.8147937680957334e-05, "epoch": 1.4863228177324603, "percentage": 49.54, "elapsed_time": "1 day, 0:13:04", "remaining_time": "1 day, 0:39:59", "throughput": 27845.95, "total_tokens": 2427725184} +{"current_steps": 7190, "total_steps": 14493, "loss": 0.2852, "lr": 3.813683945313658e-05, "epoch": 1.488393157526979, "percentage": 49.61, "elapsed_time": "1 day, 0:14:54", "remaining_time": "1 day, 0:37:46", "throughput": 27850.2, "total_tokens": 2431170816} +{"current_steps": 7200, "total_steps": 14493, "loss": 0.2849, "lr": 3.812575090597313e-05, "epoch": 1.4904634973214979, "percentage": 49.68, "elapsed_time": "1 day, 0:16:39", "remaining_time": "1 day, 0:35:28", "throughput": 27855.74, "total_tokens": 2434577280} +{"current_steps": 7210, "total_steps": 14493, "loss": 0.284, "lr": 3.811467202540156e-05, "epoch": 1.4925338371160166, "percentage": 49.75, "elapsed_time": "1 day, 0:18:24", "remaining_time": "1 day, 0:33:10", "throughput": 27860.74, "total_tokens": 2437948288} +{"current_steps": 7220, "total_steps": 14493, "loss": 0.2862, "lr": 3.810360279738507e-05, "epoch": 1.4946041769105354, "percentage": 49.82, "elapsed_time": "1 day, 0:20:13", "remaining_time": "1 day, 0:30:57", "throughput": 27864.91, "total_tokens": 2441355904} +{"current_steps": 7230, "total_steps": 14493, "loss": 0.2889, "lr": 3.809254320791535e-05, "epoch": 1.4966745167050541, "percentage": 49.89, "elapsed_time": "1 day, 0:22:03", "remaining_time": "1 day, 0:28:43", "throughput": 27869.0, "total_tokens": 2444766336} +{"current_steps": 7240, "total_steps": 14493, "loss": 0.2834, "lr": 3.808149324301256e-05, "epoch": 1.498744856499573, "percentage": 49.96, "elapsed_time": "1 day, 0:23:52", "remaining_time": "1 day, 0:26:29", "throughput": 27873.19, "total_tokens": 2448164864} 
+{"current_steps": 7250, "total_steps": 14493, "loss": 0.2854, "lr": 3.807045288872522e-05, "epoch": 1.5008151962940919, "percentage": 50.02, "elapsed_time": "1 day, 0:25:39", "remaining_time": "1 day, 0:24:14", "throughput": 27877.76, "total_tokens": 2451561344} +{"current_steps": 7260, "total_steps": 14493, "loss": 0.2851, "lr": 3.805942213113015e-05, "epoch": 1.5028855360886104, "percentage": 50.09, "elapsed_time": "1 day, 0:27:24", "remaining_time": "1 day, 0:21:56", "throughput": 27883.27, "total_tokens": 2454957696} +{"current_steps": 7270, "total_steps": 14493, "loss": 0.287, "lr": 3.8048400956332385e-05, "epoch": 1.5049558758831294, "percentage": 50.16, "elapsed_time": "1 day, 0:29:07", "remaining_time": "1 day, 0:19:37", "throughput": 27888.73, "total_tokens": 2458316800} +{"current_steps": 7280, "total_steps": 14493, "loss": 0.2864, "lr": 3.803738935046512e-05, "epoch": 1.507026215677648, "percentage": 50.23, "elapsed_time": "1 day, 0:31:02", "remaining_time": "1 day, 0:17:29", "throughput": 27892.16, "total_tokens": 2461824768} +{"current_steps": 7290, "total_steps": 14493, "loss": 0.2855, "lr": 3.802638729968962e-05, "epoch": 1.509096555472167, "percentage": 50.3, "elapsed_time": "1 day, 0:32:47", "remaining_time": "1 day, 0:15:12", "throughput": 27897.28, "total_tokens": 2465211520} +{"current_steps": 7300, "total_steps": 14493, "loss": 0.2876, "lr": 3.8015394790195145e-05, "epoch": 1.5111668952666857, "percentage": 50.37, "elapsed_time": "1 day, 0:34:36", "remaining_time": "1 day, 0:12:59", "throughput": 27901.1, "total_tokens": 2468598784} +{"current_steps": 7310, "total_steps": 14493, "loss": 0.2834, "lr": 3.800441180819891e-05, "epoch": 1.5132372350612044, "percentage": 50.44, "elapsed_time": "1 day, 0:36:24", "remaining_time": "1 day, 0:10:45", "throughput": 27905.48, "total_tokens": 2471990784} +{"current_steps": 7320, "total_steps": 14493, "loss": 0.287, "lr": 3.7993438339945965e-05, "epoch": 1.5153075748557232, "percentage": 50.51, 
"elapsed_time": "1 day, 0:38:12", "remaining_time": "1 day, 0:08:31", "throughput": 27909.77, "total_tokens": 2475398144} +{"current_steps": 7330, "total_steps": 14493, "loss": 0.2855, "lr": 3.798247437170914e-05, "epoch": 1.517377914650242, "percentage": 50.58, "elapsed_time": "1 day, 0:39:59", "remaining_time": "1 day, 0:06:16", "throughput": 27913.85, "total_tokens": 2478731520} +{"current_steps": 7340, "total_steps": 14493, "loss": 0.2858, "lr": 3.797151988978901e-05, "epoch": 1.5194482544447607, "percentage": 50.65, "elapsed_time": "1 day, 0:41:51", "remaining_time": "1 day, 0:04:06", "throughput": 27917.3, "total_tokens": 2482164352} +{"current_steps": 7350, "total_steps": 14493, "loss": 0.2849, "lr": 3.796057488051377e-05, "epoch": 1.5215185942392795, "percentage": 50.71, "elapsed_time": "1 day, 0:43:39", "remaining_time": "1 day, 0:01:52", "throughput": 27922.16, "total_tokens": 2485618048} +{"current_steps": 7360, "total_steps": 14493, "loss": 0.2879, "lr": 3.794963933023918e-05, "epoch": 1.5235889340337982, "percentage": 50.78, "elapsed_time": "1 day, 0:45:26", "remaining_time": "23:59:37", "throughput": 27926.91, "total_tokens": 2489028608} +{"current_steps": 7370, "total_steps": 14493, "loss": 0.287, "lr": 3.79387132253485e-05, "epoch": 1.525659273828317, "percentage": 50.85, "elapsed_time": "1 day, 0:47:05", "remaining_time": "23:57:14", "throughput": 27933.16, "total_tokens": 2492339712} +{"current_steps": 7380, "total_steps": 14493, "loss": 0.2831, "lr": 3.792779655225243e-05, "epoch": 1.527729613622836, "percentage": 50.92, "elapsed_time": "1 day, 0:48:46", "remaining_time": "23:54:54", "throughput": 27938.3, "total_tokens": 2495635968} +{"current_steps": 7390, "total_steps": 14493, "loss": 0.2838, "lr": 3.791688929738902e-05, "epoch": 1.5297999534173545, "percentage": 50.99, "elapsed_time": "1 day, 0:50:32", "remaining_time": "23:52:39", "throughput": 27942.77, "total_tokens": 2498993408} +{"current_steps": 7400, "total_steps": 14493, "loss": 
0.2865, "lr": 3.79059914472236e-05, "epoch": 1.5318702932118735, "percentage": 51.06, "elapsed_time": "1 day, 0:52:19", "remaining_time": "23:50:24", "throughput": 27947.06, "total_tokens": 2502361344} +{"current_steps": 7410, "total_steps": 14493, "loss": 0.2871, "lr": 3.7895102988248716e-05, "epoch": 1.533940633006392, "percentage": 51.13, "elapsed_time": "1 day, 0:54:05", "remaining_time": "23:48:09", "throughput": 27951.54, "total_tokens": 2505730944} +{"current_steps": 7420, "total_steps": 14493, "loss": 0.2876, "lr": 3.7884223906984064e-05, "epoch": 1.536010972800911, "percentage": 51.2, "elapsed_time": "1 day, 0:55:49", "remaining_time": "23:45:51", "throughput": 27955.99, "total_tokens": 2509026432} +{"current_steps": 7430, "total_steps": 14493, "loss": 0.2844, "lr": 3.787335418997641e-05, "epoch": 1.5380813125954296, "percentage": 51.27, "elapsed_time": "1 day, 0:57:34", "remaining_time": "23:43:36", "throughput": 27961.08, "total_tokens": 2512430080} +{"current_steps": 7440, "total_steps": 14493, "loss": 0.2853, "lr": 3.786249382379952e-05, "epoch": 1.5401516523899486, "percentage": 51.34, "elapsed_time": "1 day, 0:59:16", "remaining_time": "23:41:17", "throughput": 27966.17, "total_tokens": 2515747456} +{"current_steps": 7450, "total_steps": 14493, "loss": 0.287, "lr": 3.785164279505411e-05, "epoch": 1.5422219921844673, "percentage": 51.4, "elapsed_time": "1 day, 1:00:59", "remaining_time": "23:38:59", "throughput": 27970.95, "total_tokens": 2519062784} +{"current_steps": 7460, "total_steps": 14493, "loss": 0.2854, "lr": 3.7840801090367744e-05, "epoch": 1.544292331978986, "percentage": 51.47, "elapsed_time": "1 day, 1:02:43", "remaining_time": "23:36:42", "throughput": 27975.88, "total_tokens": 2522407680} +{"current_steps": 7470, "total_steps": 14493, "loss": 0.2877, "lr": 3.782996869639479e-05, "epoch": 1.5463626717735048, "percentage": 51.54, "elapsed_time": "1 day, 1:04:32", "remaining_time": "23:34:30", "throughput": 27979.2, "total_tokens": 
2525758848} +{"current_steps": 7480, "total_steps": 14493, "loss": 0.2849, "lr": 3.7819145599816354e-05, "epoch": 1.5484330115680236, "percentage": 51.61, "elapsed_time": "1 day, 1:06:18", "remaining_time": "23:32:15", "throughput": 27983.57, "total_tokens": 2529115392} +{"current_steps": 7490, "total_steps": 14493, "loss": 0.2823, "lr": 3.780833178734018e-05, "epoch": 1.5505033513625424, "percentage": 51.68, "elapsed_time": "1 day, 1:08:06", "remaining_time": "23:30:03", "throughput": 27987.74, "total_tokens": 2532512000} +{"current_steps": 7500, "total_steps": 14493, "loss": 0.2874, "lr": 3.77975272457006e-05, "epoch": 1.5525736911570611, "percentage": 51.75, "elapsed_time": "1 day, 1:09:52", "remaining_time": "23:27:48", "throughput": 27992.9, "total_tokens": 2535943808} +{"current_steps": 7510, "total_steps": 14493, "loss": 0.284, "lr": 3.778673196165851e-05, "epoch": 1.5546440309515799, "percentage": 51.82, "elapsed_time": "1 day, 1:11:39", "remaining_time": "23:25:35", "throughput": 27997.1, "total_tokens": 2539332224} +{"current_steps": 7520, "total_steps": 14493, "loss": 0.2829, "lr": 3.7775945922001186e-05, "epoch": 1.5567143707460986, "percentage": 51.89, "elapsed_time": "1 day, 1:13:29", "remaining_time": "23:23:23", "throughput": 28001.18, "total_tokens": 2542761856} +{"current_steps": 7530, "total_steps": 14493, "loss": 0.2863, "lr": 3.776516911354236e-05, "epoch": 1.5587847105406176, "percentage": 51.96, "elapsed_time": "1 day, 1:15:24", "remaining_time": "23:21:17", "throughput": 28004.59, "total_tokens": 2546293888} +{"current_steps": 7540, "total_steps": 14493, "loss": 0.2867, "lr": 3.775440152312205e-05, "epoch": 1.5608550503351362, "percentage": 52.03, "elapsed_time": "1 day, 1:17:14", "remaining_time": "23:19:07", "throughput": 28008.12, "total_tokens": 2549700864} +{"current_steps": 7550, "total_steps": 14493, "loss": 0.2849, "lr": 3.774364313760652e-05, "epoch": 1.5629253901296551, "percentage": 52.09, "elapsed_time": "1 day, 1:19:00", 
"remaining_time": "23:16:52", "throughput": 28012.85, "total_tokens": 2553100800} +{"current_steps": 7560, "total_steps": 14493, "loss": 0.2853, "lr": 3.7732893943888224e-05, "epoch": 1.5649957299241737, "percentage": 52.16, "elapsed_time": "1 day, 1:20:48", "remaining_time": "23:14:40", "throughput": 28017.11, "total_tokens": 2556515968} +{"current_steps": 7570, "total_steps": 14493, "loss": 0.2872, "lr": 3.772215392888574e-05, "epoch": 1.5670660697186927, "percentage": 52.23, "elapsed_time": "1 day, 1:22:39", "remaining_time": "23:12:31", "throughput": 28020.98, "total_tokens": 2559982208} +{"current_steps": 7580, "total_steps": 14493, "loss": 0.2822, "lr": 3.771142307954368e-05, "epoch": 1.5691364095132112, "percentage": 52.3, "elapsed_time": "1 day, 1:24:30", "remaining_time": "23:10:21", "throughput": 28024.61, "total_tokens": 2563436416} +{"current_steps": 7590, "total_steps": 14493, "loss": 0.2842, "lr": 3.770070138283264e-05, "epoch": 1.5712067493077302, "percentage": 52.37, "elapsed_time": "1 day, 1:26:22", "remaining_time": "23:08:12", "throughput": 28028.55, "total_tokens": 2566911232} +{"current_steps": 7600, "total_steps": 14493, "loss": 0.2859, "lr": 3.768998882574915e-05, "epoch": 1.573277089102249, "percentage": 52.44, "elapsed_time": "1 day, 1:28:15", "remaining_time": "23:06:05", "throughput": 28031.9, "total_tokens": 2570398720} +{"current_steps": 7610, "total_steps": 14493, "loss": 0.2845, "lr": 3.767928539531557e-05, "epoch": 1.5753474288967677, "percentage": 52.51, "elapsed_time": "1 day, 1:30:11", "remaining_time": "23:04:00", "throughput": 28034.17, "total_tokens": 2573857408} +{"current_steps": 7620, "total_steps": 14493, "loss": 0.2874, "lr": 3.7668591078580055e-05, "epoch": 1.5774177686912865, "percentage": 52.58, "elapsed_time": "1 day, 1:31:56", "remaining_time": "23:01:45", "throughput": 28038.78, "total_tokens": 2577213952} +{"current_steps": 7630, "total_steps": 14493, "loss": 0.2859, "lr": 3.765790586261647e-05, "epoch": 
1.5794881084858052, "percentage": 52.65, "elapsed_time": "1 day, 1:33:42", "remaining_time": "22:59:32", "throughput": 28043.33, "total_tokens": 2580627712} +{"current_steps": 7640, "total_steps": 14493, "loss": 0.2823, "lr": 3.7647229734524326e-05, "epoch": 1.581558448280324, "percentage": 52.72, "elapsed_time": "1 day, 1:35:25", "remaining_time": "22:57:15", "throughput": 28047.88, "total_tokens": 2583936768} +{"current_steps": 7650, "total_steps": 14493, "loss": 0.2854, "lr": 3.7636562681428744e-05, "epoch": 1.5836287880748428, "percentage": 52.78, "elapsed_time": "1 day, 1:37:12", "remaining_time": "22:55:02", "throughput": 28051.93, "total_tokens": 2587294848} +{"current_steps": 7660, "total_steps": 14493, "loss": 0.2838, "lr": 3.7625904690480346e-05, "epoch": 1.5856991278693615, "percentage": 52.85, "elapsed_time": "1 day, 1:39:09", "remaining_time": "22:52:59", "throughput": 28054.7, "total_tokens": 2590841088} +{"current_steps": 7670, "total_steps": 14493, "loss": 0.2824, "lr": 3.7615255748855224e-05, "epoch": 1.5877694676638803, "percentage": 52.92, "elapsed_time": "1 day, 1:40:56", "remaining_time": "22:50:46", "throughput": 28058.24, "total_tokens": 2594164864} +{"current_steps": 7680, "total_steps": 14493, "loss": 0.2841, "lr": 3.7604615843754845e-05, "epoch": 1.5898398074583993, "percentage": 52.99, "elapsed_time": "1 day, 1:42:39", "remaining_time": "22:48:30", "throughput": 28063.1, "total_tokens": 2597507072} +{"current_steps": 7690, "total_steps": 14493, "loss": 0.2872, "lr": 3.759398496240601e-05, "epoch": 1.5919101472529178, "percentage": 53.06, "elapsed_time": "1 day, 1:44:22", "remaining_time": "22:46:14", "throughput": 28067.98, "total_tokens": 2600854400} +{"current_steps": 7700, "total_steps": 14493, "loss": 0.286, "lr": 3.7583363092060815e-05, "epoch": 1.5939804870474368, "percentage": 53.13, "elapsed_time": "1 day, 1:46:10", "remaining_time": "22:44:03", "throughput": 28072.18, "total_tokens": 2604274944} +{"current_steps": 7710, 
"total_steps": 14493, "loss": 0.2847, "lr": 3.757275021999649e-05, "epoch": 1.5960508268419553, "percentage": 53.2, "elapsed_time": "1 day, 1:47:55", "remaining_time": "22:41:48", "throughput": 28076.0, "total_tokens": 2607575424} +{"current_steps": 7720, "total_steps": 14493, "loss": 0.2845, "lr": 3.7562146333515445e-05, "epoch": 1.5981211666364743, "percentage": 53.27, "elapsed_time": "1 day, 1:49:36", "remaining_time": "22:39:31", "throughput": 28081.58, "total_tokens": 2610928128} +{"current_steps": 7730, "total_steps": 14493, "loss": 0.2873, "lr": 3.7551551419945167e-05, "epoch": 1.6001915064309928, "percentage": 53.34, "elapsed_time": "1 day, 1:51:20", "remaining_time": "22:37:16", "throughput": 28086.21, "total_tokens": 2614265856} +{"current_steps": 7740, "total_steps": 14493, "loss": 0.2863, "lr": 3.7540965466638104e-05, "epoch": 1.6022618462255118, "percentage": 53.41, "elapsed_time": "1 day, 1:53:01", "remaining_time": "22:34:59", "throughput": 28091.16, "total_tokens": 2617589504} +{"current_steps": 7750, "total_steps": 14493, "loss": 0.2834, "lr": 3.753038846097172e-05, "epoch": 1.6043321860200306, "percentage": 53.47, "elapsed_time": "1 day, 1:54:51", "remaining_time": "22:32:49", "throughput": 28094.94, "total_tokens": 2621012992} +{"current_steps": 7760, "total_steps": 14493, "loss": 0.2847, "lr": 3.751982039034827e-05, "epoch": 1.6064025258145493, "percentage": 53.54, "elapsed_time": "1 day, 1:56:32", "remaining_time": "22:30:32", "throughput": 28099.48, "total_tokens": 2624279808} +{"current_steps": 7770, "total_steps": 14493, "loss": 0.2859, "lr": 3.75092612421949e-05, "epoch": 1.608472865609068, "percentage": 53.61, "elapsed_time": "1 day, 1:58:10", "remaining_time": "22:28:12", "throughput": 28105.07, "total_tokens": 2627562880} +{"current_steps": 7780, "total_steps": 14493, "loss": 0.2838, "lr": 3.7498711003963475e-05, "epoch": 1.6105432054035869, "percentage": 53.68, "elapsed_time": "1 day, 1:59:58", "remaining_time": "22:26:01", 
"throughput": 28108.49, "total_tokens": 2630902272} +{"current_steps": 7790, "total_steps": 14493, "loss": 0.2855, "lr": 3.748816966313058e-05, "epoch": 1.6126135451981056, "percentage": 53.75, "elapsed_time": "1 day, 2:01:43", "remaining_time": "22:23:48", "throughput": 28113.28, "total_tokens": 2634312960} +{"current_steps": 7800, "total_steps": 14493, "loss": 0.285, "lr": 3.7477637207197374e-05, "epoch": 1.6146838849926244, "percentage": 53.82, "elapsed_time": "1 day, 2:03:33", "remaining_time": "22:21:39", "throughput": 28117.08, "total_tokens": 2637768960} +{"current_steps": 7810, "total_steps": 14493, "loss": 0.2841, "lr": 3.7467113623689666e-05, "epoch": 1.6167542247871431, "percentage": 53.89, "elapsed_time": "1 day, 2:05:18", "remaining_time": "22:19:26", "throughput": 28121.62, "total_tokens": 2641146368} +{"current_steps": 7820, "total_steps": 14493, "loss": 0.2873, "lr": 3.745659890015768e-05, "epoch": 1.618824564581662, "percentage": 53.96, "elapsed_time": "1 day, 2:07:04", "remaining_time": "22:17:13", "throughput": 28125.81, "total_tokens": 2644508800} +{"current_steps": 7830, "total_steps": 14493, "loss": 0.2862, "lr": 3.744609302417615e-05, "epoch": 1.620894904376181, "percentage": 54.03, "elapsed_time": "1 day, 2:09:01", "remaining_time": "22:15:10", "throughput": 28127.76, "total_tokens": 2647980544} +{"current_steps": 7840, "total_steps": 14493, "loss": 0.285, "lr": 3.7435595983344175e-05, "epoch": 1.6229652441706994, "percentage": 54.1, "elapsed_time": "1 day, 2:10:41", "remaining_time": "22:12:53", "throughput": 28133.16, "total_tokens": 2651322112} +{"current_steps": 7850, "total_steps": 14493, "loss": 0.286, "lr": 3.7425107765285155e-05, "epoch": 1.6250355839652184, "percentage": 54.16, "elapsed_time": "1 day, 2:12:29", "remaining_time": "22:10:42", "throughput": 28136.86, "total_tokens": 2654687744} +{"current_steps": 7860, "total_steps": 14493, "loss": 0.2838, "lr": 3.741462835764676e-05, "epoch": 1.627105923759737, "percentage": 54.23, 
"elapsed_time": "1 day, 2:14:18", "remaining_time": "22:08:32", "throughput": 28140.62, "total_tokens": 2658114944} +{"current_steps": 7870, "total_steps": 14493, "loss": 0.2859, "lr": 3.740415774810088e-05, "epoch": 1.629176263554256, "percentage": 54.3, "elapsed_time": "1 day, 2:16:03", "remaining_time": "22:06:19", "throughput": 28144.66, "total_tokens": 2661453440} +{"current_steps": 7880, "total_steps": 14493, "loss": 0.2865, "lr": 3.739369592434351e-05, "epoch": 1.6312466033487745, "percentage": 54.37, "elapsed_time": "1 day, 2:17:54", "remaining_time": "22:04:12", "throughput": 28147.63, "total_tokens": 2664871296} +{"current_steps": 7890, "total_steps": 14493, "loss": 0.2853, "lr": 3.738324287409473e-05, "epoch": 1.6333169431432935, "percentage": 54.44, "elapsed_time": "1 day, 2:19:38", "remaining_time": "22:01:58", "throughput": 28151.71, "total_tokens": 2668164992} +{"current_steps": 7900, "total_steps": 14493, "loss": 0.2854, "lr": 3.7372798585098644e-05, "epoch": 1.6353872829378122, "percentage": 54.51, "elapsed_time": "1 day, 2:21:32", "remaining_time": "21:59:53", "throughput": 28154.15, "total_tokens": 2671621888} +{"current_steps": 7910, "total_steps": 14493, "loss": 0.2843, "lr": 3.736236304512331e-05, "epoch": 1.637457622732331, "percentage": 54.58, "elapsed_time": "1 day, 2:23:16", "remaining_time": "21:57:39", "throughput": 28158.94, "total_tokens": 2675011456} +{"current_steps": 7920, "total_steps": 14493, "loss": 0.2832, "lr": 3.735193624196067e-05, "epoch": 1.6395279625268497, "percentage": 54.65, "elapsed_time": "1 day, 2:24:57", "remaining_time": "21:55:23", "throughput": 28163.91, "total_tokens": 2678306048} +{"current_steps": 7930, "total_steps": 14493, "loss": 0.2828, "lr": 3.7341518163426514e-05, "epoch": 1.6415983023213685, "percentage": 54.72, "elapsed_time": "1 day, 2:26:43", "remaining_time": "21:53:11", "throughput": 28168.34, "total_tokens": 2681715328} +{"current_steps": 7940, "total_steps": 14493, "loss": 0.2832, "lr": 
3.73311087973604e-05, "epoch": 1.6436686421158873, "percentage": 54.79, "elapsed_time": "1 day, 2:28:21", "remaining_time": "21:50:53", "throughput": 28174.17, "total_tokens": 2685042304} +{"current_steps": 7950, "total_steps": 14493, "loss": 0.2845, "lr": 3.732070813162561e-05, "epoch": 1.645738981910406, "percentage": 54.85, "elapsed_time": "1 day, 2:30:03", "remaining_time": "21:48:39", "throughput": 28179.12, "total_tokens": 2688392448} +{"current_steps": 7960, "total_steps": 14493, "loss": 0.285, "lr": 3.731031615410908e-05, "epoch": 1.6478093217049248, "percentage": 54.92, "elapsed_time": "1 day, 2:31:48", "remaining_time": "21:46:26", "throughput": 28183.81, "total_tokens": 2691793920} +{"current_steps": 7970, "total_steps": 14493, "loss": 0.2864, "lr": 3.729993285272132e-05, "epoch": 1.6498796614994435, "percentage": 54.99, "elapsed_time": "1 day, 2:33:32", "remaining_time": "21:44:13", "throughput": 28187.77, "total_tokens": 2695101184} +{"current_steps": 7980, "total_steps": 14493, "loss": 0.2838, "lr": 3.7289558215396414e-05, "epoch": 1.6519500012939625, "percentage": 55.06, "elapsed_time": "1 day, 2:35:17", "remaining_time": "21:42:01", "throughput": 28191.92, "total_tokens": 2698454784} +{"current_steps": 7990, "total_steps": 14493, "loss": 0.2851, "lr": 3.727919223009191e-05, "epoch": 1.654020341088481, "percentage": 55.13, "elapsed_time": "1 day, 2:37:01", "remaining_time": "21:39:48", "throughput": 28196.13, "total_tokens": 2701784064} +{"current_steps": 8000, "total_steps": 14493, "loss": 0.2842, "lr": 3.726883488478877e-05, "epoch": 1.656090680883, "percentage": 55.2, "elapsed_time": "1 day, 2:38:42", "remaining_time": "21:37:33", "throughput": 28201.12, "total_tokens": 2705128832} +{"current_steps": 8010, "total_steps": 14493, "loss": 0.2861, "lr": 3.7258486167491323e-05, "epoch": 1.6581610206775186, "percentage": 55.27, "elapsed_time": "1 day, 2:40:33", "remaining_time": "21:35:25", "throughput": 28204.12, "total_tokens": 2708534656} 
+{"current_steps": 8020, "total_steps": 14493, "loss": 0.2833, "lr": 3.724814606622721e-05, "epoch": 1.6602313604720376, "percentage": 55.34, "elapsed_time": "1 day, 2:42:15", "remaining_time": "21:33:11", "throughput": 28208.89, "total_tokens": 2711886208} +{"current_steps": 8030, "total_steps": 14493, "loss": 0.2832, "lr": 3.7237814569047294e-05, "epoch": 1.662301700266556, "percentage": 55.41, "elapsed_time": "1 day, 2:44:00", "remaining_time": "21:31:00", "throughput": 28212.59, "total_tokens": 2715202688} +{"current_steps": 8040, "total_steps": 14493, "loss": 0.2858, "lr": 3.7227491664025656e-05, "epoch": 1.664372040061075, "percentage": 55.48, "elapsed_time": "1 day, 2:45:49", "remaining_time": "21:28:51", "throughput": 28216.2, "total_tokens": 2718611840} +{"current_steps": 8050, "total_steps": 14493, "loss": 0.2882, "lr": 3.721717733925948e-05, "epoch": 1.6664423798555938, "percentage": 55.54, "elapsed_time": "1 day, 2:47:33", "remaining_time": "21:26:38", "throughput": 28220.48, "total_tokens": 2721973120} +{"current_steps": 8060, "total_steps": 14493, "loss": 0.2813, "lr": 3.720687158286904e-05, "epoch": 1.6685127196501126, "percentage": 55.61, "elapsed_time": "1 day, 2:49:13", "remaining_time": "21:24:22", "throughput": 28225.58, "total_tokens": 2725266688} +{"current_steps": 8070, "total_steps": 14493, "loss": 0.2861, "lr": 3.719657438299762e-05, "epoch": 1.6705830594446314, "percentage": 55.68, "elapsed_time": "1 day, 2:50:50", "remaining_time": "21:22:05", "throughput": 28231.38, "total_tokens": 2728581376} +{"current_steps": 8080, "total_steps": 14493, "loss": 0.2839, "lr": 3.7186285727811446e-05, "epoch": 1.6726533992391501, "percentage": 55.75, "elapsed_time": "1 day, 2:52:36", "remaining_time": "21:19:54", "throughput": 28236.2, "total_tokens": 2732034816} +{"current_steps": 8090, "total_steps": 14493, "loss": 0.2828, "lr": 3.717600560549967e-05, "epoch": 1.674723739033669, "percentage": 55.82, "elapsed_time": "1 day, 2:54:21", "remaining_time": 
"21:17:43", "throughput": 28240.55, "total_tokens": 2735431040} +{"current_steps": 8100, "total_steps": 14493, "loss": 0.2846, "lr": 3.716573400427426e-05, "epoch": 1.6767940788281877, "percentage": 55.89, "elapsed_time": "1 day, 2:56:13", "remaining_time": "21:15:36", "throughput": 28243.31, "total_tokens": 2738841216} +{"current_steps": 8110, "total_steps": 14493, "loss": 0.2813, "lr": 3.7155470912370004e-05, "epoch": 1.6788644186227064, "percentage": 55.96, "elapsed_time": "1 day, 2:58:01", "remaining_time": "21:13:28", "throughput": 28247.17, "total_tokens": 2742282880} +{"current_steps": 8120, "total_steps": 14493, "loss": 0.2825, "lr": 3.714521631804439e-05, "epoch": 1.6809347584172252, "percentage": 56.03, "elapsed_time": "1 day, 2:59:53", "remaining_time": "21:11:22", "throughput": 28249.66, "total_tokens": 2745686912} +{"current_steps": 8130, "total_steps": 14493, "loss": 0.284, "lr": 3.713497020957759e-05, "epoch": 1.6830050982117442, "percentage": 56.1, "elapsed_time": "1 day, 3:01:43", "remaining_time": "21:09:14", "throughput": 28252.53, "total_tokens": 2749058560} +{"current_steps": 8140, "total_steps": 14493, "loss": 0.2839, "lr": 3.712473257527238e-05, "epoch": 1.6850754380062627, "percentage": 56.17, "elapsed_time": "1 day, 3:03:28", "remaining_time": "21:07:04", "throughput": 28256.54, "total_tokens": 2752432384} +{"current_steps": 8150, "total_steps": 14493, "loss": 0.2856, "lr": 3.711450340345412e-05, "epoch": 1.6871457778007817, "percentage": 56.23, "elapsed_time": "1 day, 3:05:13", "remaining_time": "21:04:53", "throughput": 28260.6, "total_tokens": 2755797504} +{"current_steps": 8160, "total_steps": 14493, "loss": 0.2835, "lr": 3.710428268247067e-05, "epoch": 1.6892161175953002, "percentage": 56.3, "elapsed_time": "1 day, 3:06:57", "remaining_time": "21:02:41", "throughput": 28265.04, "total_tokens": 2759163776} +{"current_steps": 8170, "total_steps": 14493, "loss": 0.286, "lr": 3.709407040069233e-05, "epoch": 1.6912864573898192, 
"percentage": 56.37, "elapsed_time": "1 day, 3:08:46", "remaining_time": "21:00:33", "throughput": 28268.67, "total_tokens": 2762585344} +{"current_steps": 8180, "total_steps": 14493, "loss": 0.2874, "lr": 3.708386654651179e-05, "epoch": 1.6933567971843377, "percentage": 56.44, "elapsed_time": "1 day, 3:10:31", "remaining_time": "20:58:22", "throughput": 28272.77, "total_tokens": 2765966976} +{"current_steps": 8190, "total_steps": 14493, "loss": 0.2853, "lr": 3.707367110834409e-05, "epoch": 1.6954271369788567, "percentage": 56.51, "elapsed_time": "1 day, 3:12:14", "remaining_time": "20:56:10", "throughput": 28276.75, "total_tokens": 2769282304} +{"current_steps": 8200, "total_steps": 14493, "loss": 0.2808, "lr": 3.7063484074626555e-05, "epoch": 1.6974974767733755, "percentage": 56.58, "elapsed_time": "1 day, 3:14:04", "remaining_time": "20:54:02", "throughput": 28279.99, "total_tokens": 2772686208} +{"current_steps": 8210, "total_steps": 14493, "loss": 0.2802, "lr": 3.7053305433818725e-05, "epoch": 1.6995678165678942, "percentage": 56.65, "elapsed_time": "1 day, 3:15:50", "remaining_time": "20:51:52", "throughput": 28284.34, "total_tokens": 2776110848} +{"current_steps": 8220, "total_steps": 14493, "loss": 0.2847, "lr": 3.704313517440232e-05, "epoch": 1.701638156362413, "percentage": 56.72, "elapsed_time": "1 day, 3:17:43", "remaining_time": "20:49:48", "throughput": 28287.08, "total_tokens": 2779589632} +{"current_steps": 8230, "total_steps": 14493, "loss": 0.2804, "lr": 3.703297328488118e-05, "epoch": 1.7037084961569318, "percentage": 56.79, "elapsed_time": "1 day, 3:19:27", "remaining_time": "20:47:37", "throughput": 28290.75, "total_tokens": 2782894592} +{"current_steps": 8240, "total_steps": 14493, "loss": 0.281, "lr": 3.70228197537812e-05, "epoch": 1.7057788359514505, "percentage": 56.86, "elapsed_time": "1 day, 3:21:13", "remaining_time": "20:45:27", "throughput": 28294.51, "total_tokens": 2786265600} +{"current_steps": 8250, "total_steps": 14493, "loss": 
0.2844, "lr": 3.7012674569650305e-05, "epoch": 1.7078491757459693, "percentage": 56.92, "elapsed_time": "1 day, 3:23:00", "remaining_time": "20:43:18", "throughput": 28298.24, "total_tokens": 2789658880} +{"current_steps": 8260, "total_steps": 14493, "loss": 0.2824, "lr": 3.700253772105835e-05, "epoch": 1.709919515540488, "percentage": 56.99, "elapsed_time": "1 day, 3:24:49", "remaining_time": "20:41:10", "throughput": 28301.41, "total_tokens": 2793044864} +{"current_steps": 8270, "total_steps": 14493, "loss": 0.2837, "lr": 3.699240919659711e-05, "epoch": 1.7119898553350068, "percentage": 57.06, "elapsed_time": "1 day, 3:26:39", "remaining_time": "20:39:04", "throughput": 28304.38, "total_tokens": 2796454144} +{"current_steps": 8280, "total_steps": 14493, "loss": 0.2889, "lr": 3.698228898488019e-05, "epoch": 1.7140601951295258, "percentage": 57.13, "elapsed_time": "1 day, 3:28:23", "remaining_time": "20:36:53", "throughput": 28308.24, "total_tokens": 2799797632} +{"current_steps": 8290, "total_steps": 14493, "loss": 0.2827, "lr": 3.6972177074543e-05, "epoch": 1.7161305349240443, "percentage": 57.2, "elapsed_time": "1 day, 3:30:13", "remaining_time": "20:34:46", "throughput": 28311.02, "total_tokens": 2803164288} +{"current_steps": 8300, "total_steps": 14493, "loss": 0.2824, "lr": 3.69620734542427e-05, "epoch": 1.7182008747185633, "percentage": 57.27, "elapsed_time": "1 day, 3:31:58", "remaining_time": "20:32:36", "throughput": 28315.24, "total_tokens": 2806562048} +{"current_steps": 8310, "total_steps": 14493, "loss": 0.2815, "lr": 3.695197811265811e-05, "epoch": 1.7202712145130818, "percentage": 57.34, "elapsed_time": "1 day, 3:33:39", "remaining_time": "20:30:23", "throughput": 28320.33, "total_tokens": 2809939072} +{"current_steps": 8320, "total_steps": 14493, "loss": 0.2857, "lr": 3.6941891038489694e-05, "epoch": 1.7223415543076008, "percentage": 57.41, "elapsed_time": "1 day, 3:35:27", "remaining_time": "20:28:15", "throughput": 28323.56, "total_tokens": 
2813302400} +{"current_steps": 8330, "total_steps": 14493, "loss": 0.2868, "lr": 3.693181222045952e-05, "epoch": 1.7244118941021194, "percentage": 57.48, "elapsed_time": "1 day, 3:37:19", "remaining_time": "20:26:11", "throughput": 28325.92, "total_tokens": 2816721024} +{"current_steps": 8340, "total_steps": 14493, "loss": 0.2856, "lr": 3.692174164731113e-05, "epoch": 1.7264822338966384, "percentage": 57.55, "elapsed_time": "1 day, 3:39:04", "remaining_time": "20:24:00", "throughput": 28330.46, "total_tokens": 2820129024} +{"current_steps": 8350, "total_steps": 14493, "loss": 0.2818, "lr": 3.6911679307809595e-05, "epoch": 1.7285525736911571, "percentage": 57.61, "elapsed_time": "1 day, 3:40:50", "remaining_time": "20:21:51", "throughput": 28334.23, "total_tokens": 2823518848} +{"current_steps": 8360, "total_steps": 14493, "loss": 0.2858, "lr": 3.690162519074137e-05, "epoch": 1.7306229134856759, "percentage": 57.68, "elapsed_time": "1 day, 3:42:32", "remaining_time": "20:19:39", "throughput": 28339.19, "total_tokens": 2826895488} +{"current_steps": 8370, "total_steps": 14493, "loss": 0.2862, "lr": 3.689157928491431e-05, "epoch": 1.7326932532801946, "percentage": 57.75, "elapsed_time": "1 day, 3:44:21", "remaining_time": "20:17:32", "throughput": 28341.83, "total_tokens": 2830250240} +{"current_steps": 8380, "total_steps": 14493, "loss": 0.2846, "lr": 3.6881541579157566e-05, "epoch": 1.7347635930747134, "percentage": 57.82, "elapsed_time": "1 day, 3:46:10", "remaining_time": "20:15:25", "throughput": 28345.31, "total_tokens": 2833692544} +{"current_steps": 8390, "total_steps": 14493, "loss": 0.283, "lr": 3.687151206232154e-05, "epoch": 1.7368339328692322, "percentage": 57.89, "elapsed_time": "1 day, 3:48:00", "remaining_time": "20:13:19", "throughput": 28348.81, "total_tokens": 2837162752} +{"current_steps": 8400, "total_steps": 14493, "loss": 0.282, "lr": 3.686149072327788e-05, "epoch": 1.738904272663751, "percentage": 57.96, "elapsed_time": "1 day, 3:49:50", 
"remaining_time": "20:11:14", "throughput": 28351.82, "total_tokens": 2840590976} +{"current_steps": 8410, "total_steps": 14493, "loss": 0.2816, "lr": 3.685147755091937e-05, "epoch": 1.7409746124582697, "percentage": 58.03, "elapsed_time": "1 day, 3:51:30", "remaining_time": "20:09:01", "throughput": 28355.96, "total_tokens": 2843847168} +{"current_steps": 8420, "total_steps": 14493, "loss": 0.2829, "lr": 3.684147253415992e-05, "epoch": 1.7430449522527884, "percentage": 58.1, "elapsed_time": "1 day, 3:53:19", "remaining_time": "20:06:54", "throughput": 28359.01, "total_tokens": 2847240576} +{"current_steps": 8430, "total_steps": 14493, "loss": 0.2846, "lr": 3.683147566193448e-05, "epoch": 1.7451152920473074, "percentage": 58.17, "elapsed_time": "1 day, 3:55:10", "remaining_time": "20:04:48", "throughput": 28361.7, "total_tokens": 2850635392} +{"current_steps": 8440, "total_steps": 14493, "loss": 0.2816, "lr": 3.6821486923199025e-05, "epoch": 1.747185631841826, "percentage": 58.24, "elapsed_time": "1 day, 3:57:00", "remaining_time": "20:02:42", "throughput": 28364.81, "total_tokens": 2854078592} +{"current_steps": 8450, "total_steps": 14493, "loss": 0.2831, "lr": 3.681150630693046e-05, "epoch": 1.749255971636345, "percentage": 58.3, "elapsed_time": "1 day, 3:58:49", "remaining_time": "20:00:36", "throughput": 28367.56, "total_tokens": 2857458944} +{"current_steps": 8460, "total_steps": 14493, "loss": 0.2819, "lr": 3.6801533802126615e-05, "epoch": 1.7513263114308635, "percentage": 58.37, "elapsed_time": "1 day, 4:00:38", "remaining_time": "19:58:30", "throughput": 28371.0, "total_tokens": 2860898048} +{"current_steps": 8470, "total_steps": 14493, "loss": 0.2822, "lr": 3.679156939780617e-05, "epoch": 1.7533966512253825, "percentage": 58.44, "elapsed_time": "1 day, 4:02:31", "remaining_time": "19:56:26", "throughput": 28373.06, "total_tokens": 2864306560} +{"current_steps": 8480, "total_steps": 14493, "loss": 0.2832, "lr": 3.6781613083008594e-05, "epoch": 
1.755466991019901, "percentage": 58.51, "elapsed_time": "1 day, 4:04:14", "remaining_time": "19:54:15", "throughput": 28377.31, "total_tokens": 2867645568} +{"current_steps": 8490, "total_steps": 14493, "loss": 0.2829, "lr": 3.677166484679412e-05, "epoch": 1.75753733081442, "percentage": 58.58, "elapsed_time": "1 day, 4:06:00", "remaining_time": "19:52:07", "throughput": 28381.25, "total_tokens": 2871054848} +{"current_steps": 8500, "total_steps": 14493, "loss": 0.283, "lr": 3.676172467824368e-05, "epoch": 1.7596076706089387, "percentage": 58.65, "elapsed_time": "1 day, 4:07:47", "remaining_time": "19:49:59", "throughput": 28384.48, "total_tokens": 2874413952} +{"current_steps": 8510, "total_steps": 14493, "loss": 0.2819, "lr": 3.675179256645885e-05, "epoch": 1.7616780104034575, "percentage": 58.72, "elapsed_time": "1 day, 4:09:30", "remaining_time": "19:47:49", "throughput": 28388.58, "total_tokens": 2877769600} +{"current_steps": 8520, "total_steps": 14493, "loss": 0.2857, "lr": 3.674186850056181e-05, "epoch": 1.7637483501979763, "percentage": 58.79, "elapsed_time": "1 day, 4:11:19", "remaining_time": "19:45:43", "throughput": 28390.99, "total_tokens": 2881113600} +{"current_steps": 8530, "total_steps": 14493, "loss": 0.2834, "lr": 3.67319524696953e-05, "epoch": 1.765818689992495, "percentage": 58.86, "elapsed_time": "1 day, 4:13:00", "remaining_time": "19:43:30", "throughput": 28396.4, "total_tokens": 2884506752} +{"current_steps": 8540, "total_steps": 14493, "loss": 0.284, "lr": 3.6722044463022536e-05, "epoch": 1.7678890297870138, "percentage": 58.92, "elapsed_time": "1 day, 4:14:41", "remaining_time": "19:41:19", "throughput": 28400.26, "total_tokens": 2887786368} +{"current_steps": 8550, "total_steps": 14493, "loss": 0.2844, "lr": 3.6712144469727214e-05, "epoch": 1.7699593695815325, "percentage": 58.99, "elapsed_time": "1 day, 4:16:25", "remaining_time": "19:39:09", "throughput": 28403.72, "total_tokens": 2891079552} +{"current_steps": 8560, "total_steps": 
14493, "loss": 0.2831, "lr": 3.67022524790134e-05, "epoch": 1.7720297093760513, "percentage": 59.06, "elapsed_time": "1 day, 4:18:08", "remaining_time": "19:37:00", "throughput": 28407.5, "total_tokens": 2894411904} +{"current_steps": 8570, "total_steps": 14493, "loss": 0.2828, "lr": 3.6692368480105546e-05, "epoch": 1.77410004917057, "percentage": 59.13, "elapsed_time": "1 day, 4:19:59", "remaining_time": "19:34:54", "throughput": 28410.28, "total_tokens": 2897823232} +{"current_steps": 8580, "total_steps": 14493, "loss": 0.2868, "lr": 3.6682492462248374e-05, "epoch": 1.776170388965089, "percentage": 59.2, "elapsed_time": "1 day, 4:21:45", "remaining_time": "19:32:46", "throughput": 28414.12, "total_tokens": 2901231104} +{"current_steps": 8590, "total_steps": 14493, "loss": 0.2818, "lr": 3.667262441470689e-05, "epoch": 1.7782407287596076, "percentage": 59.27, "elapsed_time": "1 day, 4:23:26", "remaining_time": "19:30:35", "throughput": 28418.55, "total_tokens": 2904566144} +{"current_steps": 8600, "total_steps": 14493, "loss": 0.2814, "lr": 3.6662764326766255e-05, "epoch": 1.7803110685541266, "percentage": 59.34, "elapsed_time": "1 day, 4:25:08", "remaining_time": "19:28:25", "throughput": 28423.06, "total_tokens": 2907929088} +{"current_steps": 8610, "total_steps": 14493, "loss": 0.2845, "lr": 3.665291218773185e-05, "epoch": 1.7823814083486451, "percentage": 59.41, "elapsed_time": "1 day, 4:26:53", "remaining_time": "19:26:16", "throughput": 28427.42, "total_tokens": 2911345664} +{"current_steps": 8620, "total_steps": 14493, "loss": 0.281, "lr": 3.664306798692912e-05, "epoch": 1.784451748143164, "percentage": 59.48, "elapsed_time": "1 day, 4:28:35", "remaining_time": "19:24:06", "throughput": 28431.56, "total_tokens": 2914687872} +{"current_steps": 8630, "total_steps": 14493, "loss": 0.2844, "lr": 3.6633231713703576e-05, "epoch": 1.7865220879376826, "percentage": 59.55, "elapsed_time": "1 day, 4:30:22", "remaining_time": "19:21:59", "throughput": 28435.06, 
"total_tokens": 2918080000} +{"current_steps": 8640, "total_steps": 14493, "loss": 0.2824, "lr": 3.6623403357420745e-05, "epoch": 1.7885924277322016, "percentage": 59.61, "elapsed_time": "1 day, 4:32:12", "remaining_time": "19:19:54", "throughput": 28437.86, "total_tokens": 2921499776} +{"current_steps": 8650, "total_steps": 14493, "loss": 0.2827, "lr": 3.661358290746611e-05, "epoch": 1.7906627675267204, "percentage": 59.68, "elapsed_time": "1 day, 4:34:03", "remaining_time": "19:17:49", "throughput": 28440.14, "total_tokens": 2924876416} +{"current_steps": 8660, "total_steps": 14493, "loss": 0.2843, "lr": 3.6603770353245056e-05, "epoch": 1.7927331073212391, "percentage": 59.75, "elapsed_time": "1 day, 4:35:51", "remaining_time": "19:15:43", "throughput": 28443.26, "total_tokens": 2928266496} +{"current_steps": 8670, "total_steps": 14493, "loss": 0.2842, "lr": 3.659396568418286e-05, "epoch": 1.794803447115758, "percentage": 59.82, "elapsed_time": "1 day, 4:37:33", "remaining_time": "19:13:33", "throughput": 28447.32, "total_tokens": 2931603712} +{"current_steps": 8680, "total_steps": 14493, "loss": 0.2814, "lr": 3.658416888972459e-05, "epoch": 1.7968737869102767, "percentage": 59.89, "elapsed_time": "1 day, 4:39:22", "remaining_time": "19:11:27", "throughput": 28450.76, "total_tokens": 2935042304} +{"current_steps": 8690, "total_steps": 14493, "loss": 0.2825, "lr": 3.6574379959335106e-05, "epoch": 1.7989441267047954, "percentage": 59.96, "elapsed_time": "1 day, 4:41:07", "remaining_time": "19:09:19", "throughput": 28454.9, "total_tokens": 2938460032} +{"current_steps": 8700, "total_steps": 14493, "loss": 0.2841, "lr": 3.6564598882498976e-05, "epoch": 1.8010144664993142, "percentage": 60.03, "elapsed_time": "1 day, 4:42:50", "remaining_time": "19:07:10", "throughput": 28459.57, "total_tokens": 2941872000} +{"current_steps": 8710, "total_steps": 14493, "loss": 0.2833, "lr": 3.655482564872043e-05, "epoch": 1.803084806293833, "percentage": 60.1, "elapsed_time": "1 day, 
4:44:37", "remaining_time": "19:05:04", "throughput": 28463.01, "total_tokens": 2945289344} +{"current_steps": 8720, "total_steps": 14493, "loss": 0.2831, "lr": 3.654506024752336e-05, "epoch": 1.8051551460883517, "percentage": 60.17, "elapsed_time": "1 day, 4:46:28", "remaining_time": "19:03:00", "throughput": 28465.59, "total_tokens": 2948721024} +{"current_steps": 8730, "total_steps": 14493, "loss": 0.2815, "lr": 3.653530266845121e-05, "epoch": 1.8072254858828707, "percentage": 60.24, "elapsed_time": "1 day, 4:48:14", "remaining_time": "19:00:52", "throughput": 28469.26, "total_tokens": 2952114560} +{"current_steps": 8740, "total_steps": 14493, "loss": 0.2812, "lr": 3.652555290106696e-05, "epoch": 1.8092958256773892, "percentage": 60.3, "elapsed_time": "1 day, 4:49:57", "remaining_time": "18:58:43", "throughput": 28473.25, "total_tokens": 2955444480} +{"current_steps": 8750, "total_steps": 14493, "loss": 0.283, "lr": 3.6515810934953084e-05, "epoch": 1.8113661654719082, "percentage": 60.37, "elapsed_time": "1 day, 4:51:47", "remaining_time": "18:56:39", "throughput": 28475.86, "total_tokens": 2958862848} +{"current_steps": 8760, "total_steps": 14493, "loss": 0.2836, "lr": 3.650607675971151e-05, "epoch": 1.8134365052664267, "percentage": 60.44, "elapsed_time": "1 day, 4:53:30", "remaining_time": "18:54:30", "throughput": 28480.35, "total_tokens": 2962264320} +{"current_steps": 8770, "total_steps": 14493, "loss": 0.2832, "lr": 3.649635036496351e-05, "epoch": 1.8155068450609457, "percentage": 60.51, "elapsed_time": "1 day, 4:55:12", "remaining_time": "18:52:20", "throughput": 28484.51, "total_tokens": 2965593728} +{"current_steps": 8780, "total_steps": 14493, "loss": 0.2835, "lr": 3.6486631740349746e-05, "epoch": 1.8175771848554643, "percentage": 60.58, "elapsed_time": "1 day, 4:57:04", "remaining_time": "18:50:17", "throughput": 28486.47, "total_tokens": 2968997376} +{"current_steps": 8790, "total_steps": 14493, "loss": 0.2842, "lr": 3.647692087553018e-05, "epoch": 
1.8196475246499833, "percentage": 60.65, "elapsed_time": "1 day, 4:58:47", "remaining_time": "18:48:08", "throughput": 28490.79, "total_tokens": 2972369920} +{"current_steps": 8800, "total_steps": 14493, "loss": 0.2845, "lr": 3.6467217760184005e-05, "epoch": 1.8217178644445018, "percentage": 60.72, "elapsed_time": "1 day, 5:00:31", "remaining_time": "18:45:59", "throughput": 28494.36, "total_tokens": 2975696640} +{"current_steps": 8810, "total_steps": 14493, "loss": 0.2837, "lr": 3.6457522384009625e-05, "epoch": 1.8237882042390208, "percentage": 60.79, "elapsed_time": "1 day, 5:02:14", "remaining_time": "18:43:51", "throughput": 28498.51, "total_tokens": 2979067392} +{"current_steps": 8820, "total_steps": 14493, "loss": 0.2826, "lr": 3.644783473672462e-05, "epoch": 1.8258585440335395, "percentage": 60.86, "elapsed_time": "1 day, 5:03:59", "remaining_time": "18:41:44", "throughput": 28502.0, "total_tokens": 2982446720} +{"current_steps": 8830, "total_steps": 14493, "loss": 0.2841, "lr": 3.643815480806568e-05, "epoch": 1.8279288838280583, "percentage": 60.93, "elapsed_time": "1 day, 5:05:44", "remaining_time": "18:39:36", "throughput": 28505.38, "total_tokens": 2985785472} +{"current_steps": 8840, "total_steps": 14493, "loss": 0.2866, "lr": 3.6428482587788555e-05, "epoch": 1.829999223622577, "percentage": 60.99, "elapsed_time": "1 day, 5:07:30", "remaining_time": "18:37:29", "throughput": 28508.82, "total_tokens": 2989167360} +{"current_steps": 8850, "total_steps": 14493, "loss": 0.2843, "lr": 3.641881806566803e-05, "epoch": 1.8320695634170958, "percentage": 61.06, "elapsed_time": "1 day, 5:09:11", "remaining_time": "18:35:20", "throughput": 28512.64, "total_tokens": 2992452992} +{"current_steps": 8860, "total_steps": 14493, "loss": 0.2827, "lr": 3.640916123149788e-05, "epoch": 1.8341399032116146, "percentage": 61.13, "elapsed_time": "1 day, 5:10:52", "remaining_time": "18:33:10", "throughput": 28516.79, "total_tokens": 2995762432} +{"current_steps": 8870, 
"total_steps": 14493, "loss": 0.2822, "lr": 3.639951207509079e-05, "epoch": 1.8362102430061333, "percentage": 61.2, "elapsed_time": "1 day, 5:12:33", "remaining_time": "18:31:00", "throughput": 28521.2, "total_tokens": 2999102208} +{"current_steps": 8880, "total_steps": 14493, "loss": 0.2827, "lr": 3.6389870586278333e-05, "epoch": 1.8382805828006523, "percentage": 61.27, "elapsed_time": "1 day, 5:14:18", "remaining_time": "18:28:53", "throughput": 28524.76, "total_tokens": 3002460928} +{"current_steps": 8890, "total_steps": 14493, "loss": 0.2834, "lr": 3.6380236754910965e-05, "epoch": 1.8403509225951709, "percentage": 61.34, "elapsed_time": "1 day, 5:16:08", "remaining_time": "18:26:49", "throughput": 28527.54, "total_tokens": 3005895040} +{"current_steps": 8900, "total_steps": 14493, "loss": 0.2824, "lr": 3.6370610570857897e-05, "epoch": 1.8424212623896898, "percentage": 61.41, "elapsed_time": "1 day, 5:17:49", "remaining_time": "18:24:40", "throughput": 28531.29, "total_tokens": 3009192576} +{"current_steps": 8910, "total_steps": 14493, "loss": 0.2852, "lr": 3.6360992024007114e-05, "epoch": 1.8444916021842084, "percentage": 61.48, "elapsed_time": "1 day, 5:19:33", "remaining_time": "18:22:32", "throughput": 28534.86, "total_tokens": 3012526336} +{"current_steps": 8920, "total_steps": 14493, "loss": 0.2811, "lr": 3.6351381104265304e-05, "epoch": 1.8465619419787274, "percentage": 61.55, "elapsed_time": "1 day, 5:21:15", "remaining_time": "18:20:23", "throughput": 28539.12, "total_tokens": 3015874048} +{"current_steps": 8930, "total_steps": 14493, "loss": 0.2815, "lr": 3.634177780155783e-05, "epoch": 1.848632281773246, "percentage": 61.62, "elapsed_time": "1 day, 5:22:57", "remaining_time": "18:18:14", "throughput": 28542.96, "total_tokens": 3019196928} +{"current_steps": 8940, "total_steps": 14493, "loss": 0.2834, "lr": 3.633218210582867e-05, "epoch": 1.8507026215677649, "percentage": 61.68, "elapsed_time": "1 day, 5:24:39", "remaining_time": "18:16:06", 
"throughput": 28546.67, "total_tokens": 3022518400} +{"current_steps": 8950, "total_steps": 14493, "loss": 0.2802, "lr": 3.6322594007040376e-05, "epoch": 1.8527729613622834, "percentage": 61.75, "elapsed_time": "1 day, 5:26:25", "remaining_time": "18:14:00", "throughput": 28549.88, "total_tokens": 3025884288} +{"current_steps": 8960, "total_steps": 14493, "loss": 0.2844, "lr": 3.631301349517403e-05, "epoch": 1.8548433011568024, "percentage": 61.82, "elapsed_time": "1 day, 5:28:09", "remaining_time": "18:11:52", "throughput": 28553.87, "total_tokens": 3029272320} +{"current_steps": 8970, "total_steps": 14493, "loss": 0.2827, "lr": 3.6303440560229216e-05, "epoch": 1.8569136409513212, "percentage": 61.89, "elapsed_time": "1 day, 5:29:51", "remaining_time": "18:09:44", "throughput": 28557.65, "total_tokens": 3032574848} +{"current_steps": 8980, "total_steps": 14493, "loss": 0.2819, "lr": 3.629387519222395e-05, "epoch": 1.85898398074584, "percentage": 61.96, "elapsed_time": "1 day, 5:31:34", "remaining_time": "18:07:36", "throughput": 28561.77, "total_tokens": 3035949952} +{"current_steps": 8990, "total_steps": 14493, "loss": 0.2805, "lr": 3.628431738119464e-05, "epoch": 1.8610543205403587, "percentage": 62.03, "elapsed_time": "1 day, 5:33:22", "remaining_time": "18:05:31", "throughput": 28565.21, "total_tokens": 3039408640} +{"current_steps": 9000, "total_steps": 14493, "loss": 0.2845, "lr": 3.62747671171961e-05, "epoch": 1.8631246603348774, "percentage": 62.1, "elapsed_time": "1 day, 5:35:10", "remaining_time": "18:03:26", "throughput": 28568.44, "total_tokens": 3042831360} +{"current_steps": 9010, "total_steps": 14493, "loss": 0.2842, "lr": 3.626522439030138e-05, "epoch": 1.8651950001293962, "percentage": 62.17, "elapsed_time": "1 day, 5:36:53", "remaining_time": "18:01:19", "throughput": 28572.18, "total_tokens": 3046190848} +{"current_steps": 9020, "total_steps": 14493, "loss": 0.2836, "lr": 3.6255689190601863e-05, "epoch": 1.867265339923915, "percentage": 62.24, 
"elapsed_time": "1 day, 5:38:38", "remaining_time": "17:59:12", "throughput": 28575.59, "total_tokens": 3049551360} +{"current_steps": 9030, "total_steps": 14493, "loss": 0.2811, "lr": 3.624616150820714e-05, "epoch": 1.869335679718434, "percentage": 62.31, "elapsed_time": "1 day, 5:40:25", "remaining_time": "17:57:07", "throughput": 28578.48, "total_tokens": 3052917376} +{"current_steps": 9040, "total_steps": 14493, "loss": 0.2839, "lr": 3.623664133324499e-05, "epoch": 1.8714060195129525, "percentage": 62.37, "elapsed_time": "1 day, 5:42:14", "remaining_time": "17:55:03", "throughput": 28581.58, "total_tokens": 3056348928} +{"current_steps": 9050, "total_steps": 14493, "loss": 0.2848, "lr": 3.622712865586131e-05, "epoch": 1.8734763593074715, "percentage": 62.44, "elapsed_time": "1 day, 5:44:08", "remaining_time": "17:53:02", "throughput": 28583.63, "total_tokens": 3059820800} +{"current_steps": 9060, "total_steps": 14493, "loss": 0.2819, "lr": 3.621762346622014e-05, "epoch": 1.87554669910199, "percentage": 62.51, "elapsed_time": "1 day, 5:45:57", "remaining_time": "17:50:58", "throughput": 28586.6, "total_tokens": 3063258880} +{"current_steps": 9070, "total_steps": 14493, "loss": 0.2839, "lr": 3.620812575450352e-05, "epoch": 1.877617038896509, "percentage": 62.58, "elapsed_time": "1 day, 5:47:43", "remaining_time": "17:48:53", "throughput": 28590.19, "total_tokens": 3066688384} +{"current_steps": 9080, "total_steps": 14493, "loss": 0.2825, "lr": 3.6198635510911556e-05, "epoch": 1.8796873786910275, "percentage": 62.65, "elapsed_time": "1 day, 5:49:34", "remaining_time": "17:46:51", "throughput": 28592.44, "total_tokens": 3070112768} +{"current_steps": 9090, "total_steps": 14493, "loss": 0.284, "lr": 3.618915272566228e-05, "epoch": 1.8817577184855465, "percentage": 62.72, "elapsed_time": "1 day, 5:51:17", "remaining_time": "17:44:43", "throughput": 28596.42, "total_tokens": 3073460352} +{"current_steps": 9100, "total_steps": 14493, "loss": 0.2872, "lr": 
3.6179677388991694e-05, "epoch": 1.883828058280065, "percentage": 62.79, "elapsed_time": "1 day, 5:53:02", "remaining_time": "17:42:37", "throughput": 28599.45, "total_tokens": 3076786304} +{"current_steps": 9110, "total_steps": 14493, "loss": 0.2817, "lr": 3.617020949115366e-05, "epoch": 1.885898398074584, "percentage": 62.86, "elapsed_time": "1 day, 5:54:47", "remaining_time": "17:40:31", "throughput": 28602.88, "total_tokens": 3080174464} +{"current_steps": 9120, "total_steps": 14493, "loss": 0.282, "lr": 3.6160749022419886e-05, "epoch": 1.8879687378691028, "percentage": 62.93, "elapsed_time": "1 day, 5:56:32", "remaining_time": "17:38:25", "throughput": 28606.12, "total_tokens": 3083531904} +{"current_steps": 9130, "total_steps": 14493, "loss": 0.2806, "lr": 3.6151295973079887e-05, "epoch": 1.8900390776636216, "percentage": 63.0, "elapsed_time": "1 day, 5:58:20", "remaining_time": "17:36:20", "throughput": 28608.79, "total_tokens": 3086892032} +{"current_steps": 9140, "total_steps": 14493, "loss": 0.2839, "lr": 3.6141850333440934e-05, "epoch": 1.8921094174581403, "percentage": 63.06, "elapsed_time": "1 day, 6:00:03", "remaining_time": "17:34:14", "throughput": 28612.26, "total_tokens": 3090222336} +{"current_steps": 9150, "total_steps": 14493, "loss": 0.2832, "lr": 3.613241209382803e-05, "epoch": 1.894179757252659, "percentage": 63.13, "elapsed_time": "1 day, 6:01:47", "remaining_time": "17:32:07", "throughput": 28615.88, "total_tokens": 3093604352} +{"current_steps": 9160, "total_steps": 14493, "loss": 0.2859, "lr": 3.6122981244583834e-05, "epoch": 1.8962500970471778, "percentage": 63.2, "elapsed_time": "1 day, 6:03:35", "remaining_time": "17:30:03", "throughput": 28618.5, "total_tokens": 3096957184} +{"current_steps": 9170, "total_steps": 14493, "loss": 0.2808, "lr": 3.6113557776068644e-05, "epoch": 1.8983204368416966, "percentage": 63.27, "elapsed_time": "1 day, 6:05:22", "remaining_time": "17:27:59", "throughput": 28621.74, "total_tokens": 3100379776} 
+{"current_steps": 9180, "total_steps": 14493, "loss": 0.2837, "lr": 3.6104141678660386e-05, "epoch": 1.9003907766362156, "percentage": 63.34, "elapsed_time": "1 day, 6:07:07", "remaining_time": "17:25:53", "throughput": 28624.77, "total_tokens": 3103714688} +{"current_steps": 9190, "total_steps": 14493, "loss": 0.2838, "lr": 3.6094732942754487e-05, "epoch": 1.9024611164307341, "percentage": 63.41, "elapsed_time": "1 day, 6:09:02", "remaining_time": "17:23:53", "throughput": 28626.13, "total_tokens": 3107140224} +{"current_steps": 9200, "total_steps": 14493, "loss": 0.2823, "lr": 3.60853315587639e-05, "epoch": 1.904531456225253, "percentage": 63.48, "elapsed_time": "1 day, 6:10:46", "remaining_time": "17:21:47", "throughput": 28629.56, "total_tokens": 3110514176} +{"current_steps": 9210, "total_steps": 14493, "loss": 0.2844, "lr": 3.607593751711909e-05, "epoch": 1.9066017960197716, "percentage": 63.55, "elapsed_time": "1 day, 6:12:35", "remaining_time": "17:19:43", "throughput": 28632.21, "total_tokens": 3113902080} +{"current_steps": 9220, "total_steps": 14493, "loss": 0.2835, "lr": 3.60665508082679e-05, "epoch": 1.9086721358142906, "percentage": 63.62, "elapsed_time": "1 day, 6:14:10", "remaining_time": "17:17:32", "throughput": 28636.56, "total_tokens": 3117111424} +{"current_steps": 9230, "total_steps": 14493, "loss": 0.2847, "lr": 3.6057171422675585e-05, "epoch": 1.9107424756088092, "percentage": 63.69, "elapsed_time": "1 day, 6:15:54", "remaining_time": "17:15:26", "throughput": 28639.95, "total_tokens": 3120462848} +{"current_steps": 9240, "total_steps": 14493, "loss": 0.2809, "lr": 3.604779935082474e-05, "epoch": 1.9128128154033281, "percentage": 63.75, "elapsed_time": "1 day, 6:17:44", "remaining_time": "17:13:23", "throughput": 28642.38, "total_tokens": 3123864960} +{"current_steps": 9250, "total_steps": 14493, "loss": 0.2858, "lr": 3.603843458321526e-05, "epoch": 1.9148831551978467, "percentage": 63.82, "elapsed_time": "1 day, 6:19:31", "remaining_time": 
"17:11:19", "throughput": 28645.31, "total_tokens": 3127247488} +{"current_steps": 9260, "total_steps": 14493, "loss": 0.282, "lr": 3.6029077110364355e-05, "epoch": 1.9169534949923657, "percentage": 63.89, "elapsed_time": "1 day, 6:21:18", "remaining_time": "17:09:15", "throughput": 28648.18, "total_tokens": 3130622208} +{"current_steps": 9270, "total_steps": 14493, "loss": 0.2808, "lr": 3.60197269228064e-05, "epoch": 1.9190238347868844, "percentage": 63.96, "elapsed_time": "1 day, 6:23:01", "remaining_time": "17:07:09", "throughput": 28651.54, "total_tokens": 3133961984} +{"current_steps": 9280, "total_steps": 14493, "loss": 0.2814, "lr": 3.601038401109299e-05, "epoch": 1.9210941745814032, "percentage": 64.03, "elapsed_time": "1 day, 6:24:41", "remaining_time": "17:05:00", "throughput": 28655.73, "total_tokens": 3137272192} +{"current_steps": 9290, "total_steps": 14493, "loss": 0.2791, "lr": 3.6001048365792846e-05, "epoch": 1.923164514375922, "percentage": 64.1, "elapsed_time": "1 day, 6:26:33", "remaining_time": "17:02:59", "throughput": 28657.62, "total_tokens": 3140688000} +{"current_steps": 9300, "total_steps": 14493, "loss": 0.2814, "lr": 3.599171997749182e-05, "epoch": 1.9252348541704407, "percentage": 64.17, "elapsed_time": "1 day, 6:28:25", "remaining_time": "17:00:58", "throughput": 28659.87, "total_tokens": 3144148480} +{"current_steps": 9310, "total_steps": 14493, "loss": 0.2819, "lr": 3.598239883679281e-05, "epoch": 1.9273051939649595, "percentage": 64.24, "elapsed_time": "1 day, 6:30:14", "remaining_time": "16:58:55", "throughput": 28662.78, "total_tokens": 3147582464} +{"current_steps": 9320, "total_steps": 14493, "loss": 0.2821, "lr": 3.597308493431576e-05, "epoch": 1.9293755337594782, "percentage": 64.31, "elapsed_time": "1 day, 6:32:00", "remaining_time": "16:56:50", "throughput": 28665.45, "total_tokens": 3150917120} +{"current_steps": 9330, "total_steps": 14493, "loss": 0.2811, "lr": 3.596377826069758e-05, "epoch": 1.931445873553997, 
"percentage": 64.38, "elapsed_time": "1 day, 6:33:49", "remaining_time": "16:54:47", "throughput": 28667.87, "total_tokens": 3154303360} +{"current_steps": 9340, "total_steps": 14493, "loss": 0.2825, "lr": 3.5954478806592155e-05, "epoch": 1.9335162133485158, "percentage": 64.44, "elapsed_time": "1 day, 6:35:35", "remaining_time": "16:52:43", "throughput": 28670.95, "total_tokens": 3157683456} +{"current_steps": 9350, "total_steps": 14493, "loss": 0.2845, "lr": 3.594518656267024e-05, "epoch": 1.9355865531430347, "percentage": 64.51, "elapsed_time": "1 day, 6:37:20", "remaining_time": "16:50:38", "throughput": 28674.05, "total_tokens": 3161053952} +{"current_steps": 9360, "total_steps": 14493, "loss": 0.2821, "lr": 3.5935901519619496e-05, "epoch": 1.9376568929375533, "percentage": 64.58, "elapsed_time": "1 day, 6:39:09", "remaining_time": "16:48:35", "throughput": 28677.16, "total_tokens": 3164500096} +{"current_steps": 9370, "total_steps": 14493, "loss": 0.2818, "lr": 3.5926623668144385e-05, "epoch": 1.9397272327320723, "percentage": 64.65, "elapsed_time": "1 day, 6:40:52", "remaining_time": "16:46:29", "throughput": 28680.39, "total_tokens": 3167825280} +{"current_steps": 9380, "total_steps": 14493, "loss": 0.281, "lr": 3.5917352998966194e-05, "epoch": 1.9417975725265908, "percentage": 64.72, "elapsed_time": "1 day, 6:42:43", "remaining_time": "16:44:27", "throughput": 28682.69, "total_tokens": 3171257600} +{"current_steps": 9390, "total_steps": 14493, "loss": 0.2822, "lr": 3.5908089502822914e-05, "epoch": 1.9438679123211098, "percentage": 64.79, "elapsed_time": "1 day, 6:44:23", "remaining_time": "16:42:20", "throughput": 28686.46, "total_tokens": 3174545152} +{"current_steps": 9400, "total_steps": 14493, "loss": 0.2821, "lr": 3.589883317046929e-05, "epoch": 1.9459382521156283, "percentage": 64.86, "elapsed_time": "1 day, 6:46:08", "remaining_time": "16:40:15", "throughput": 28689.4, "total_tokens": 3177881472} +{"current_steps": 9410, "total_steps": 14493, 
"loss": 0.2809, "lr": 3.5889583992676715e-05, "epoch": 1.9480085919101473, "percentage": 64.93, "elapsed_time": "1 day, 6:47:50", "remaining_time": "16:38:09", "throughput": 28692.78, "total_tokens": 3181195904} +{"current_steps": 9420, "total_steps": 14493, "loss": 0.2836, "lr": 3.5880341960233244e-05, "epoch": 1.950078931704666, "percentage": 65.0, "elapsed_time": "1 day, 6:49:39", "remaining_time": "16:36:06", "throughput": 28695.65, "total_tokens": 3184615808} +{"current_steps": 9430, "total_steps": 14493, "loss": 0.2813, "lr": 3.58711070639435e-05, "epoch": 1.9521492714991848, "percentage": 65.07, "elapsed_time": "1 day, 6:51:31", "remaining_time": "16:34:05", "throughput": 28697.65, "total_tokens": 3188055552} +{"current_steps": 9440, "total_steps": 14493, "loss": 0.2806, "lr": 3.586187929462869e-05, "epoch": 1.9542196112937036, "percentage": 65.13, "elapsed_time": "1 day, 6:53:14", "remaining_time": "16:31:59", "throughput": 28701.18, "total_tokens": 3191419008} +{"current_steps": 9450, "total_steps": 14493, "loss": 0.2801, "lr": 3.585265864312651e-05, "epoch": 1.9562899510882223, "percentage": 65.2, "elapsed_time": "1 day, 6:55:05", "remaining_time": "16:29:58", "throughput": 28703.37, "total_tokens": 3194851968} +{"current_steps": 9460, "total_steps": 14493, "loss": 0.2829, "lr": 3.584344510029118e-05, "epoch": 1.958360290882741, "percentage": 65.27, "elapsed_time": "1 day, 6:56:46", "remaining_time": "16:27:51", "throughput": 28707.17, "total_tokens": 3198167552} +{"current_steps": 9470, "total_steps": 14493, "loss": 0.2857, "lr": 3.583423865699333e-05, "epoch": 1.9604306306772599, "percentage": 65.34, "elapsed_time": "1 day, 6:58:30", "remaining_time": "16:25:46", "throughput": 28710.59, "total_tokens": 3201534208} +{"current_steps": 9480, "total_steps": 14493, "loss": 0.2828, "lr": 3.5825039304119994e-05, "epoch": 1.9625009704717786, "percentage": 65.41, "elapsed_time": "1 day, 7:00:20", "remaining_time": "16:23:44", "throughput": 28713.11, 
"total_tokens": 3204983936} +{"current_steps": 9490, "total_steps": 14493, "loss": 0.2846, "lr": 3.581584703257461e-05, "epoch": 1.9645713102662974, "percentage": 65.48, "elapsed_time": "1 day, 7:02:07", "remaining_time": "16:21:41", "throughput": 28715.85, "total_tokens": 3208359424} +{"current_steps": 9500, "total_steps": 14493, "loss": 0.2829, "lr": 3.580666183327689e-05, "epoch": 1.9666416500608164, "percentage": 65.55, "elapsed_time": "1 day, 7:03:52", "remaining_time": "16:19:36", "throughput": 28719.4, "total_tokens": 3211772672} +{"current_steps": 9510, "total_steps": 14493, "loss": 0.281, "lr": 3.5797483697162906e-05, "epoch": 1.968711989855335, "percentage": 65.62, "elapsed_time": "1 day, 7:05:41", "remaining_time": "16:17:34", "throughput": 28721.91, "total_tokens": 3215175680} +{"current_steps": 9520, "total_steps": 14493, "loss": 0.281, "lr": 3.5788312615184936e-05, "epoch": 1.970782329649854, "percentage": 65.69, "elapsed_time": "1 day, 7:07:29", "remaining_time": "16:15:31", "throughput": 28724.2, "total_tokens": 3218520704} +{"current_steps": 9530, "total_steps": 14493, "loss": 0.2802, "lr": 3.5779148578311476e-05, "epoch": 1.9728526694443724, "percentage": 65.76, "elapsed_time": "1 day, 7:09:08", "remaining_time": "16:13:24", "throughput": 28727.86, "total_tokens": 3221786496} +{"current_steps": 9540, "total_steps": 14493, "loss": 0.2822, "lr": 3.5769991577527236e-05, "epoch": 1.9749230092388914, "percentage": 65.82, "elapsed_time": "1 day, 7:10:59", "remaining_time": "16:11:23", "throughput": 28729.87, "total_tokens": 3225212800} +{"current_steps": 9550, "total_steps": 14493, "loss": 0.2813, "lr": 3.5760841603833034e-05, "epoch": 1.97699334903341, "percentage": 65.89, "elapsed_time": "1 day, 7:12:46", "remaining_time": "16:09:20", "throughput": 28732.96, "total_tokens": 3228630016} +{"current_steps": 9560, "total_steps": 14493, "loss": 0.2833, "lr": 3.5751698648245814e-05, "epoch": 1.979063688827929, "percentage": 65.96, "elapsed_time": "1 day, 
7:14:38", "remaining_time": "16:07:19", "throughput": 28735.4, "total_tokens": 3232116992} +{"current_steps": 9570, "total_steps": 14493, "loss": 0.2825, "lr": 3.574256270179857e-05, "epoch": 1.9811340286224477, "percentage": 66.03, "elapsed_time": "1 day, 7:16:27", "remaining_time": "16:05:17", "throughput": 28737.87, "total_tokens": 3235531520} +{"current_steps": 9580, "total_steps": 14493, "loss": 0.2837, "lr": 3.573343375554037e-05, "epoch": 1.9832043684169665, "percentage": 66.1, "elapsed_time": "1 day, 7:18:13", "remaining_time": "16:03:13", "throughput": 28740.81, "total_tokens": 3238911232} +{"current_steps": 9590, "total_steps": 14493, "loss": 0.2799, "lr": 3.572431180053621e-05, "epoch": 1.9852747082114852, "percentage": 66.17, "elapsed_time": "1 day, 7:19:59", "remaining_time": "16:01:09", "throughput": 28744.48, "total_tokens": 3242358144} +{"current_steps": 9600, "total_steps": 14493, "loss": 0.2817, "lr": 3.571519682786711e-05, "epoch": 1.987345048006004, "percentage": 66.24, "elapsed_time": "1 day, 7:21:40", "remaining_time": "15:59:03", "throughput": 28748.0, "total_tokens": 3245662208} +{"current_steps": 9610, "total_steps": 14493, "loss": 0.2829, "lr": 3.570608882862996e-05, "epoch": 1.9894153878005227, "percentage": 66.31, "elapsed_time": "1 day, 7:23:27", "remaining_time": "15:57:00", "throughput": 28750.5, "total_tokens": 3249015040} +{"current_steps": 9620, "total_steps": 14493, "loss": 0.2804, "lr": 3.569698779393757e-05, "epoch": 1.9914857275950415, "percentage": 66.38, "elapsed_time": "1 day, 7:25:08", "remaining_time": "15:54:54", "throughput": 28754.43, "total_tokens": 3252362240} +{"current_steps": 9630, "total_steps": 14493, "loss": 0.2809, "lr": 3.568789371491859e-05, "epoch": 1.9935560673895603, "percentage": 66.45, "elapsed_time": "1 day, 7:26:59", "remaining_time": "15:52:54", "throughput": 28756.84, "total_tokens": 3255838720} +{"current_steps": 9640, "total_steps": 14493, "loss": 0.28, "lr": 3.567880658271748e-05, "epoch": 
1.995626407184079, "percentage": 66.51, "elapsed_time": "1 day, 7:28:45", "remaining_time": "15:50:50", "throughput": 28760.3, "total_tokens": 3259277568} +{"current_steps": 9650, "total_steps": 14493, "loss": 0.2799, "lr": 3.566972638849445e-05, "epoch": 1.997696746978598, "percentage": 66.58, "elapsed_time": "1 day, 7:30:33", "remaining_time": "15:48:48", "throughput": 28763.12, "total_tokens": 3262714624} +{"current_steps": 9660, "total_steps": 14493, "loss": 0.2806, "lr": 3.566065312342551e-05, "epoch": 1.9997670867731165, "percentage": 66.65, "elapsed_time": "1 day, 7:32:20", "remaining_time": "15:46:45", "throughput": 28766.3, "total_tokens": 3266129024} +{"current_steps": 9670, "total_steps": 14493, "loss": 0.2705, "lr": 3.565158677870231e-05, "epoch": 2.001656271835615, "percentage": 66.72, "elapsed_time": "1 day, 7:33:54", "remaining_time": "15:44:36", "throughput": 28769.1, "total_tokens": 3269154944} +{"current_steps": 9680, "total_steps": 14493, "loss": 0.275, "lr": 3.564252734553221e-05, "epoch": 2.003726611630134, "percentage": 66.79, "elapsed_time": "1 day, 7:35:46", "remaining_time": "15:42:35", "throughput": 28770.62, "total_tokens": 3272547712} +{"current_steps": 9690, "total_steps": 14493, "loss": 0.2721, "lr": 3.563347481513818e-05, "epoch": 2.0057969514246525, "percentage": 66.86, "elapsed_time": "1 day, 7:37:36", "remaining_time": "15:40:34", "throughput": 28772.81, "total_tokens": 3275966080} +{"current_steps": 9700, "total_steps": 14493, "loss": 0.2707, "lr": 3.56244291787588e-05, "epoch": 2.0078672912191715, "percentage": 66.93, "elapsed_time": "1 day, 7:39:28", "remaining_time": "15:38:34", "throughput": 28774.75, "total_tokens": 3279409536} +{"current_steps": 9710, "total_steps": 14493, "loss": 0.2668, "lr": 3.5615390427648216e-05, "epoch": 2.00993763101369, "percentage": 67.0, "elapsed_time": "1 day, 7:41:12", "remaining_time": "15:36:30", "throughput": 28777.91, "total_tokens": 3282764032} +{"current_steps": 9720, "total_steps": 14493, 
"loss": 0.2684, "lr": 3.5606358553076075e-05, "epoch": 2.012007970808209, "percentage": 67.07, "elapsed_time": "1 day, 7:43:00", "remaining_time": "15:34:28", "throughput": 28780.77, "total_tokens": 3286191488} +{"current_steps": 9730, "total_steps": 14493, "loss": 0.2713, "lr": 3.5597333546327526e-05, "epoch": 2.0140783106027276, "percentage": 67.14, "elapsed_time": "1 day, 7:44:43", "remaining_time": "15:32:23", "throughput": 28784.33, "total_tokens": 3289565568} +{"current_steps": 9740, "total_steps": 14493, "loss": 0.2699, "lr": 3.5588315398703186e-05, "epoch": 2.0161486503972466, "percentage": 67.2, "elapsed_time": "1 day, 7:46:27", "remaining_time": "15:30:19", "throughput": 28786.85, "total_tokens": 3292851840} +{"current_steps": 9750, "total_steps": 14493, "loss": 0.2718, "lr": 3.557930410151907e-05, "epoch": 2.018218990191765, "percentage": 67.27, "elapsed_time": "1 day, 7:48:12", "remaining_time": "15:28:16", "throughput": 28789.72, "total_tokens": 3296214016} +{"current_steps": 9760, "total_steps": 14493, "loss": 0.2708, "lr": 3.5570299646106606e-05, "epoch": 2.020289329986284, "percentage": 67.34, "elapsed_time": "1 day, 7:49:57", "remaining_time": "15:26:12", "throughput": 28792.66, "total_tokens": 3299566720} +{"current_steps": 9770, "total_steps": 14493, "loss": 0.2716, "lr": 3.556130202381253e-05, "epoch": 2.0223596697808026, "percentage": 67.41, "elapsed_time": "1 day, 7:51:54", "remaining_time": "15:24:14", "throughput": 28793.71, "total_tokens": 3303043840} +{"current_steps": 9780, "total_steps": 14493, "loss": 0.271, "lr": 3.555231122599892e-05, "epoch": 2.0244300095753216, "percentage": 67.48, "elapsed_time": "1 day, 7:53:40", "remaining_time": "15:22:12", "throughput": 28796.68, "total_tokens": 3306457344} +{"current_steps": 9790, "total_steps": 14493, "loss": 0.2705, "lr": 3.554332724404313e-05, "epoch": 2.02650034936984, "percentage": 67.55, "elapsed_time": "1 day, 7:55:27", "remaining_time": "15:20:09", "throughput": 28799.6, 
"total_tokens": 3309855872} +{"current_steps": 9800, "total_steps": 14493, "loss": 0.272, "lr": 3.553435006933777e-05, "epoch": 2.028570689164359, "percentage": 67.62, "elapsed_time": "1 day, 7:57:13", "remaining_time": "15:18:07", "throughput": 28802.59, "total_tokens": 3313271424} +{"current_steps": 9810, "total_steps": 14493, "loss": 0.2713, "lr": 3.5525379693290626e-05, "epoch": 2.0306410289588777, "percentage": 67.69, "elapsed_time": "1 day, 7:58:58", "remaining_time": "15:16:03", "throughput": 28805.5, "total_tokens": 3316635264} +{"current_steps": 9820, "total_steps": 14493, "loss": 0.2711, "lr": 3.551641610732469e-05, "epoch": 2.0327113687533966, "percentage": 67.76, "elapsed_time": "1 day, 8:00:45", "remaining_time": "15:14:01", "throughput": 28807.97, "total_tokens": 3319978752} +{"current_steps": 9830, "total_steps": 14493, "loss": 0.2704, "lr": 3.55074593028781e-05, "epoch": 2.0347817085479156, "percentage": 67.83, "elapsed_time": "1 day, 8:02:35", "remaining_time": "15:12:00", "throughput": 28810.21, "total_tokens": 3323409792} +{"current_steps": 9840, "total_steps": 14493, "loss": 0.2721, "lr": 3.5498509271404065e-05, "epoch": 2.036852048342434, "percentage": 67.89, "elapsed_time": "1 day, 8:04:28", "remaining_time": "15:10:00", "throughput": 28811.92, "total_tokens": 3326858368} +{"current_steps": 9850, "total_steps": 14493, "loss": 0.2719, "lr": 3.5489566004370893e-05, "epoch": 2.038922388136953, "percentage": 67.96, "elapsed_time": "1 day, 8:06:09", "remaining_time": "15:07:55", "throughput": 28815.71, "total_tokens": 3330210304} +{"current_steps": 9860, "total_steps": 14493, "loss": 0.2716, "lr": 3.548062949326194e-05, "epoch": 2.0409927279314717, "percentage": 68.03, "elapsed_time": "1 day, 8:07:52", "remaining_time": "15:05:51", "throughput": 28818.96, "total_tokens": 3333556480} +{"current_steps": 9870, "total_steps": 14493, "loss": 0.2722, "lr": 3.547169972957554e-05, "epoch": 2.0430630677259907, "percentage": 68.1, "elapsed_time": "1 day, 
8:09:41", "remaining_time": "15:03:50", "throughput": 28820.7, "total_tokens": 3336898560} +{"current_steps": 9880, "total_steps": 14493, "loss": 0.2691, "lr": 3.5462776704825e-05, "epoch": 2.045133407520509, "percentage": 68.17, "elapsed_time": "1 day, 8:11:30", "remaining_time": "15:01:49", "throughput": 28823.04, "total_tokens": 3340307072} +{"current_steps": 9890, "total_steps": 14493, "loss": 0.2655, "lr": 3.5453860410538594e-05, "epoch": 2.047203747315028, "percentage": 68.24, "elapsed_time": "1 day, 8:13:12", "remaining_time": "14:59:45", "throughput": 28826.72, "total_tokens": 3343682304} +{"current_steps": 9900, "total_steps": 14493, "loss": 0.2718, "lr": 3.5444950838259455e-05, "epoch": 2.0492740871095467, "percentage": 68.31, "elapsed_time": "1 day, 8:15:01", "remaining_time": "14:57:44", "throughput": 28829.59, "total_tokens": 3347161344} +{"current_steps": 9910, "total_steps": 14493, "loss": 0.273, "lr": 3.543604797954563e-05, "epoch": 2.0513444269040657, "percentage": 68.38, "elapsed_time": "1 day, 8:16:48", "remaining_time": "14:55:42", "throughput": 28832.08, "total_tokens": 3350539136} +{"current_steps": 9920, "total_steps": 14493, "loss": 0.2701, "lr": 3.542715182596996e-05, "epoch": 2.0534147666985842, "percentage": 68.45, "elapsed_time": "1 day, 8:18:39", "remaining_time": "14:53:41", "throughput": 28834.27, "total_tokens": 3353976320} +{"current_steps": 9930, "total_steps": 14493, "loss": 0.2682, "lr": 3.5418262369120115e-05, "epoch": 2.0554851064931032, "percentage": 68.52, "elapsed_time": "1 day, 8:20:25", "remaining_time": "14:51:39", "throughput": 28836.64, "total_tokens": 3357320960} +{"current_steps": 9940, "total_steps": 14493, "loss": 0.2714, "lr": 3.5409379600598526e-05, "epoch": 2.0575554462876218, "percentage": 68.58, "elapsed_time": "1 day, 8:22:15", "remaining_time": "14:49:38", "throughput": 28839.4, "total_tokens": 3360817280} +{"current_steps": 9950, "total_steps": 14493, "loss": 0.2689, "lr": 3.540050351202235e-05, "epoch": 
2.0596257860821408, "percentage": 68.65, "elapsed_time": "1 day, 8:23:57", "remaining_time": "14:47:34", "throughput": 28842.99, "total_tokens": 3364165760} +{"current_steps": 9960, "total_steps": 14493, "loss": 0.2729, "lr": 3.539163409502347e-05, "epoch": 2.0616961258766593, "percentage": 68.72, "elapsed_time": "1 day, 8:25:45", "remaining_time": "14:45:33", "throughput": 28845.25, "total_tokens": 3367541760} +{"current_steps": 9970, "total_steps": 14493, "loss": 0.2725, "lr": 3.5382771341248416e-05, "epoch": 2.0637664656711783, "percentage": 68.79, "elapsed_time": "1 day, 8:27:33", "remaining_time": "14:43:31", "throughput": 28847.71, "total_tokens": 3370944384} +{"current_steps": 9980, "total_steps": 14493, "loss": 0.2713, "lr": 3.537391524235835e-05, "epoch": 2.0658368054656973, "percentage": 68.86, "elapsed_time": "1 day, 8:29:19", "remaining_time": "14:41:29", "throughput": 28850.19, "total_tokens": 3374296064} +{"current_steps": 9990, "total_steps": 14493, "loss": 0.2723, "lr": 3.5365065790029055e-05, "epoch": 2.067907145260216, "percentage": 68.93, "elapsed_time": "1 day, 8:31:00", "remaining_time": "14:39:25", "throughput": 28854.19, "total_tokens": 3377681152} +{"current_steps": 10000, "total_steps": 14493, "loss": 0.2699, "lr": 3.535622297595087e-05, "epoch": 2.069977485054735, "percentage": 69.0, "elapsed_time": "1 day, 8:32:41", "remaining_time": "14:37:20", "throughput": 28857.57, "total_tokens": 3380989312} +{"current_steps": 10000, "total_steps": 14493, "eval_loss": 0.2651520371437073, "epoch": 2.069977485054735, "percentage": 69.0, "elapsed_time": "1 day, 8:32:43", "remaining_time": "14:37:21", "throughput": 28856.95, "total_tokens": 3380989312} +{"current_steps": 10010, "total_steps": 14493, "loss": 0.269, "lr": 3.534738679182869e-05, "epoch": 2.0720478248492533, "percentage": 69.07, "elapsed_time": "1 day, 8:34:59", "remaining_time": "14:35:32", "throughput": 28851.88, "total_tokens": 3384316800} +{"current_steps": 10020, "total_steps": 14493, 
"loss": 0.271, "lr": 3.533855722938188e-05, "epoch": 2.0741181646437723, "percentage": 69.14, "elapsed_time": "1 day, 8:36:49", "remaining_time": "14:33:32", "throughput": 28853.49, "total_tokens": 3387670272} +{"current_steps": 10030, "total_steps": 14493, "loss": 0.2741, "lr": 3.5329734280344325e-05, "epoch": 2.076188504438291, "percentage": 69.21, "elapsed_time": "1 day, 8:38:35", "remaining_time": "14:31:30", "throughput": 28856.12, "total_tokens": 3391041920} +{"current_steps": 10040, "total_steps": 14493, "loss": 0.2676, "lr": 3.5320917936464294e-05, "epoch": 2.07825884423281, "percentage": 69.27, "elapsed_time": "1 day, 8:40:20", "remaining_time": "14:29:27", "throughput": 28859.37, "total_tokens": 3394455168} +{"current_steps": 10050, "total_steps": 14493, "loss": 0.2702, "lr": 3.5312108189504505e-05, "epoch": 2.0803291840273284, "percentage": 69.34, "elapsed_time": "1 day, 8:42:06", "remaining_time": "14:27:25", "throughput": 28861.84, "total_tokens": 3397813376} +{"current_steps": 10060, "total_steps": 14493, "loss": 0.2714, "lr": 3.530330503124204e-05, "epoch": 2.0823995238218473, "percentage": 69.41, "elapsed_time": "1 day, 8:43:58", "remaining_time": "14:25:26", "throughput": 28863.84, "total_tokens": 3401273984} +{"current_steps": 10070, "total_steps": 14493, "loss": 0.2715, "lr": 3.5294508453468325e-05, "epoch": 2.084469863616366, "percentage": 69.48, "elapsed_time": "1 day, 8:45:44", "remaining_time": "14:23:24", "throughput": 28866.57, "total_tokens": 3404650112} +{"current_steps": 10080, "total_steps": 14493, "loss": 0.2687, "lr": 3.528571844798908e-05, "epoch": 2.086540203410885, "percentage": 69.55, "elapsed_time": "1 day, 8:47:29", "remaining_time": "14:21:21", "throughput": 28869.03, "total_tokens": 3407985280} +{"current_steps": 10090, "total_steps": 14493, "loss": 0.2699, "lr": 3.527693500662431e-05, "epoch": 2.0886105432054034, "percentage": 69.62, "elapsed_time": "1 day, 8:49:18", "remaining_time": "14:19:20", "throughput": 28871.61, 
"total_tokens": 3411413632} +{"current_steps": 10100, "total_steps": 14493, "loss": 0.2678, "lr": 3.5268158121208294e-05, "epoch": 2.0906808829999224, "percentage": 69.69, "elapsed_time": "1 day, 8:51:03", "remaining_time": "14:17:18", "throughput": 28874.88, "total_tokens": 3414834304} +{"current_steps": 10110, "total_steps": 14493, "loss": 0.2679, "lr": 3.525938778358949e-05, "epoch": 2.092751222794441, "percentage": 69.76, "elapsed_time": "1 day, 8:52:45", "remaining_time": "14:15:15", "throughput": 28878.24, "total_tokens": 3418194688} +{"current_steps": 10120, "total_steps": 14493, "loss": 0.2736, "lr": 3.5250623985630537e-05, "epoch": 2.09482156258896, "percentage": 69.83, "elapsed_time": "1 day, 8:54:34", "remaining_time": "14:13:14", "throughput": 28880.22, "total_tokens": 3421576704} +{"current_steps": 10130, "total_steps": 14493, "loss": 0.272, "lr": 3.524186671920826e-05, "epoch": 2.096891902383479, "percentage": 69.9, "elapsed_time": "1 day, 8:56:20", "remaining_time": "14:11:12", "throughput": 28882.93, "total_tokens": 3424963328} +{"current_steps": 10140, "total_steps": 14493, "loss": 0.2693, "lr": 3.523311597621358e-05, "epoch": 2.0989622421779974, "percentage": 69.96, "elapsed_time": "1 day, 8:58:05", "remaining_time": "14:09:10", "throughput": 28886.0, "total_tokens": 3428347648} +{"current_steps": 10150, "total_steps": 14493, "loss": 0.2708, "lr": 3.5224371748551505e-05, "epoch": 2.1010325819725164, "percentage": 70.03, "elapsed_time": "1 day, 8:59:53", "remaining_time": "14:07:09", "throughput": 28887.99, "total_tokens": 3431713920} +{"current_steps": 10160, "total_steps": 14493, "loss": 0.2684, "lr": 3.521563402814109e-05, "epoch": 2.103102921767035, "percentage": 70.1, "elapsed_time": "1 day, 9:01:42", "remaining_time": "14:05:09", "throughput": 28890.58, "total_tokens": 3435159168} +{"current_steps": 10170, "total_steps": 14493, "loss": 0.2688, "lr": 3.5206902806915436e-05, "epoch": 2.105173261561554, "percentage": 70.17, "elapsed_time": "1 
day, 9:03:27", "remaining_time": "14:03:07", "throughput": 28892.92, "total_tokens": 3438481792} +{"current_steps": 10180, "total_steps": 14493, "loss": 0.2733, "lr": 3.5198178076821644e-05, "epoch": 2.1072436013560725, "percentage": 70.24, "elapsed_time": "1 day, 9:05:17", "remaining_time": "14:01:07", "throughput": 28894.77, "total_tokens": 3441874816} +{"current_steps": 10190, "total_steps": 14493, "loss": 0.272, "lr": 3.5189459829820743e-05, "epoch": 2.1093139411505915, "percentage": 70.31, "elapsed_time": "1 day, 9:07:04", "remaining_time": "13:59:05", "throughput": 28897.29, "total_tokens": 3445252992} +{"current_steps": 10200, "total_steps": 14493, "loss": 0.2747, "lr": 3.5180748057887714e-05, "epoch": 2.11138428094511, "percentage": 70.38, "elapsed_time": "1 day, 9:08:52", "remaining_time": "13:57:05", "throughput": 28899.58, "total_tokens": 3448665728} +{"current_steps": 10210, "total_steps": 14493, "loss": 0.2734, "lr": 3.517204275301144e-05, "epoch": 2.113454620739629, "percentage": 70.45, "elapsed_time": "1 day, 9:10:41", "remaining_time": "13:55:04", "throughput": 28901.93, "total_tokens": 3452084992} +{"current_steps": 10220, "total_steps": 14493, "loss": 0.2684, "lr": 3.5163343907194676e-05, "epoch": 2.1155249605341475, "percentage": 70.52, "elapsed_time": "1 day, 9:12:26", "remaining_time": "13:53:02", "throughput": 28904.2, "total_tokens": 3455381376} +{"current_steps": 10230, "total_steps": 14493, "loss": 0.2681, "lr": 3.5154651512453995e-05, "epoch": 2.1175953003286665, "percentage": 70.59, "elapsed_time": "1 day, 9:14:10", "remaining_time": "13:51:00", "throughput": 28907.18, "total_tokens": 3458750720} +{"current_steps": 10240, "total_steps": 14493, "loss": 0.2672, "lr": 3.514596556081981e-05, "epoch": 2.119665640123185, "percentage": 70.65, "elapsed_time": "1 day, 9:15:53", "remaining_time": "13:48:57", "throughput": 28910.43, "total_tokens": 3462125440} +{"current_steps": 10250, "total_steps": 14493, "loss": 0.2708, "lr": 
3.513728604433628e-05, "epoch": 2.121735979917704, "percentage": 70.72, "elapsed_time": "1 day, 9:17:39", "remaining_time": "13:46:56", "throughput": 28912.69, "total_tokens": 3465474048} +{"current_steps": 10260, "total_steps": 14493, "loss": 0.2715, "lr": 3.5128612955061334e-05, "epoch": 2.1238063197122226, "percentage": 70.79, "elapsed_time": "1 day, 9:19:27", "remaining_time": "13:44:55", "throughput": 28914.91, "total_tokens": 3468850304} +{"current_steps": 10270, "total_steps": 14493, "loss": 0.2714, "lr": 3.5119946285066595e-05, "epoch": 2.1258766595067415, "percentage": 70.86, "elapsed_time": "1 day, 9:21:16", "remaining_time": "13:42:55", "throughput": 28917.36, "total_tokens": 3472285952} +{"current_steps": 10280, "total_steps": 14493, "loss": 0.2708, "lr": 3.511128602643739e-05, "epoch": 2.1279469993012605, "percentage": 70.93, "elapsed_time": "1 day, 9:22:59", "remaining_time": "13:40:52", "throughput": 28920.27, "total_tokens": 3475609344} +{"current_steps": 10290, "total_steps": 14493, "loss": 0.2695, "lr": 3.510263217127269e-05, "epoch": 2.130017339095779, "percentage": 71.0, "elapsed_time": "1 day, 9:24:41", "remaining_time": "13:38:49", "throughput": 28923.16, "total_tokens": 3478909696} +{"current_steps": 10300, "total_steps": 14493, "loss": 0.2707, "lr": 3.50939847116851e-05, "epoch": 2.132087678890298, "percentage": 71.07, "elapsed_time": "1 day, 9:26:24", "remaining_time": "13:36:46", "throughput": 28926.24, "total_tokens": 3482259328} +{"current_steps": 10310, "total_steps": 14493, "loss": 0.272, "lr": 3.508534363980081e-05, "epoch": 2.1341580186848166, "percentage": 71.14, "elapsed_time": "1 day, 9:28:11", "remaining_time": "13:34:46", "throughput": 28928.86, "total_tokens": 3485688064} +{"current_steps": 10320, "total_steps": 14493, "loss": 0.2722, "lr": 3.507670894775958e-05, "epoch": 2.1362283584793356, "percentage": 71.21, "elapsed_time": "1 day, 9:30:00", "remaining_time": "13:32:46", "throughput": 28931.28, "total_tokens": 3489125248} 
+{"current_steps": 10330, "total_steps": 14493, "loss": 0.2707, "lr": 3.506808062771471e-05, "epoch": 2.138298698273854, "percentage": 71.28, "elapsed_time": "1 day, 9:31:51", "remaining_time": "13:30:46", "throughput": 28933.27, "total_tokens": 3492563840} +{"current_steps": 10340, "total_steps": 14493, "loss": 0.2698, "lr": 3.505945867183298e-05, "epoch": 2.140369038068373, "percentage": 71.34, "elapsed_time": "1 day, 9:33:35", "remaining_time": "13:28:44", "throughput": 28936.22, "total_tokens": 3495940864} +{"current_steps": 10350, "total_steps": 14493, "loss": 0.272, "lr": 3.505084307229468e-05, "epoch": 2.1424393778628916, "percentage": 71.41, "elapsed_time": "1 day, 9:35:15", "remaining_time": "13:26:41", "throughput": 28939.45, "total_tokens": 3499233408} +{"current_steps": 10360, "total_steps": 14493, "loss": 0.2706, "lr": 3.5042233821293525e-05, "epoch": 2.1445097176574106, "percentage": 71.48, "elapsed_time": "1 day, 9:36:58", "remaining_time": "13:24:38", "throughput": 28942.85, "total_tokens": 3502624512} +{"current_steps": 10370, "total_steps": 14493, "loss": 0.2715, "lr": 3.503363091103664e-05, "epoch": 2.146580057451929, "percentage": 71.55, "elapsed_time": "1 day, 9:38:46", "remaining_time": "13:22:38", "throughput": 28945.11, "total_tokens": 3506032000} +{"current_steps": 10380, "total_steps": 14493, "loss": 0.2693, "lr": 3.5025034333744545e-05, "epoch": 2.148650397246448, "percentage": 71.62, "elapsed_time": "1 day, 9:40:34", "remaining_time": "13:20:38", "throughput": 28947.75, "total_tokens": 3509476736} +{"current_steps": 10390, "total_steps": 14493, "loss": 0.2684, "lr": 3.501644408165112e-05, "epoch": 2.1507207370409667, "percentage": 71.69, "elapsed_time": "1 day, 9:42:19", "remaining_time": "13:18:36", "throughput": 28950.57, "total_tokens": 3512858240} +{"current_steps": 10400, "total_steps": 14493, "loss": 0.2694, "lr": 3.500786014700357e-05, "epoch": 2.1527910768354857, "percentage": 71.76, "elapsed_time": "1 day, 9:44:07", 
"remaining_time": "13:16:36", "throughput": 28952.95, "total_tokens": 3516275456} +{"current_steps": 10410, "total_steps": 14493, "loss": 0.2703, "lr": 3.499928252206237e-05, "epoch": 2.1548614166300046, "percentage": 71.83, "elapsed_time": "1 day, 9:45:54", "remaining_time": "13:14:36", "throughput": 28955.51, "total_tokens": 3519685376} +{"current_steps": 10420, "total_steps": 14493, "loss": 0.2708, "lr": 3.499071119910131e-05, "epoch": 2.156931756424523, "percentage": 71.9, "elapsed_time": "1 day, 9:47:45", "remaining_time": "13:12:36", "throughput": 28957.23, "total_tokens": 3523087488} +{"current_steps": 10430, "total_steps": 14493, "loss": 0.2713, "lr": 3.498214617040739e-05, "epoch": 2.159002096219042, "percentage": 71.97, "elapsed_time": "1 day, 9:49:29", "remaining_time": "13:10:35", "throughput": 28960.17, "total_tokens": 3526469504} +{"current_steps": 10440, "total_steps": 14493, "loss": 0.2705, "lr": 3.49735874282808e-05, "epoch": 2.1610724360135607, "percentage": 72.03, "elapsed_time": "1 day, 9:51:18", "remaining_time": "13:08:35", "throughput": 28962.74, "total_tokens": 3529927680} +{"current_steps": 10450, "total_steps": 14493, "loss": 0.2704, "lr": 3.4965034965034965e-05, "epoch": 2.1631427758080797, "percentage": 72.1, "elapsed_time": "1 day, 9:53:15", "remaining_time": "13:06:38", "throughput": 28963.44, "total_tokens": 3533410816} +{"current_steps": 10460, "total_steps": 14493, "loss": 0.2697, "lr": 3.495648877299642e-05, "epoch": 2.165213115602598, "percentage": 72.17, "elapsed_time": "1 day, 9:55:00", "remaining_time": "13:04:37", "throughput": 28966.05, "total_tokens": 3536768128} +{"current_steps": 10470, "total_steps": 14493, "loss": 0.2689, "lr": 3.494794884450483e-05, "epoch": 2.167283455397117, "percentage": 72.24, "elapsed_time": "1 day, 9:56:47", "remaining_time": "13:02:36", "throughput": 28968.61, "total_tokens": 3540169728} +{"current_steps": 10480, "total_steps": 14493, "loss": 0.2725, "lr": 3.4939415171912954e-05, "epoch": 
2.1693537951916357, "percentage": 72.31, "elapsed_time": "1 day, 9:58:27", "remaining_time": "13:00:34", "throughput": 28971.45, "total_tokens": 3543439232} +{"current_steps": 10490, "total_steps": 14493, "loss": 0.2675, "lr": 3.4930887747586616e-05, "epoch": 2.1714241349861547, "percentage": 72.38, "elapsed_time": "1 day, 10:00:16", "remaining_time": "12:58:34", "throughput": 28974.15, "total_tokens": 3546901376} +{"current_steps": 10500, "total_steps": 14493, "loss": 0.2727, "lr": 3.492236656390469e-05, "epoch": 2.1734944747806733, "percentage": 72.45, "elapsed_time": "1 day, 10:02:03", "remaining_time": "12:56:34", "throughput": 28976.09, "total_tokens": 3550265984} +{"current_steps": 10510, "total_steps": 14493, "loss": 0.2723, "lr": 3.4913851613259034e-05, "epoch": 2.1755648145751922, "percentage": 72.52, "elapsed_time": "1 day, 10:03:49", "remaining_time": "12:54:33", "throughput": 28978.63, "total_tokens": 3553630208} +{"current_steps": 10520, "total_steps": 14493, "loss": 0.2705, "lr": 3.490534288805452e-05, "epoch": 2.177635154369711, "percentage": 72.59, "elapsed_time": "1 day, 10:05:35", "remaining_time": "12:52:32", "throughput": 28981.09, "total_tokens": 3557008128} +{"current_steps": 10530, "total_steps": 14493, "loss": 0.2716, "lr": 3.489684038070891e-05, "epoch": 2.1797054941642298, "percentage": 72.66, "elapsed_time": "1 day, 10:07:25", "remaining_time": "12:50:33", "throughput": 28982.62, "total_tokens": 3560393600} +{"current_steps": 10540, "total_steps": 14493, "loss": 0.2719, "lr": 3.488834408365296e-05, "epoch": 2.1817758339587483, "percentage": 72.72, "elapsed_time": "1 day, 10:09:12", "remaining_time": "12:48:32", "throughput": 28985.15, "total_tokens": 3563785216} +{"current_steps": 10550, "total_steps": 14493, "loss": 0.2714, "lr": 3.487985398933027e-05, "epoch": 2.1838461737532673, "percentage": 72.79, "elapsed_time": "1 day, 10:10:56", "remaining_time": "12:46:31", "throughput": 28987.67, "total_tokens": 3567130496} +{"current_steps": 
10560, "total_steps": 14493, "loss": 0.2694, "lr": 3.4871370090197324e-05, "epoch": 2.1859165135477863, "percentage": 72.86, "elapsed_time": "1 day, 10:12:44", "remaining_time": "12:44:31", "throughput": 28989.52, "total_tokens": 3570492672} +{"current_steps": 10570, "total_steps": 14493, "loss": 0.2707, "lr": 3.486289237872343e-05, "epoch": 2.187986853342305, "percentage": 72.93, "elapsed_time": "1 day, 10:14:34", "remaining_time": "12:42:32", "throughput": 28991.85, "total_tokens": 3573949184} +{"current_steps": 10580, "total_steps": 14493, "loss": 0.2718, "lr": 3.485442084739075e-05, "epoch": 2.190057193136824, "percentage": 73.0, "elapsed_time": "1 day, 10:16:16", "remaining_time": "12:40:30", "throughput": 28994.7, "total_tokens": 3577257344} +{"current_steps": 10590, "total_steps": 14493, "loss": 0.2702, "lr": 3.484595548869416e-05, "epoch": 2.1921275329313423, "percentage": 73.07, "elapsed_time": "1 day, 10:17:59", "remaining_time": "12:38:28", "throughput": 28997.68, "total_tokens": 3580615936} +{"current_steps": 10600, "total_steps": 14493, "loss": 0.2704, "lr": 3.4837496295141335e-05, "epoch": 2.1941978727258613, "percentage": 73.14, "elapsed_time": "1 day, 10:19:52", "remaining_time": "12:36:31", "throughput": 28999.14, "total_tokens": 3584087936} +{"current_steps": 10610, "total_steps": 14493, "loss": 0.2723, "lr": 3.482904325925266e-05, "epoch": 2.19626821252038, "percentage": 73.21, "elapsed_time": "1 day, 10:21:40", "remaining_time": "12:34:31", "throughput": 29001.46, "total_tokens": 3587484544} +{"current_steps": 10620, "total_steps": 14493, "loss": 0.2719, "lr": 3.482059637356124e-05, "epoch": 2.198338552314899, "percentage": 73.28, "elapsed_time": "1 day, 10:23:26", "remaining_time": "12:32:30", "throughput": 29003.98, "total_tokens": 3590868864} +{"current_steps": 10630, "total_steps": 14493, "loss": 0.2702, "lr": 3.481215563061281e-05, "epoch": 2.2004088921094174, "percentage": 73.35, "elapsed_time": "1 day, 10:25:06", "remaining_time": 
"12:30:28", "throughput": 29007.03, "total_tokens": 3594146048} +{"current_steps": 10640, "total_steps": 14493, "loss": 0.273, "lr": 3.4803721022965785e-05, "epoch": 2.2024792319039364, "percentage": 73.41, "elapsed_time": "1 day, 10:26:49", "remaining_time": "12:28:26", "throughput": 29010.02, "total_tokens": 3597516928} +{"current_steps": 10650, "total_steps": 14493, "loss": 0.2703, "lr": 3.479529254319117e-05, "epoch": 2.204549571698455, "percentage": 73.48, "elapsed_time": "1 day, 10:28:37", "remaining_time": "12:26:27", "throughput": 29012.21, "total_tokens": 3600911744} +{"current_steps": 10660, "total_steps": 14493, "loss": 0.2708, "lr": 3.478687018387257e-05, "epoch": 2.206619911492974, "percentage": 73.55, "elapsed_time": "1 day, 10:30:24", "remaining_time": "12:24:27", "throughput": 29014.56, "total_tokens": 3604305920} +{"current_steps": 10670, "total_steps": 14493, "loss": 0.2726, "lr": 3.477845393760616e-05, "epoch": 2.2086902512874924, "percentage": 73.62, "elapsed_time": "1 day, 10:32:08", "remaining_time": "12:22:26", "throughput": 29016.97, "total_tokens": 3607642880} +{"current_steps": 10680, "total_steps": 14493, "loss": 0.2709, "lr": 3.4770043797000614e-05, "epoch": 2.2107605910820114, "percentage": 73.69, "elapsed_time": "1 day, 10:33:53", "remaining_time": "12:20:25", "throughput": 29019.43, "total_tokens": 3610998912} +{"current_steps": 10690, "total_steps": 14493, "loss": 0.2707, "lr": 3.4761639754677146e-05, "epoch": 2.21283093087653, "percentage": 73.76, "elapsed_time": "1 day, 10:35:39", "remaining_time": "12:18:25", "throughput": 29022.18, "total_tokens": 3614407680} +{"current_steps": 10700, "total_steps": 14493, "loss": 0.2706, "lr": 3.4753241803269435e-05, "epoch": 2.214901270671049, "percentage": 73.83, "elapsed_time": "1 day, 10:37:25", "remaining_time": "12:16:25", "throughput": 29024.46, "total_tokens": 3617780608} +{"current_steps": 10710, "total_steps": 14493, "loss": 0.2688, "lr": 3.474484993542361e-05, "epoch": 
2.2169716104655675, "percentage": 73.9, "elapsed_time": "1 day, 10:39:13", "remaining_time": "12:14:25", "throughput": 29026.65, "total_tokens": 3621171712} +{"current_steps": 10720, "total_steps": 14493, "loss": 0.2677, "lr": 3.473646414379822e-05, "epoch": 2.2190419502600864, "percentage": 73.97, "elapsed_time": "1 day, 10:41:03", "remaining_time": "12:12:26", "throughput": 29027.94, "total_tokens": 3624521344} +{"current_steps": 10730, "total_steps": 14493, "loss": 0.2711, "lr": 3.472808442106422e-05, "epoch": 2.221112290054605, "percentage": 74.04, "elapsed_time": "1 day, 10:42:50", "remaining_time": "12:10:26", "throughput": 29030.15, "total_tokens": 3627908480} +{"current_steps": 10740, "total_steps": 14493, "loss": 0.2741, "lr": 3.4719710759904936e-05, "epoch": 2.223182629849124, "percentage": 74.1, "elapsed_time": "1 day, 10:44:35", "remaining_time": "12:08:26", "throughput": 29032.37, "total_tokens": 3631250816} +{"current_steps": 10750, "total_steps": 14493, "loss": 0.2751, "lr": 3.471134315301603e-05, "epoch": 2.225252969643643, "percentage": 74.17, "elapsed_time": "1 day, 10:46:27", "remaining_time": "12:06:28", "throughput": 29033.92, "total_tokens": 3634688384} +{"current_steps": 10760, "total_steps": 14493, "loss": 0.2681, "lr": 3.470298159310549e-05, "epoch": 2.2273233094381615, "percentage": 74.24, "elapsed_time": "1 day, 10:48:05", "remaining_time": "12:04:25", "throughput": 29037.24, "total_tokens": 3637932288} +{"current_steps": 10770, "total_steps": 14493, "loss": 0.2697, "lr": 3.4694626072893585e-05, "epoch": 2.2293936492326805, "percentage": 74.31, "elapsed_time": "1 day, 10:49:56", "remaining_time": "12:02:27", "throughput": 29039.03, "total_tokens": 3641381376} +{"current_steps": 10780, "total_steps": 14493, "loss": 0.2732, "lr": 3.468627658511285e-05, "epoch": 2.231463989027199, "percentage": 74.38, "elapsed_time": "1 day, 10:51:41", "remaining_time": "12:00:27", "throughput": 29040.89, "total_tokens": 3644686592} +{"current_steps": 10790, 
"total_steps": 14493, "loss": 0.272, "lr": 3.467793312250806e-05, "epoch": 2.233534328821718, "percentage": 74.45, "elapsed_time": "1 day, 10:53:34", "remaining_time": "11:58:29", "throughput": 29042.07, "total_tokens": 3648114944} +{"current_steps": 10800, "total_steps": 14493, "loss": 0.2695, "lr": 3.466959567783619e-05, "epoch": 2.2356046686162365, "percentage": 74.52, "elapsed_time": "1 day, 10:55:23", "remaining_time": "11:56:30", "throughput": 29044.43, "total_tokens": 3651562880} +{"current_steps": 10810, "total_steps": 14493, "loss": 0.2706, "lr": 3.466126424386642e-05, "epoch": 2.2376750084107555, "percentage": 74.59, "elapsed_time": "1 day, 10:57:17", "remaining_time": "11:54:33", "throughput": 29046.34, "total_tokens": 3655113984} +{"current_steps": 10820, "total_steps": 14493, "loss": 0.2717, "lr": 3.4652938813380056e-05, "epoch": 2.239745348205274, "percentage": 74.66, "elapsed_time": "1 day, 10:59:02", "remaining_time": "11:52:32", "throughput": 29048.56, "total_tokens": 3658442880} +{"current_steps": 10830, "total_steps": 14493, "loss": 0.2701, "lr": 3.464461937917057e-05, "epoch": 2.241815687999793, "percentage": 74.73, "elapsed_time": "1 day, 11:00:49", "remaining_time": "11:50:33", "throughput": 29051.24, "total_tokens": 3661884160} +{"current_steps": 10840, "total_steps": 14493, "loss": 0.2688, "lr": 3.4636305934043525e-05, "epoch": 2.2438860277943116, "percentage": 74.79, "elapsed_time": "1 day, 11:02:29", "remaining_time": "11:48:31", "throughput": 29054.53, "total_tokens": 3665205376} +{"current_steps": 10850, "total_steps": 14493, "loss": 0.2695, "lr": 3.4627998470816544e-05, "epoch": 2.2459563675888305, "percentage": 74.86, "elapsed_time": "1 day, 11:04:12", "remaining_time": "11:46:30", "throughput": 29057.73, "total_tokens": 3668617856} +{"current_steps": 10860, "total_steps": 14493, "loss": 0.2706, "lr": 3.4619696982319334e-05, "epoch": 2.248026707383349, "percentage": 74.93, "elapsed_time": "1 day, 11:06:03", "remaining_time": 
"11:44:32", "throughput": 29059.59, "total_tokens": 3672061824} +{"current_steps": 10870, "total_steps": 14493, "loss": 0.2707, "lr": 3.461140146139361e-05, "epoch": 2.250097047177868, "percentage": 75.0, "elapsed_time": "1 day, 11:07:44", "remaining_time": "11:42:30", "throughput": 29062.85, "total_tokens": 3675413632} +{"current_steps": 10880, "total_steps": 14493, "loss": 0.2714, "lr": 3.460311190089309e-05, "epoch": 2.2521673869723866, "percentage": 75.07, "elapsed_time": "1 day, 11:09:29", "remaining_time": "11:40:30", "throughput": 29065.6, "total_tokens": 3678803840} +{"current_steps": 10890, "total_steps": 14493, "loss": 0.2721, "lr": 3.459482829368348e-05, "epoch": 2.2542377267669056, "percentage": 75.14, "elapsed_time": "1 day, 11:11:11", "remaining_time": "11:38:29", "throughput": 29068.32, "total_tokens": 3682127872} +{"current_steps": 10900, "total_steps": 14493, "loss": 0.2695, "lr": 3.4586550632642425e-05, "epoch": 2.2563080665614246, "percentage": 75.21, "elapsed_time": "1 day, 11:12:58", "remaining_time": "11:36:30", "throughput": 29070.5, "total_tokens": 3685502848} +{"current_steps": 10910, "total_steps": 14493, "loss": 0.2707, "lr": 3.457827891065949e-05, "epoch": 2.258378406355943, "percentage": 75.28, "elapsed_time": "1 day, 11:14:47", "remaining_time": "11:34:31", "throughput": 29072.45, "total_tokens": 3688922496} +{"current_steps": 10920, "total_steps": 14493, "loss": 0.2739, "lr": 3.457001312063614e-05, "epoch": 2.260448746150462, "percentage": 75.35, "elapsed_time": "1 day, 11:16:34", "remaining_time": "11:32:32", "throughput": 29074.66, "total_tokens": 3692317056} +{"current_steps": 10930, "total_steps": 14493, "loss": 0.2699, "lr": 3.45617532554857e-05, "epoch": 2.2625190859449806, "percentage": 75.42, "elapsed_time": "1 day, 11:18:20", "remaining_time": "11:30:32", "throughput": 29076.81, "total_tokens": 3695671936} +{"current_steps": 10940, "total_steps": 14493, "loss": 0.2698, "lr": 3.455349930813339e-05, "epoch": 2.2645894257394996, 
"percentage": 75.48, "elapsed_time": "1 day, 11:20:13", "remaining_time": "11:28:35", "throughput": 29078.03, "total_tokens": 3699119744} +{"current_steps": 10950, "total_steps": 14493, "loss": 0.2706, "lr": 3.45452512715162e-05, "epoch": 2.266659765534018, "percentage": 75.55, "elapsed_time": "1 day, 11:21:53", "remaining_time": "11:26:33", "throughput": 29081.21, "total_tokens": 3702443776} +{"current_steps": 10960, "total_steps": 14493, "loss": 0.2716, "lr": 3.4537009138582935e-05, "epoch": 2.268730105328537, "percentage": 75.62, "elapsed_time": "1 day, 11:23:40", "remaining_time": "11:24:34", "throughput": 29083.63, "total_tokens": 3705846656} +{"current_steps": 10970, "total_steps": 14493, "loss": 0.2682, "lr": 3.4528772902294174e-05, "epoch": 2.2708004451230557, "percentage": 75.69, "elapsed_time": "1 day, 11:25:31", "remaining_time": "11:22:36", "throughput": 29085.15, "total_tokens": 3709280768} +{"current_steps": 10980, "total_steps": 14493, "loss": 0.2712, "lr": 3.452054255562222e-05, "epoch": 2.2728707849175747, "percentage": 75.76, "elapsed_time": "1 day, 11:27:23", "remaining_time": "11:20:38", "throughput": 29086.69, "total_tokens": 3712730496} +{"current_steps": 10990, "total_steps": 14493, "loss": 0.2718, "lr": 3.451231809155115e-05, "epoch": 2.274941124712093, "percentage": 75.83, "elapsed_time": "1 day, 11:29:09", "remaining_time": "11:18:39", "throughput": 29089.18, "total_tokens": 3716115456} +{"current_steps": 11000, "total_steps": 14493, "loss": 0.2718, "lr": 3.450409950307666e-05, "epoch": 2.277011464506612, "percentage": 75.9, "elapsed_time": "1 day, 11:30:59", "remaining_time": "11:16:41", "throughput": 29090.8, "total_tokens": 3719530368} +{"current_steps": 11010, "total_steps": 14493, "loss": 0.2696, "lr": 3.449588678320619e-05, "epoch": 2.279081804301131, "percentage": 75.97, "elapsed_time": "1 day, 11:32:50", "remaining_time": "11:14:43", "throughput": 29092.43, "total_tokens": 3722962944} +{"current_steps": 11020, "total_steps": 14493, 
"loss": 0.2703, "lr": 3.4487679924958767e-05, "epoch": 2.2811521440956497, "percentage": 76.04, "elapsed_time": "1 day, 11:34:32", "remaining_time": "11:12:42", "throughput": 29095.46, "total_tokens": 3726328192} +{"current_steps": 11030, "total_steps": 14493, "loss": 0.2675, "lr": 3.4479478921365076e-05, "epoch": 2.2832224838901682, "percentage": 76.11, "elapsed_time": "1 day, 11:36:15", "remaining_time": "11:10:42", "throughput": 29098.25, "total_tokens": 3729693056} +{"current_steps": 11040, "total_steps": 14493, "loss": 0.2687, "lr": 3.447128376546738e-05, "epoch": 2.2852928236846872, "percentage": 76.17, "elapsed_time": "1 day, 11:37:55", "remaining_time": "11:08:40", "throughput": 29101.78, "total_tokens": 3733043840} +{"current_steps": 11050, "total_steps": 14493, "loss": 0.2716, "lr": 3.4463094450319505e-05, "epoch": 2.287363163479206, "percentage": 76.24, "elapsed_time": "1 day, 11:39:38", "remaining_time": "11:06:40", "throughput": 29104.33, "total_tokens": 3736369664} +{"current_steps": 11060, "total_steps": 14493, "loss": 0.2719, "lr": 3.4454910968986855e-05, "epoch": 2.2894335032737247, "percentage": 76.31, "elapsed_time": "1 day, 11:41:19", "remaining_time": "11:04:39", "throughput": 29107.47, "total_tokens": 3739715456} +{"current_steps": 11070, "total_steps": 14493, "loss": 0.2713, "lr": 3.4446733314546336e-05, "epoch": 2.2915038430682437, "percentage": 76.38, "elapsed_time": "1 day, 11:43:08", "remaining_time": "11:02:41", "throughput": 29109.46, "total_tokens": 3743133696} +{"current_steps": 11080, "total_steps": 14493, "loss": 0.2731, "lr": 3.443856148008633e-05, "epoch": 2.2935741828627623, "percentage": 76.45, "elapsed_time": "1 day, 11:45:00", "remaining_time": "11:00:43", "throughput": 29110.34, "total_tokens": 3746504192} +{"current_steps": 11090, "total_steps": 14493, "loss": 0.273, "lr": 3.443039545870672e-05, "epoch": 2.2956445226572813, "percentage": 76.52, "elapsed_time": "1 day, 11:46:40", "remaining_time": "10:58:42", "throughput": 
29113.07, "total_tokens": 3749780608} +{"current_steps": 11100, "total_steps": 14493, "loss": 0.2702, "lr": 3.442223524351883e-05, "epoch": 2.2977148624518, "percentage": 76.59, "elapsed_time": "1 day, 11:48:25", "remaining_time": "10:56:43", "throughput": 29115.1, "total_tokens": 3753084032} +{"current_steps": 11110, "total_steps": 14493, "loss": 0.2721, "lr": 3.44140808276454e-05, "epoch": 2.2997852022463188, "percentage": 76.66, "elapsed_time": "1 day, 11:50:09", "remaining_time": "10:54:43", "throughput": 29117.66, "total_tokens": 3756464512} +{"current_steps": 11120, "total_steps": 14493, "loss": 0.2694, "lr": 3.4405932204220575e-05, "epoch": 2.3018555420408373, "percentage": 76.73, "elapsed_time": "1 day, 11:51:53", "remaining_time": "10:52:43", "throughput": 29120.18, "total_tokens": 3759801984} +{"current_steps": 11130, "total_steps": 14493, "loss": 0.2739, "lr": 3.4397789366389876e-05, "epoch": 2.3039258818353563, "percentage": 76.8, "elapsed_time": "1 day, 11:53:39", "remaining_time": "10:50:44", "throughput": 29122.89, "total_tokens": 3763232512} +{"current_steps": 11140, "total_steps": 14493, "loss": 0.272, "lr": 3.438965230731016e-05, "epoch": 2.305996221629875, "percentage": 76.86, "elapsed_time": "1 day, 11:55:18", "remaining_time": "10:48:43", "throughput": 29125.88, "total_tokens": 3766510464} +{"current_steps": 11150, "total_steps": 14493, "loss": 0.2699, "lr": 3.438152102014964e-05, "epoch": 2.308066561424394, "percentage": 76.93, "elapsed_time": "1 day, 11:57:02", "remaining_time": "10:46:43", "throughput": 29128.34, "total_tokens": 3769855616} +{"current_steps": 11160, "total_steps": 14493, "loss": 0.2699, "lr": 3.437339549808778e-05, "epoch": 2.310136901218913, "percentage": 77.0, "elapsed_time": "1 day, 11:58:48", "remaining_time": "10:44:44", "throughput": 29130.59, "total_tokens": 3773248000} +{"current_steps": 11170, "total_steps": 14493, "loss": 0.2694, "lr": 3.43652757343154e-05, "epoch": 2.3122072410134313, "percentage": 77.07, 
"elapsed_time": "1 day, 12:00:36", "remaining_time": "10:42:46", "throughput": 29132.65, "total_tokens": 3776663040} +{"current_steps": 11180, "total_steps": 14493, "loss": 0.2718, "lr": 3.435716172203449e-05, "epoch": 2.31427758080795, "percentage": 77.14, "elapsed_time": "1 day, 12:02:22", "remaining_time": "10:40:47", "throughput": 29134.91, "total_tokens": 3780049536} +{"current_steps": 11190, "total_steps": 14493, "loss": 0.2704, "lr": 3.434905345445833e-05, "epoch": 2.316347920602469, "percentage": 77.21, "elapsed_time": "1 day, 12:04:06", "remaining_time": "10:38:47", "throughput": 29137.2, "total_tokens": 3783375360} +{"current_steps": 11200, "total_steps": 14493, "loss": 0.2703, "lr": 3.4340950924811374e-05, "epoch": 2.318418260396988, "percentage": 77.28, "elapsed_time": "1 day, 12:05:49", "remaining_time": "10:36:47", "throughput": 29140.02, "total_tokens": 3786723584} +{"current_steps": 11210, "total_steps": 14493, "loss": 0.2689, "lr": 3.433285412632927e-05, "epoch": 2.3204886001915064, "percentage": 77.35, "elapsed_time": "1 day, 12:07:29", "remaining_time": "10:34:46", "throughput": 29143.01, "total_tokens": 3790046336} +{"current_steps": 11220, "total_steps": 14493, "loss": 0.2684, "lr": 3.4324763052258835e-05, "epoch": 2.3225589399860254, "percentage": 77.42, "elapsed_time": "1 day, 12:09:21", "remaining_time": "10:32:49", "throughput": 29144.66, "total_tokens": 3793525504} +{"current_steps": 11230, "total_steps": 14493, "loss": 0.2694, "lr": 3.4316677695858003e-05, "epoch": 2.324629279780544, "percentage": 77.49, "elapsed_time": "1 day, 12:11:07", "remaining_time": "10:30:50", "throughput": 29146.74, "total_tokens": 3796865664} +{"current_steps": 11240, "total_steps": 14493, "loss": 0.2707, "lr": 3.430859805039583e-05, "epoch": 2.326699619575063, "percentage": 77.55, "elapsed_time": "1 day, 12:12:51", "remaining_time": "10:28:51", "throughput": 29149.73, "total_tokens": 3800283136} +{"current_steps": 11250, "total_steps": 14493, "loss": 0.2734, 
"lr": 3.430052410915246e-05, "epoch": 2.3287699593695814, "percentage": 77.62, "elapsed_time": "1 day, 12:14:37", "remaining_time": "10:26:52", "throughput": 29152.19, "total_tokens": 3803693056} +{"current_steps": 11260, "total_steps": 14493, "loss": 0.2712, "lr": 3.4292455865419086e-05, "epoch": 2.3308402991641004, "percentage": 77.69, "elapsed_time": "1 day, 12:16:21", "remaining_time": "10:24:52", "throughput": 29154.75, "total_tokens": 3807059200} +{"current_steps": 11270, "total_steps": 14493, "loss": 0.2703, "lr": 3.4284393312497973e-05, "epoch": 2.332910638958619, "percentage": 77.76, "elapsed_time": "1 day, 12:18:06", "remaining_time": "10:22:53", "throughput": 29156.79, "total_tokens": 3810407296} +{"current_steps": 11280, "total_steps": 14493, "loss": 0.2734, "lr": 3.427633644370238e-05, "epoch": 2.334980978753138, "percentage": 77.83, "elapsed_time": "1 day, 12:19:56", "remaining_time": "10:20:56", "throughput": 29157.99, "total_tokens": 3813771008} +{"current_steps": 11290, "total_steps": 14493, "loss": 0.2697, "lr": 3.4268285252356564e-05, "epoch": 2.3370513185476565, "percentage": 77.9, "elapsed_time": "1 day, 12:21:41", "remaining_time": "10:18:57", "throughput": 29160.3, "total_tokens": 3817123200} +{"current_steps": 11300, "total_steps": 14493, "loss": 0.2717, "lr": 3.426023973179575e-05, "epoch": 2.3391216583421754, "percentage": 77.97, "elapsed_time": "1 day, 12:23:21", "remaining_time": "10:16:56", "throughput": 29162.67, "total_tokens": 3820353792} +{"current_steps": 11310, "total_steps": 14493, "loss": 0.2698, "lr": 3.425219987536614e-05, "epoch": 2.3411919981366944, "percentage": 78.04, "elapsed_time": "1 day, 12:25:01", "remaining_time": "10:14:56", "throughput": 29165.66, "total_tokens": 3823666816} +{"current_steps": 11320, "total_steps": 14493, "loss": 0.2739, "lr": 3.4244165676424815e-05, "epoch": 2.343262337931213, "percentage": 78.11, "elapsed_time": "1 day, 12:26:48", "remaining_time": "10:12:57", "throughput": 29167.76, 
"total_tokens": 3827049984} +{"current_steps": 11330, "total_steps": 14493, "loss": 0.2739, "lr": 3.423613712833979e-05, "epoch": 2.3453326777257315, "percentage": 78.18, "elapsed_time": "1 day, 12:28:35", "remaining_time": "10:10:59", "throughput": 29169.79, "total_tokens": 3830448256} +{"current_steps": 11340, "total_steps": 14493, "loss": 0.2698, "lr": 3.422811422448995e-05, "epoch": 2.3474030175202505, "percentage": 78.24, "elapsed_time": "1 day, 12:30:20", "remaining_time": "10:09:00", "throughput": 29171.95, "total_tokens": 3833796992} +{"current_steps": 11350, "total_steps": 14493, "loss": 0.267, "lr": 3.422009695826503e-05, "epoch": 2.3494733573147695, "percentage": 78.31, "elapsed_time": "1 day, 12:32:03", "remaining_time": "10:07:01", "throughput": 29174.26, "total_tokens": 3837101056} +{"current_steps": 11360, "total_steps": 14493, "loss": 0.2722, "lr": 3.4212085323065626e-05, "epoch": 2.351543697109288, "percentage": 78.38, "elapsed_time": "1 day, 12:33:45", "remaining_time": "10:05:01", "throughput": 29177.32, "total_tokens": 3840469760} +{"current_steps": 11370, "total_steps": 14493, "loss": 0.2709, "lr": 3.4204079312303103e-05, "epoch": 2.353614036903807, "percentage": 78.45, "elapsed_time": "1 day, 12:35:34", "remaining_time": "10:03:03", "throughput": 29179.56, "total_tokens": 3843967104} +{"current_steps": 11380, "total_steps": 14493, "loss": 0.2711, "lr": 3.419607891939964e-05, "epoch": 2.3556843766983255, "percentage": 78.52, "elapsed_time": "1 day, 12:37:20", "remaining_time": "10:01:04", "throughput": 29181.73, "total_tokens": 3847323136} +{"current_steps": 11390, "total_steps": 14493, "loss": 0.2732, "lr": 3.4188084137788166e-05, "epoch": 2.3577547164928445, "percentage": 78.59, "elapsed_time": "1 day, 12:39:03", "remaining_time": "9:59:05", "throughput": 29184.44, "total_tokens": 3850701952} +{"current_steps": 11400, "total_steps": 14493, "loss": 0.269, "lr": 3.418009496091238e-05, "epoch": 2.359825056287363, "percentage": 78.66, 
"elapsed_time": "1 day, 12:40:41", "remaining_time": "9:57:05", "throughput": 29187.35, "total_tokens": 3853952256} +{"current_steps": 11410, "total_steps": 14493, "loss": 0.2733, "lr": 3.417211138222666e-05, "epoch": 2.361895396081882, "percentage": 78.73, "elapsed_time": "1 day, 12:42:30", "remaining_time": "9:55:07", "throughput": 29189.04, "total_tokens": 3857355776} +{"current_steps": 11420, "total_steps": 14493, "loss": 0.2715, "lr": 3.416413339519612e-05, "epoch": 2.3639657358764006, "percentage": 78.8, "elapsed_time": "1 day, 12:44:09", "remaining_time": "9:53:06", "throughput": 29192.28, "total_tokens": 3860671232} +{"current_steps": 11430, "total_steps": 14493, "loss": 0.2704, "lr": 3.4156160993296524e-05, "epoch": 2.3660360756709196, "percentage": 78.87, "elapsed_time": "1 day, 12:45:57", "remaining_time": "9:51:09", "throughput": 29194.28, "total_tokens": 3864082048} +{"current_steps": 11440, "total_steps": 14493, "loss": 0.2726, "lr": 3.4148194170014295e-05, "epoch": 2.368106415465438, "percentage": 78.93, "elapsed_time": "1 day, 12:47:47", "remaining_time": "9:49:11", "throughput": 29195.91, "total_tokens": 3867499520} +{"current_steps": 11450, "total_steps": 14493, "loss": 0.2725, "lr": 3.4140232918846484e-05, "epoch": 2.370176755259957, "percentage": 79.0, "elapsed_time": "1 day, 12:49:29", "remaining_time": "9:47:12", "throughput": 29198.57, "total_tokens": 3870838912} +{"current_steps": 11460, "total_steps": 14493, "loss": 0.2718, "lr": 3.4132277233300753e-05, "epoch": 2.372247095054476, "percentage": 79.07, "elapsed_time": "1 day, 12:51:15", "remaining_time": "9:45:13", "throughput": 29200.62, "total_tokens": 3874217600} +{"current_steps": 11470, "total_steps": 14493, "loss": 0.2712, "lr": 3.4124327106895356e-05, "epoch": 2.3743174348489946, "percentage": 79.14, "elapsed_time": "1 day, 12:52:54", "remaining_time": "9:43:13", "throughput": 29203.18, "total_tokens": 3877440256} +{"current_steps": 11480, "total_steps": 14493, "loss": 0.2682, "lr": 
3.4116382533159097e-05, "epoch": 2.376387774643513, "percentage": 79.21, "elapsed_time": "1 day, 12:54:46", "remaining_time": "9:41:17", "throughput": 29204.16, "total_tokens": 3880850432} +{"current_steps": 11490, "total_steps": 14493, "loss": 0.2706, "lr": 3.4108443505631335e-05, "epoch": 2.378458114438032, "percentage": 79.28, "elapsed_time": "1 day, 12:56:34", "remaining_time": "9:39:19", "throughput": 29206.12, "total_tokens": 3884266368} +{"current_steps": 11500, "total_steps": 14493, "loss": 0.2688, "lr": 3.410051001786192e-05, "epoch": 2.380528454232551, "percentage": 79.35, "elapsed_time": "1 day, 12:58:22", "remaining_time": "9:37:21", "throughput": 29208.14, "total_tokens": 3887685376} +{"current_steps": 11510, "total_steps": 14493, "loss": 0.269, "lr": 3.409258206341124e-05, "epoch": 2.3825987940270696, "percentage": 79.42, "elapsed_time": "1 day, 13:00:14", "remaining_time": "9:35:24", "throughput": 29209.78, "total_tokens": 3891165824} +{"current_steps": 11520, "total_steps": 14493, "loss": 0.2691, "lr": 3.4084659635850134e-05, "epoch": 2.3846691338215886, "percentage": 79.49, "elapsed_time": "1 day, 13:02:03", "remaining_time": "9:33:27", "throughput": 29211.66, "total_tokens": 3894601216} +{"current_steps": 11530, "total_steps": 14493, "loss": 0.2691, "lr": 3.40767427287599e-05, "epoch": 2.386739473616107, "percentage": 79.56, "elapsed_time": "1 day, 13:03:44", "remaining_time": "9:31:27", "throughput": 29213.95, "total_tokens": 3897860608} +{"current_steps": 11540, "total_steps": 14493, "loss": 0.2691, "lr": 3.406883133573224e-05, "epoch": 2.388809813410626, "percentage": 79.62, "elapsed_time": "1 day, 13:05:37", "remaining_time": "9:29:31", "throughput": 29215.36, "total_tokens": 3901345664} +{"current_steps": 11550, "total_steps": 14493, "loss": 0.2665, "lr": 3.406092545036932e-05, "epoch": 2.3908801532051447, "percentage": 79.69, "elapsed_time": "1 day, 13:07:28", "remaining_time": "9:27:34", "throughput": 29216.76, "total_tokens": 3904770560} 
+{"current_steps": 11560, "total_steps": 14493, "loss": 0.272, "lr": 3.405302506628365e-05, "epoch": 2.3929504929996637, "percentage": 79.76, "elapsed_time": "1 day, 13:09:14", "remaining_time": "9:25:36", "throughput": 29218.9, "total_tokens": 3908147584} +{"current_steps": 11570, "total_steps": 14493, "loss": 0.2685, "lr": 3.404513017709813e-05, "epoch": 2.395020832794182, "percentage": 79.83, "elapsed_time": "1 day, 13:11:00", "remaining_time": "9:23:37", "throughput": 29220.99, "total_tokens": 3911535360} +{"current_steps": 11580, "total_steps": 14493, "loss": 0.2711, "lr": 3.403724077644598e-05, "epoch": 2.397091172588701, "percentage": 79.9, "elapsed_time": "1 day, 13:12:47", "remaining_time": "9:21:39", "throughput": 29223.02, "total_tokens": 3914920960} +{"current_steps": 11590, "total_steps": 14493, "loss": 0.2674, "lr": 3.402935685797077e-05, "epoch": 2.3991615123832197, "percentage": 79.97, "elapsed_time": "1 day, 13:14:28", "remaining_time": "9:19:40", "throughput": 29225.77, "total_tokens": 3918266368} +{"current_steps": 11600, "total_steps": 14493, "loss": 0.2694, "lr": 3.4021478415326355e-05, "epoch": 2.4012318521777387, "percentage": 80.04, "elapsed_time": "1 day, 13:16:11", "remaining_time": "9:17:41", "throughput": 29228.53, "total_tokens": 3921625984} +{"current_steps": 11610, "total_steps": 14493, "loss": 0.2699, "lr": 3.401360544217687e-05, "epoch": 2.4033021919722577, "percentage": 80.11, "elapsed_time": "1 day, 13:18:01", "remaining_time": "9:15:44", "throughput": 29230.04, "total_tokens": 3925057024} +{"current_steps": 11620, "total_steps": 14493, "loss": 0.2701, "lr": 3.400573793219672e-05, "epoch": 2.4053725317667762, "percentage": 80.18, "elapsed_time": "1 day, 13:19:45", "remaining_time": "9:13:46", "throughput": 29232.77, "total_tokens": 3928446976} +{"current_steps": 11630, "total_steps": 14493, "loss": 0.2704, "lr": 3.3997875879070546e-05, "epoch": 2.4074428715612948, "percentage": 80.25, "elapsed_time": "1 day, 13:21:28", 
"remaining_time": "9:11:47", "throughput": 29234.9, "total_tokens": 3931762432} +{"current_steps": 11640, "total_steps": 14493, "loss": 0.2709, "lr": 3.399001927649318e-05, "epoch": 2.4095132113558138, "percentage": 80.31, "elapsed_time": "1 day, 13:23:13", "remaining_time": "9:09:49", "throughput": 29237.14, "total_tokens": 3935140608} +{"current_steps": 11650, "total_steps": 14493, "loss": 0.268, "lr": 3.398216811816968e-05, "epoch": 2.4115835511503327, "percentage": 80.38, "elapsed_time": "1 day, 13:24:59", "remaining_time": "9:07:51", "throughput": 29239.01, "total_tokens": 3938486912} +{"current_steps": 11660, "total_steps": 14493, "loss": 0.2729, "lr": 3.397432239781527e-05, "epoch": 2.4136538909448513, "percentage": 80.45, "elapsed_time": "1 day, 13:26:39", "remaining_time": "9:05:51", "throughput": 29241.8, "total_tokens": 3941780224} +{"current_steps": 11670, "total_steps": 14493, "loss": 0.2698, "lr": 3.396648210915531e-05, "epoch": 2.4157242307393703, "percentage": 80.52, "elapsed_time": "1 day, 13:28:22", "remaining_time": "9:03:53", "throughput": 29244.15, "total_tokens": 3945117312} +{"current_steps": 11680, "total_steps": 14493, "loss": 0.2725, "lr": 3.3958647245925315e-05, "epoch": 2.417794570533889, "percentage": 80.59, "elapsed_time": "1 day, 13:30:19", "remaining_time": "9:01:58", "throughput": 29244.93, "total_tokens": 3948648192} +{"current_steps": 11690, "total_steps": 14493, "loss": 0.2704, "lr": 3.3950817801870885e-05, "epoch": 2.419864910328408, "percentage": 80.66, "elapsed_time": "1 day, 13:32:08", "remaining_time": "9:00:00", "throughput": 29246.33, "total_tokens": 3952014208} +{"current_steps": 11700, "total_steps": 14493, "loss": 0.2709, "lr": 3.3942993770747735e-05, "epoch": 2.4219352501229263, "percentage": 80.73, "elapsed_time": "1 day, 13:33:58", "remaining_time": "8:58:03", "throughput": 29247.95, "total_tokens": 3955456384} +{"current_steps": 11710, "total_steps": 14493, "loss": 0.2706, "lr": 3.3935175146321626e-05, "epoch": 
2.4240055899174453, "percentage": 80.8, "elapsed_time": "1 day, 13:35:44", "remaining_time": "8:56:05", "throughput": 29250.11, "total_tokens": 3958834432} +{"current_steps": 11720, "total_steps": 14493, "loss": 0.2724, "lr": 3.392736192236839e-05, "epoch": 2.426075929711964, "percentage": 80.87, "elapsed_time": "1 day, 13:37:31", "remaining_time": "8:54:08", "throughput": 29251.79, "total_tokens": 3962187392} +{"current_steps": 11730, "total_steps": 14493, "loss": 0.2684, "lr": 3.391955409267387e-05, "epoch": 2.428146269506483, "percentage": 80.94, "elapsed_time": "1 day, 13:39:20", "remaining_time": "8:52:11", "throughput": 29253.19, "total_tokens": 3965579520} +{"current_steps": 11740, "total_steps": 14493, "loss": 0.2757, "lr": 3.3911751651033896e-05, "epoch": 2.4302166093010014, "percentage": 81.0, "elapsed_time": "1 day, 13:41:09", "remaining_time": "8:50:14", "throughput": 29254.81, "total_tokens": 3968986624} +{"current_steps": 11750, "total_steps": 14493, "loss": 0.2695, "lr": 3.3903954591254334e-05, "epoch": 2.4322869490955203, "percentage": 81.07, "elapsed_time": "1 day, 13:43:02", "remaining_time": "8:48:18", "throughput": 29255.81, "total_tokens": 3972430464} +{"current_steps": 11760, "total_steps": 14493, "loss": 0.2734, "lr": 3.389616290715097e-05, "epoch": 2.434357288890039, "percentage": 81.14, "elapsed_time": "1 day, 13:44:48", "remaining_time": "8:46:20", "throughput": 29257.83, "total_tokens": 3975793920} +{"current_steps": 11770, "total_steps": 14493, "loss": 0.2686, "lr": 3.388837659254955e-05, "epoch": 2.436427628684558, "percentage": 81.21, "elapsed_time": "1 day, 13:46:32", "remaining_time": "8:44:22", "throughput": 29260.07, "total_tokens": 3979154944} +{"current_steps": 11780, "total_steps": 14493, "loss": 0.2725, "lr": 3.3880595641285746e-05, "epoch": 2.4384979684790764, "percentage": 81.28, "elapsed_time": "1 day, 13:48:16", "remaining_time": "8:42:23", "throughput": 29262.47, "total_tokens": 3982528768} +{"current_steps": 11790, 
"total_steps": 14493, "loss": 0.2688, "lr": 3.387282004720513e-05, "epoch": 2.4405683082735954, "percentage": 81.35, "elapsed_time": "1 day, 13:50:02", "remaining_time": "8:40:26", "throughput": 29264.35, "total_tokens": 3985890048} +{"current_steps": 11800, "total_steps": 14493, "loss": 0.2714, "lr": 3.386504980416316e-05, "epoch": 2.4426386480681144, "percentage": 81.42, "elapsed_time": "1 day, 13:51:51", "remaining_time": "8:38:29", "throughput": 29265.87, "total_tokens": 3989282816} +{"current_steps": 11810, "total_steps": 14493, "loss": 0.2719, "lr": 3.385728490602515e-05, "epoch": 2.444708987862633, "percentage": 81.49, "elapsed_time": "1 day, 13:53:38", "remaining_time": "8:36:31", "throughput": 29267.76, "total_tokens": 3992663040} +{"current_steps": 11820, "total_steps": 14493, "loss": 0.2701, "lr": 3.384952534666625e-05, "epoch": 2.446779327657152, "percentage": 81.56, "elapsed_time": "1 day, 13:55:30", "remaining_time": "8:34:35", "throughput": 29269.12, "total_tokens": 3996112640} +{"current_steps": 11830, "total_steps": 14493, "loss": 0.2729, "lr": 3.3841771119971455e-05, "epoch": 2.4488496674516704, "percentage": 81.63, "elapsed_time": "1 day, 13:57:19", "remaining_time": "8:32:38", "throughput": 29270.61, "total_tokens": 3999510912} +{"current_steps": 11840, "total_steps": 14493, "loss": 0.2693, "lr": 3.383402221983554e-05, "epoch": 2.4509200072461894, "percentage": 81.69, "elapsed_time": "1 day, 13:59:07", "remaining_time": "8:30:41", "throughput": 29272.33, "total_tokens": 4002907264} +{"current_steps": 11850, "total_steps": 14493, "loss": 0.2702, "lr": 3.3826278640163064e-05, "epoch": 2.452990347040708, "percentage": 81.76, "elapsed_time": "1 day, 14:00:53", "remaining_time": "8:28:43", "throughput": 29274.08, "total_tokens": 4006262016} +{"current_steps": 11860, "total_steps": 14493, "loss": 0.2674, "lr": 3.3818540374868354e-05, "epoch": 2.455060686835227, "percentage": 81.83, "elapsed_time": "1 day, 14:02:42", "remaining_time": "8:26:46", 
"throughput": 29275.76, "total_tokens": 4009685248} +{"current_steps": 11870, "total_steps": 14493, "loss": 0.268, "lr": 3.381080741787547e-05, "epoch": 2.4571310266297455, "percentage": 81.9, "elapsed_time": "1 day, 14:04:27", "remaining_time": "8:24:48", "throughput": 29277.9, "total_tokens": 4013036288} +{"current_steps": 11880, "total_steps": 14493, "loss": 0.2684, "lr": 3.38030797631182e-05, "epoch": 2.4592013664242645, "percentage": 81.97, "elapsed_time": "1 day, 14:06:14", "remaining_time": "8:22:51", "throughput": 29279.76, "total_tokens": 4016430720} +{"current_steps": 11890, "total_steps": 14493, "loss": 0.2671, "lr": 3.379535740454003e-05, "epoch": 2.461271706218783, "percentage": 82.04, "elapsed_time": "1 day, 14:08:05", "remaining_time": "8:20:55", "throughput": 29280.87, "total_tokens": 4019847040} +{"current_steps": 11900, "total_steps": 14493, "loss": 0.2703, "lr": 3.3787640336094126e-05, "epoch": 2.463342046013302, "percentage": 82.11, "elapsed_time": "1 day, 14:09:52", "remaining_time": "8:18:57", "throughput": 29283.11, "total_tokens": 4023280000} +{"current_steps": 11910, "total_steps": 14493, "loss": 0.2706, "lr": 3.3779928551743325e-05, "epoch": 2.4654123858078205, "percentage": 82.18, "elapsed_time": "1 day, 14:11:46", "remaining_time": "8:17:01", "throughput": 29283.97, "total_tokens": 4026739456} +{"current_steps": 11920, "total_steps": 14493, "loss": 0.2675, "lr": 3.3772222045460084e-05, "epoch": 2.4674827256023395, "percentage": 82.25, "elapsed_time": "1 day, 14:13:35", "remaining_time": "8:15:05", "throughput": 29285.54, "total_tokens": 4030156288} +{"current_steps": 11930, "total_steps": 14493, "loss": 0.2706, "lr": 3.37645208112265e-05, "epoch": 2.469553065396858, "percentage": 82.32, "elapsed_time": "1 day, 14:15:22", "remaining_time": "8:13:07", "throughput": 29287.46, "total_tokens": 4033531136} +{"current_steps": 11940, "total_steps": 14493, "loss": 0.2697, "lr": 3.3756824843034255e-05, "epoch": 2.471623405191377, "percentage": 
82.38, "elapsed_time": "1 day, 14:17:07", "remaining_time": "8:11:10", "throughput": 29289.6, "total_tokens": 4036899712} +{"current_steps": 11950, "total_steps": 14493, "loss": 0.273, "lr": 3.374913413488464e-05, "epoch": 2.473693744985896, "percentage": 82.45, "elapsed_time": "1 day, 14:18:53", "remaining_time": "8:09:12", "throughput": 29291.72, "total_tokens": 4040302336} +{"current_steps": 11960, "total_steps": 14493, "loss": 0.2705, "lr": 3.374144868078848e-05, "epoch": 2.4757640847804145, "percentage": 82.52, "elapsed_time": "1 day, 14:20:40", "remaining_time": "8:07:15", "throughput": 29293.12, "total_tokens": 4043630080} +{"current_steps": 11970, "total_steps": 14493, "loss": 0.2693, "lr": 3.373376847476615e-05, "epoch": 2.4778344245749335, "percentage": 82.59, "elapsed_time": "1 day, 14:22:21", "remaining_time": "8:05:17", "throughput": 29295.65, "total_tokens": 4046940160} +{"current_steps": 11980, "total_steps": 14493, "loss": 0.2704, "lr": 3.3726093510847566e-05, "epoch": 2.479904764369452, "percentage": 82.66, "elapsed_time": "1 day, 14:24:05", "remaining_time": "8:03:19", "throughput": 29298.1, "total_tokens": 4050341376} +{"current_steps": 11990, "total_steps": 14493, "loss": 0.2704, "lr": 3.371842378307212e-05, "epoch": 2.481975104163971, "percentage": 82.73, "elapsed_time": "1 day, 14:25:56", "remaining_time": "8:01:22", "throughput": 29299.55, "total_tokens": 4053787520} +{"current_steps": 12000, "total_steps": 14493, "loss": 0.2707, "lr": 3.371075928548872e-05, "epoch": 2.4840454439584896, "percentage": 82.8, "elapsed_time": "1 day, 14:27:47", "remaining_time": "7:59:26", "throughput": 29300.78, "total_tokens": 4057199232} +{"current_steps": 12010, "total_steps": 14493, "loss": 0.2699, "lr": 3.37031000121557e-05, "epoch": 2.4861157837530086, "percentage": 82.87, "elapsed_time": "1 day, 14:29:37", "remaining_time": "7:57:30", "throughput": 29302.17, "total_tokens": 4060614272} +{"current_steps": 12020, "total_steps": 14493, "loss": 0.2739, "lr": 
3.369544595714088e-05, "epoch": 2.488186123547527, "percentage": 82.94, "elapsed_time": "1 day, 14:31:28", "remaining_time": "7:55:33", "throughput": 29303.44, "total_tokens": 4064051712} +{"current_steps": 12030, "total_steps": 14493, "loss": 0.269, "lr": 3.368779711452148e-05, "epoch": 2.490256463342046, "percentage": 83.01, "elapsed_time": "1 day, 14:33:15", "remaining_time": "7:53:36", "throughput": 29305.15, "total_tokens": 4067410688} +{"current_steps": 12040, "total_steps": 14493, "loss": 0.2716, "lr": 3.368015347838413e-05, "epoch": 2.4923268031365646, "percentage": 83.07, "elapsed_time": "1 day, 14:35:02", "remaining_time": "7:51:39", "throughput": 29306.85, "total_tokens": 4070789632} +{"current_steps": 12050, "total_steps": 14493, "loss": 0.271, "lr": 3.3672515042824855e-05, "epoch": 2.4943971429310836, "percentage": 83.14, "elapsed_time": "1 day, 14:36:42", "remaining_time": "7:49:41", "throughput": 29309.11, "total_tokens": 4074053632} +{"current_steps": 12060, "total_steps": 14493, "loss": 0.2716, "lr": 3.366488180194904e-05, "epoch": 2.496467482725602, "percentage": 83.21, "elapsed_time": "1 day, 14:38:24", "remaining_time": "7:47:43", "throughput": 29312.0, "total_tokens": 4077438336} +{"current_steps": 12070, "total_steps": 14493, "loss": 0.2707, "lr": 3.365725374987143e-05, "epoch": 2.498537822520121, "percentage": 83.28, "elapsed_time": "1 day, 14:40:05", "remaining_time": "7:45:44", "throughput": 29314.68, "total_tokens": 4080765184} +{"current_steps": 12080, "total_steps": 14493, "loss": 0.2718, "lr": 3.36496308807161e-05, "epoch": 2.5006081623146397, "percentage": 83.35, "elapsed_time": "1 day, 14:41:57", "remaining_time": "7:43:48", "throughput": 29315.82, "total_tokens": 4084207488} +{"current_steps": 12090, "total_steps": 14493, "loss": 0.2708, "lr": 3.3642013188616426e-05, "epoch": 2.5026785021091587, "percentage": 83.42, "elapsed_time": "1 day, 14:43:40", "remaining_time": "7:41:51", "throughput": 29318.02, "total_tokens": 4087519872} 
+{"current_steps": 12100, "total_steps": 14493, "loss": 0.2697, "lr": 3.3634400667715074e-05, "epoch": 2.5047488419036776, "percentage": 83.49, "elapsed_time": "1 day, 14:45:25", "remaining_time": "7:39:53", "throughput": 29319.99, "total_tokens": 4090899840} +{"current_steps": 12110, "total_steps": 14493, "loss": 0.2705, "lr": 3.3626793312164013e-05, "epoch": 2.506819181698196, "percentage": 83.56, "elapsed_time": "1 day, 14:47:13", "remaining_time": "7:37:56", "throughput": 29321.57, "total_tokens": 4094266112} +{"current_steps": 12120, "total_steps": 14493, "loss": 0.2692, "lr": 3.361919111612443e-05, "epoch": 2.508889521492715, "percentage": 83.63, "elapsed_time": "1 day, 14:48:56", "remaining_time": "7:35:59", "throughput": 29323.76, "total_tokens": 4097609600} +{"current_steps": 12130, "total_steps": 14493, "loss": 0.269, "lr": 3.361159407376678e-05, "epoch": 2.5109598612872337, "percentage": 83.7, "elapsed_time": "1 day, 14:50:45", "remaining_time": "7:34:02", "throughput": 29325.34, "total_tokens": 4101011456} +{"current_steps": 12140, "total_steps": 14493, "loss": 0.2712, "lr": 3.3604002179270685e-05, "epoch": 2.5130302010817527, "percentage": 83.76, "elapsed_time": "1 day, 14:52:30", "remaining_time": "7:32:05", "throughput": 29327.47, "total_tokens": 4104402816} +{"current_steps": 12150, "total_steps": 14493, "loss": 0.2695, "lr": 3.359641542682504e-05, "epoch": 2.515100540876271, "percentage": 83.83, "elapsed_time": "1 day, 14:54:17", "remaining_time": "7:30:08", "throughput": 29329.15, "total_tokens": 4107755008} +{"current_steps": 12160, "total_steps": 14493, "loss": 0.272, "lr": 3.3588833810627854e-05, "epoch": 2.51717088067079, "percentage": 83.9, "elapsed_time": "1 day, 14:56:09", "remaining_time": "7:28:12", "throughput": 29330.0, "total_tokens": 4111179520} +{"current_steps": 12170, "total_steps": 14493, "loss": 0.2702, "lr": 3.358125732488632e-05, "epoch": 2.5192412204653087, "percentage": 83.97, "elapsed_time": "1 day, 14:57:54", 
"remaining_time": "7:26:15", "throughput": 29331.47, "total_tokens": 4114452224} +{"current_steps": 12180, "total_steps": 14493, "loss": 0.273, "lr": 3.357368596381679e-05, "epoch": 2.5213115602598277, "percentage": 84.04, "elapsed_time": "1 day, 14:59:43", "remaining_time": "7:24:18", "throughput": 29333.5, "total_tokens": 4117924224} +{"current_steps": 12190, "total_steps": 14493, "loss": 0.2685, "lr": 3.356611972164471e-05, "epoch": 2.5233819000543463, "percentage": 84.11, "elapsed_time": "1 day, 15:01:31", "remaining_time": "7:22:22", "throughput": 29335.51, "total_tokens": 4121380608} +{"current_steps": 12200, "total_steps": 14493, "loss": 0.2694, "lr": 3.355855859260466e-05, "epoch": 2.5254522398488652, "percentage": 84.18, "elapsed_time": "1 day, 15:03:14", "remaining_time": "7:20:24", "throughput": 29337.82, "total_tokens": 4124746880} +{"current_steps": 12210, "total_steps": 14493, "loss": 0.2709, "lr": 3.3551002570940285e-05, "epoch": 2.5275225796433842, "percentage": 84.25, "elapsed_time": "1 day, 15:05:04", "remaining_time": "7:18:28", "throughput": 29339.03, "total_tokens": 4128127104} +{"current_steps": 12220, "total_steps": 14493, "loss": 0.2726, "lr": 3.354345165090431e-05, "epoch": 2.5295929194379028, "percentage": 84.32, "elapsed_time": "1 day, 15:06:47", "remaining_time": "7:16:31", "throughput": 29341.56, "total_tokens": 4131522304} +{"current_steps": 12230, "total_steps": 14493, "loss": 0.2696, "lr": 3.3535905826758515e-05, "epoch": 2.5316632592324213, "percentage": 84.39, "elapsed_time": "1 day, 15:08:31", "remaining_time": "7:14:33", "throughput": 29343.98, "total_tokens": 4134894208} +{"current_steps": 12240, "total_steps": 14493, "loss": 0.2714, "lr": 3.352836509277369e-05, "epoch": 2.5337335990269403, "percentage": 84.45, "elapsed_time": "1 day, 15:10:17", "remaining_time": "7:12:36", "throughput": 29345.69, "total_tokens": 4138269696} +{"current_steps": 12250, "total_steps": 14493, "loss": 0.2717, "lr": 3.352082944322966e-05, "epoch": 
2.5358039388214593, "percentage": 84.52, "elapsed_time": "1 day, 15:12:11", "remaining_time": "7:10:41", "throughput": 29346.33, "total_tokens": 4141698816} +{"current_steps": 12260, "total_steps": 14493, "loss": 0.271, "lr": 3.351329887241524e-05, "epoch": 2.537874278615978, "percentage": 84.59, "elapsed_time": "1 day, 15:13:55", "remaining_time": "7:08:44", "throughput": 29348.35, "total_tokens": 4145019648} +{"current_steps": 12270, "total_steps": 14493, "loss": 0.269, "lr": 3.3505773374628225e-05, "epoch": 2.5399446184104963, "percentage": 84.66, "elapsed_time": "1 day, 15:15:37", "remaining_time": "7:06:46", "throughput": 29350.96, "total_tokens": 4148381568} +{"current_steps": 12280, "total_steps": 14493, "loss": 0.2686, "lr": 3.3498252944175354e-05, "epoch": 2.5420149582050153, "percentage": 84.73, "elapsed_time": "1 day, 15:17:25", "remaining_time": "7:04:50", "throughput": 29352.82, "total_tokens": 4151816704} +{"current_steps": 12290, "total_steps": 14493, "loss": 0.269, "lr": 3.3490737575372326e-05, "epoch": 2.5440852979995343, "percentage": 84.8, "elapsed_time": "1 day, 15:19:05", "remaining_time": "7:02:52", "throughput": 29355.53, "total_tokens": 4155143296} +{"current_steps": 12300, "total_steps": 14493, "loss": 0.2698, "lr": 3.348322726254375e-05, "epoch": 2.546155637794053, "percentage": 84.87, "elapsed_time": "1 day, 15:20:54", "remaining_time": "7:00:56", "throughput": 29356.74, "total_tokens": 4158526720} +{"current_steps": 12310, "total_steps": 14493, "loss": 0.2695, "lr": 3.347572200002315e-05, "epoch": 2.548225977588572, "percentage": 84.94, "elapsed_time": "1 day, 15:22:44", "remaining_time": "6:58:59", "throughput": 29358.33, "total_tokens": 4161978752} +{"current_steps": 12320, "total_steps": 14493, "loss": 0.2687, "lr": 3.3468221782152924e-05, "epoch": 2.5502963173830904, "percentage": 85.01, "elapsed_time": "1 day, 15:24:34", "remaining_time": "6:57:03", "throughput": 29359.78, "total_tokens": 4165393408} +{"current_steps": 12330, 
"total_steps": 14493, "loss": 0.2724, "lr": 3.346072660328435e-05, "epoch": 2.5523666571776094, "percentage": 85.08, "elapsed_time": "1 day, 15:26:20", "remaining_time": "6:55:07", "throughput": 29361.59, "total_tokens": 4168770304} +{"current_steps": 12340, "total_steps": 14493, "loss": 0.2715, "lr": 3.345323645777756e-05, "epoch": 2.554436996972128, "percentage": 85.14, "elapsed_time": "1 day, 15:28:07", "remaining_time": "6:53:10", "throughput": 29363.39, "total_tokens": 4172159360} +{"current_steps": 12350, "total_steps": 14493, "loss": 0.2707, "lr": 3.34457513400015e-05, "epoch": 2.556507336766647, "percentage": 85.21, "elapsed_time": "1 day, 15:29:59", "remaining_time": "6:51:14", "throughput": 29363.96, "total_tokens": 4175553664} +{"current_steps": 12360, "total_steps": 14493, "loss": 0.2677, "lr": 3.343827124433396e-05, "epoch": 2.558577676561166, "percentage": 85.28, "elapsed_time": "1 day, 15:31:43", "remaining_time": "6:49:17", "throughput": 29366.2, "total_tokens": 4178921600} +{"current_steps": 12370, "total_steps": 14493, "loss": 0.2727, "lr": 3.343079616516151e-05, "epoch": 2.5606480163556844, "percentage": 85.35, "elapsed_time": "1 day, 15:33:28", "remaining_time": "6:47:20", "throughput": 29368.29, "total_tokens": 4182285056} +{"current_steps": 12380, "total_steps": 14493, "loss": 0.2697, "lr": 3.3423326096879495e-05, "epoch": 2.562718356150203, "percentage": 85.42, "elapsed_time": "1 day, 15:35:14", "remaining_time": "6:45:24", "throughput": 29370.1, "total_tokens": 4185657984} +{"current_steps": 12390, "total_steps": 14493, "loss": 0.2728, "lr": 3.341586103389203e-05, "epoch": 2.564788695944722, "percentage": 85.49, "elapsed_time": "1 day, 15:37:02", "remaining_time": "6:43:27", "throughput": 29372.01, "total_tokens": 4189115776} +{"current_steps": 12400, "total_steps": 14493, "loss": 0.2726, "lr": 3.3408400970611995e-05, "epoch": 2.566859035739241, "percentage": 85.56, "elapsed_time": "1 day, 15:38:45", "remaining_time": "6:41:30", 
"throughput": 29374.23, "total_tokens": 4192450432} +{"current_steps": 12410, "total_steps": 14493, "loss": 0.2695, "lr": 3.340094590146095e-05, "epoch": 2.5689293755337594, "percentage": 85.63, "elapsed_time": "1 day, 15:40:29", "remaining_time": "6:39:33", "throughput": 29376.36, "total_tokens": 4195802624} +{"current_steps": 12420, "total_steps": 14493, "loss": 0.2696, "lr": 3.3393495820869215e-05, "epoch": 2.570999715328278, "percentage": 85.7, "elapsed_time": "1 day, 15:42:20", "remaining_time": "6:37:37", "throughput": 29377.35, "total_tokens": 4199207424} +{"current_steps": 12430, "total_steps": 14493, "loss": 0.2694, "lr": 3.338605072327576e-05, "epoch": 2.573070055122797, "percentage": 85.77, "elapsed_time": "1 day, 15:44:01", "remaining_time": "6:35:40", "throughput": 29380.09, "total_tokens": 4202559872} +{"current_steps": 12440, "total_steps": 14493, "loss": 0.2702, "lr": 3.337861060312827e-05, "epoch": 2.575140394917316, "percentage": 85.83, "elapsed_time": "1 day, 15:45:50", "remaining_time": "6:33:44", "throughput": 29381.35, "total_tokens": 4205946112} +{"current_steps": 12450, "total_steps": 14493, "loss": 0.2681, "lr": 3.337117545488306e-05, "epoch": 2.5772107347118345, "percentage": 85.9, "elapsed_time": "1 day, 15:47:30", "remaining_time": "6:31:46", "throughput": 29383.82, "total_tokens": 4209242240} +{"current_steps": 12460, "total_steps": 14493, "loss": 0.2743, "lr": 3.33637452730051e-05, "epoch": 2.5792810745063535, "percentage": 85.97, "elapsed_time": "1 day, 15:49:10", "remaining_time": "6:29:49", "throughput": 29386.03, "total_tokens": 4212502272} +{"current_steps": 12470, "total_steps": 14493, "loss": 0.2676, "lr": 3.335632005196796e-05, "epoch": 2.581351414300872, "percentage": 86.04, "elapsed_time": "1 day, 15:51:01", "remaining_time": "6:27:53", "throughput": 29387.47, "total_tokens": 4215956096} +{"current_steps": 12480, "total_steps": 14493, "loss": 0.2693, "lr": 3.334889978625383e-05, "epoch": 2.583421754095391, "percentage": 
86.11, "elapsed_time": "1 day, 15:52:50", "remaining_time": "6:25:57", "throughput": 29388.15, "total_tokens": 4219264512} +{"current_steps": 12490, "total_steps": 14493, "loss": 0.2741, "lr": 3.3341484470353515e-05, "epoch": 2.5854920938899095, "percentage": 86.18, "elapsed_time": "1 day, 15:54:30", "remaining_time": "6:24:00", "throughput": 29390.58, "total_tokens": 4222557824} +{"current_steps": 12500, "total_steps": 14493, "loss": 0.2703, "lr": 3.333407409876635e-05, "epoch": 2.5875624336844285, "percentage": 86.25, "elapsed_time": "1 day, 15:56:17", "remaining_time": "6:22:03", "throughput": 29392.1, "total_tokens": 4225936000} +{"current_steps": 12510, "total_steps": 14493, "loss": 0.2732, "lr": 3.332666866600024e-05, "epoch": 2.5896327734789475, "percentage": 86.32, "elapsed_time": "1 day, 15:58:02", "remaining_time": "6:20:07", "throughput": 29394.02, "total_tokens": 4229295872} +{"current_steps": 12520, "total_steps": 14493, "loss": 0.271, "lr": 3.331926816657162e-05, "epoch": 2.591703113273466, "percentage": 86.39, "elapsed_time": "1 day, 15:59:50", "remaining_time": "6:18:11", "throughput": 29395.65, "total_tokens": 4232688896} +{"current_steps": 12530, "total_steps": 14493, "loss": 0.2697, "lr": 3.331187259500546e-05, "epoch": 2.5937734530679846, "percentage": 86.46, "elapsed_time": "1 day, 16:01:35", "remaining_time": "6:16:14", "throughput": 29397.61, "total_tokens": 4236074496} +{"current_steps": 12540, "total_steps": 14493, "loss": 0.2723, "lr": 3.3304481945835235e-05, "epoch": 2.5958437928625036, "percentage": 86.52, "elapsed_time": "1 day, 16:03:18", "remaining_time": "6:14:17", "throughput": 29400.0, "total_tokens": 4239444224} +{"current_steps": 12550, "total_steps": 14493, "loss": 0.2728, "lr": 3.329709621360288e-05, "epoch": 2.5979141326570225, "percentage": 86.59, "elapsed_time": "1 day, 16:05:03", "remaining_time": "6:12:21", "throughput": 29402.04, "total_tokens": 4242810240} +{"current_steps": 12560, "total_steps": 14493, "loss": 0.2704, 
"lr": 3.328971539285882e-05, "epoch": 2.599984472451541, "percentage": 86.66, "elapsed_time": "1 day, 16:06:45", "remaining_time": "6:10:24", "throughput": 29404.42, "total_tokens": 4246157440} +{"current_steps": 12570, "total_steps": 14493, "loss": 0.2703, "lr": 3.3282339478161935e-05, "epoch": 2.6020548122460596, "percentage": 86.73, "elapsed_time": "1 day, 16:08:33", "remaining_time": "6:08:28", "throughput": 29405.65, "total_tokens": 4249519488} +{"current_steps": 12580, "total_steps": 14493, "loss": 0.2696, "lr": 3.327496846407953e-05, "epoch": 2.6041251520405786, "percentage": 86.8, "elapsed_time": "1 day, 16:10:19", "remaining_time": "6:06:31", "throughput": 29407.71, "total_tokens": 4252939904} +{"current_steps": 12590, "total_steps": 14493, "loss": 0.2692, "lr": 3.3267602345187304e-05, "epoch": 2.6061954918350976, "percentage": 86.87, "elapsed_time": "1 day, 16:12:05", "remaining_time": "6:04:35", "throughput": 29409.37, "total_tokens": 4256299264} +{"current_steps": 12600, "total_steps": 14493, "loss": 0.268, "lr": 3.326024111606942e-05, "epoch": 2.608265831629616, "percentage": 86.94, "elapsed_time": "1 day, 16:13:53", "remaining_time": "6:02:39", "throughput": 29411.19, "total_tokens": 4259729280} +{"current_steps": 12610, "total_steps": 14493, "loss": 0.2709, "lr": 3.325288477131839e-05, "epoch": 2.610336171424135, "percentage": 87.01, "elapsed_time": "1 day, 16:15:43", "remaining_time": "6:00:43", "throughput": 29412.68, "total_tokens": 4263167872} +{"current_steps": 12620, "total_steps": 14493, "loss": 0.271, "lr": 3.324553330553507e-05, "epoch": 2.6124065112186536, "percentage": 87.08, "elapsed_time": "1 day, 16:17:31", "remaining_time": "5:58:47", "throughput": 29414.21, "total_tokens": 4266584064} +{"current_steps": 12630, "total_steps": 14493, "loss": 0.272, "lr": 3.323818671332871e-05, "epoch": 2.6144768510131726, "percentage": 87.15, "elapsed_time": "1 day, 16:19:25", "remaining_time": "5:56:52", "throughput": 29414.88, "total_tokens": 
4270020736} +{"current_steps": 12640, "total_steps": 14493, "loss": 0.2721, "lr": 3.323084498931687e-05, "epoch": 2.616547190807691, "percentage": 87.21, "elapsed_time": "1 day, 16:21:12", "remaining_time": "5:54:56", "throughput": 29416.68, "total_tokens": 4273442048} +{"current_steps": 12650, "total_steps": 14493, "loss": 0.267, "lr": 3.322350812812545e-05, "epoch": 2.61861753060221, "percentage": 87.28, "elapsed_time": "1 day, 16:22:59", "remaining_time": "5:53:00", "throughput": 29418.47, "total_tokens": 4276850816} +{"current_steps": 12660, "total_steps": 14493, "loss": 0.2691, "lr": 3.321617612438862e-05, "epoch": 2.620687870396729, "percentage": 87.35, "elapsed_time": "1 day, 16:24:42", "remaining_time": "5:51:03", "throughput": 29420.5, "total_tokens": 4280161152} +{"current_steps": 12670, "total_steps": 14493, "loss": 0.2712, "lr": 3.320884897274886e-05, "epoch": 2.6227582101912477, "percentage": 87.42, "elapsed_time": "1 day, 16:26:25", "remaining_time": "5:49:07", "throughput": 29422.78, "total_tokens": 4283533312} +{"current_steps": 12680, "total_steps": 14493, "loss": 0.2715, "lr": 3.320152666785692e-05, "epoch": 2.624828549985766, "percentage": 87.49, "elapsed_time": "1 day, 16:28:14", "remaining_time": "5:47:11", "throughput": 29423.96, "total_tokens": 4286921472} +{"current_steps": 12690, "total_steps": 14493, "loss": 0.2724, "lr": 3.319420920437179e-05, "epoch": 2.626898889780285, "percentage": 87.56, "elapsed_time": "1 day, 16:30:03", "remaining_time": "5:45:15", "throughput": 29425.55, "total_tokens": 4290343040} +{"current_steps": 12700, "total_steps": 14493, "loss": 0.2721, "lr": 3.31868965769607e-05, "epoch": 2.628969229574804, "percentage": 87.63, "elapsed_time": "1 day, 16:31:45", "remaining_time": "5:43:19", "throughput": 29427.8, "total_tokens": 4293687168} +{"current_steps": 12710, "total_steps": 14493, "loss": 0.2718, "lr": 3.317958878029911e-05, "epoch": 2.6310395693693227, "percentage": 87.7, "elapsed_time": "1 day, 16:33:26", 
"remaining_time": "5:41:22", "throughput": 29430.2, "total_tokens": 4297003136} +{"current_steps": 12720, "total_steps": 14493, "loss": 0.2701, "lr": 3.3172285809070665e-05, "epoch": 2.6331099091638412, "percentage": 87.77, "elapsed_time": "1 day, 16:35:09", "remaining_time": "5:39:25", "throughput": 29432.61, "total_tokens": 4300371328} +{"current_steps": 12730, "total_steps": 14493, "loss": 0.2702, "lr": 3.3164987657967214e-05, "epoch": 2.6351802489583602, "percentage": 87.84, "elapsed_time": "1 day, 16:36:52", "remaining_time": "5:37:29", "throughput": 29434.38, "total_tokens": 4303677184} +{"current_steps": 12740, "total_steps": 14493, "loss": 0.2702, "lr": 3.315769432168877e-05, "epoch": 2.637250588752879, "percentage": 87.9, "elapsed_time": "1 day, 16:38:38", "remaining_time": "5:35:33", "throughput": 29435.55, "total_tokens": 4306978688} +{"current_steps": 12750, "total_steps": 14493, "loss": 0.2718, "lr": 3.315040579494349e-05, "epoch": 2.6393209285473977, "percentage": 87.97, "elapsed_time": "1 day, 16:40:27", "remaining_time": "5:33:37", "throughput": 29437.01, "total_tokens": 4310374528} +{"current_steps": 12760, "total_steps": 14493, "loss": 0.27, "lr": 3.31431220724477e-05, "epoch": 2.6413912683419167, "percentage": 88.04, "elapsed_time": "1 day, 16:42:13", "remaining_time": "5:31:41", "throughput": 29439.05, "total_tokens": 4313803520} +{"current_steps": 12770, "total_steps": 14493, "loss": 0.2699, "lr": 3.3135843148925834e-05, "epoch": 2.6434616081364353, "percentage": 88.11, "elapsed_time": "1 day, 16:43:55", "remaining_time": "5:29:44", "throughput": 29441.4, "total_tokens": 4317165568} +{"current_steps": 12780, "total_steps": 14493, "loss": 0.2685, "lr": 3.3128569019110414e-05, "epoch": 2.6455319479309543, "percentage": 88.18, "elapsed_time": "1 day, 16:45:43", "remaining_time": "5:27:49", "throughput": 29443.02, "total_tokens": 4320568832} +{"current_steps": 12790, "total_steps": 14493, "loss": 0.2715, "lr": 3.312129967774207e-05, "epoch": 
2.647602287725473, "percentage": 88.25, "elapsed_time": "1 day, 16:47:25", "remaining_time": "5:25:52", "throughput": 29445.21, "total_tokens": 4323887104} +{"current_steps": 12800, "total_steps": 14493, "loss": 0.2693, "lr": 3.311403511956952e-05, "epoch": 2.6496726275199918, "percentage": 88.32, "elapsed_time": "1 day, 16:49:12", "remaining_time": "5:23:56", "throughput": 29446.56, "total_tokens": 4327256576} +{"current_steps": 12810, "total_steps": 14493, "loss": 0.2697, "lr": 3.310677533934952e-05, "epoch": 2.6517429673145108, "percentage": 88.39, "elapsed_time": "1 day, 16:50:55", "remaining_time": "5:22:00", "throughput": 29448.84, "total_tokens": 4330600576} +{"current_steps": 12820, "total_steps": 14493, "loss": 0.2701, "lr": 3.309952033184686e-05, "epoch": 2.6538133071090293, "percentage": 88.46, "elapsed_time": "1 day, 16:52:35", "remaining_time": "5:20:03", "throughput": 29451.17, "total_tokens": 4333886848} +{"current_steps": 12830, "total_steps": 14493, "loss": 0.2744, "lr": 3.309227009183439e-05, "epoch": 2.655883646903548, "percentage": 88.53, "elapsed_time": "1 day, 16:54:19", "remaining_time": "5:18:07", "throughput": 29452.57, "total_tokens": 4337180160} +{"current_steps": 12840, "total_steps": 14493, "loss": 0.2689, "lr": 3.308502461409295e-05, "epoch": 2.657953986698067, "percentage": 88.59, "elapsed_time": "1 day, 16:56:04", "remaining_time": "5:16:11", "throughput": 29454.38, "total_tokens": 4340531584} +{"current_steps": 12850, "total_steps": 14493, "loss": 0.2705, "lr": 3.3077783893411386e-05, "epoch": 2.660024326492586, "percentage": 88.66, "elapsed_time": "1 day, 16:57:51", "remaining_time": "5:14:15", "throughput": 29455.98, "total_tokens": 4343913984} +{"current_steps": 12860, "total_steps": 14493, "loss": 0.2733, "lr": 3.30705479245865e-05, "epoch": 2.6620946662871043, "percentage": 88.73, "elapsed_time": "1 day, 16:59:36", "remaining_time": "5:12:19", "throughput": 29457.51, "total_tokens": 4347237248} +{"current_steps": 12870, 
"total_steps": 14493, "loss": 0.2727, "lr": 3.3063316702423094e-05, "epoch": 2.664165006081623, "percentage": 88.8, "elapsed_time": "1 day, 17:01:19", "remaining_time": "5:10:23", "throughput": 29460.01, "total_tokens": 4350638976} +{"current_steps": 12880, "total_steps": 14493, "loss": 0.2726, "lr": 3.305609022173388e-05, "epoch": 2.666235345876142, "percentage": 88.87, "elapsed_time": "1 day, 17:03:03", "remaining_time": "5:08:27", "throughput": 29462.05, "total_tokens": 4354015360} +{"current_steps": 12890, "total_steps": 14493, "loss": 0.271, "lr": 3.304886847733954e-05, "epoch": 2.668305685670661, "percentage": 88.94, "elapsed_time": "1 day, 17:04:51", "remaining_time": "5:06:31", "throughput": 29463.59, "total_tokens": 4357413120} +{"current_steps": 12900, "total_steps": 14493, "loss": 0.2702, "lr": 3.304165146406865e-05, "epoch": 2.6703760254651794, "percentage": 89.01, "elapsed_time": "1 day, 17:06:33", "remaining_time": "5:04:35", "throughput": 29465.63, "total_tokens": 4360725888} +{"current_steps": 12910, "total_steps": 14493, "loss": 0.2681, "lr": 3.30344391767577e-05, "epoch": 2.6724463652596984, "percentage": 89.08, "elapsed_time": "1 day, 17:08:32", "remaining_time": "5:02:41", "throughput": 29466.02, "total_tokens": 4364278144} +{"current_steps": 12920, "total_steps": 14493, "loss": 0.2703, "lr": 3.302723161025104e-05, "epoch": 2.674516705054217, "percentage": 89.15, "elapsed_time": "1 day, 17:10:22", "remaining_time": "5:00:45", "throughput": 29467.93, "total_tokens": 4367799808} +{"current_steps": 12930, "total_steps": 14493, "loss": 0.2674, "lr": 3.302002875940093e-05, "epoch": 2.676587044848736, "percentage": 89.22, "elapsed_time": "1 day, 17:12:05", "remaining_time": "4:58:49", "throughput": 29470.07, "total_tokens": 4371157504} +{"current_steps": 12940, "total_steps": 14493, "loss": 0.2693, "lr": 3.3012830619067466e-05, "epoch": 2.6786573846432544, "percentage": 89.28, "elapsed_time": "1 day, 17:13:56", "remaining_time": "4:56:54", 
"throughput": 29471.15, "total_tokens": 4374606208} +{"current_steps": 12950, "total_steps": 14493, "loss": 0.271, "lr": 3.300563718411857e-05, "epoch": 2.6807277244377734, "percentage": 89.35, "elapsed_time": "1 day, 17:15:47", "remaining_time": "4:54:59", "throughput": 29471.76, "total_tokens": 4377956864} +{"current_steps": 12960, "total_steps": 14493, "loss": 0.2718, "lr": 3.299844844943e-05, "epoch": 2.6827980642322924, "percentage": 89.42, "elapsed_time": "1 day, 17:17:38", "remaining_time": "4:53:04", "throughput": 29472.59, "total_tokens": 4381339776} +{"current_steps": 12970, "total_steps": 14493, "loss": 0.2704, "lr": 3.299126440988535e-05, "epoch": 2.684868404026811, "percentage": 89.49, "elapsed_time": "1 day, 17:19:27", "remaining_time": "4:51:09", "throughput": 29473.92, "total_tokens": 4384766208} +{"current_steps": 12980, "total_steps": 14493, "loss": 0.2705, "lr": 3.298408506037596e-05, "epoch": 2.6869387438213295, "percentage": 89.56, "elapsed_time": "1 day, 17:21:16", "remaining_time": "4:49:13", "throughput": 29475.21, "total_tokens": 4388174464} +{"current_steps": 12990, "total_steps": 14493, "loss": 0.2668, "lr": 3.297691039580097e-05, "epoch": 2.6890090836158484, "percentage": 89.63, "elapsed_time": "1 day, 17:23:01", "remaining_time": "4:47:17", "throughput": 29477.21, "total_tokens": 4391567360} +{"current_steps": 13000, "total_steps": 14493, "loss": 0.2698, "lr": 3.29697404110673e-05, "epoch": 2.6910794234103674, "percentage": 89.7, "elapsed_time": "1 day, 17:24:49", "remaining_time": "4:45:22", "throughput": 29478.72, "total_tokens": 4394956032} +{"current_steps": 13010, "total_steps": 14493, "loss": 0.2687, "lr": 3.2962575101089594e-05, "epoch": 2.693149763204886, "percentage": 89.77, "elapsed_time": "1 day, 17:26:41", "remaining_time": "4:43:27", "throughput": 29479.53, "total_tokens": 4398380544} +{"current_steps": 13020, "total_steps": 14493, "loss": 0.2674, "lr": 3.295541446079024e-05, "epoch": 2.6952201029994045, "percentage": 
89.84, "elapsed_time": "1 day, 17:28:29", "remaining_time": "4:41:31", "throughput": 29480.71, "total_tokens": 4401741184} +{"current_steps": 13030, "total_steps": 14493, "loss": 0.2725, "lr": 3.2948258485099336e-05, "epoch": 2.6972904427939235, "percentage": 89.91, "elapsed_time": "1 day, 17:30:20", "remaining_time": "4:39:36", "throughput": 29481.98, "total_tokens": 4405198720} +{"current_steps": 13040, "total_steps": 14493, "loss": 0.269, "lr": 3.29411071689547e-05, "epoch": 2.6993607825884425, "percentage": 89.97, "elapsed_time": "1 day, 17:32:08", "remaining_time": "4:37:41", "throughput": 29483.51, "total_tokens": 4408628608} +{"current_steps": 13050, "total_steps": 14493, "loss": 0.2694, "lr": 3.2933960507301826e-05, "epoch": 2.701431122382961, "percentage": 90.04, "elapsed_time": "1 day, 17:33:54", "remaining_time": "4:35:45", "throughput": 29485.47, "total_tokens": 4412034304} +{"current_steps": 13060, "total_steps": 14493, "loss": 0.2701, "lr": 3.292681849509387e-05, "epoch": 2.70350146217748, "percentage": 90.11, "elapsed_time": "1 day, 17:35:39", "remaining_time": "4:33:50", "throughput": 29486.95, "total_tokens": 4415366144} +{"current_steps": 13070, "total_steps": 14493, "loss": 0.2692, "lr": 3.291968112729166e-05, "epoch": 2.7055718019719985, "percentage": 90.18, "elapsed_time": "1 day, 17:37:30", "remaining_time": "4:31:55", "throughput": 29488.41, "total_tokens": 4418853504} +{"current_steps": 13080, "total_steps": 14493, "loss": 0.2705, "lr": 3.291254839886367e-05, "epoch": 2.7076421417665175, "percentage": 90.25, "elapsed_time": "1 day, 17:39:17", "remaining_time": "4:29:59", "throughput": 29489.94, "total_tokens": 4422239488} +{"current_steps": 13090, "total_steps": 14493, "loss": 0.2697, "lr": 3.2905420304785995e-05, "epoch": 2.709712481561036, "percentage": 90.32, "elapsed_time": "1 day, 17:41:09", "remaining_time": "4:28:04", "throughput": 29491.29, "total_tokens": 4425743232} +{"current_steps": 13100, "total_steps": 14493, "loss": 0.2685, 
"lr": 3.289829684004235e-05, "epoch": 2.711782821355555, "percentage": 90.39, "elapsed_time": "1 day, 17:42:54", "remaining_time": "4:26:08", "throughput": 29493.17, "total_tokens": 4429121536} +{"current_steps": 13110, "total_steps": 14493, "loss": 0.2677, "lr": 3.289117799962402e-05, "epoch": 2.713853161150074, "percentage": 90.46, "elapsed_time": "1 day, 17:44:45", "remaining_time": "4:24:13", "throughput": 29494.17, "total_tokens": 4432544256} +{"current_steps": 13120, "total_steps": 14493, "loss": 0.2701, "lr": 3.2884063778529914e-05, "epoch": 2.7159235009445926, "percentage": 90.53, "elapsed_time": "1 day, 17:46:28", "remaining_time": "4:22:18", "throughput": 29496.24, "total_tokens": 4435897216} +{"current_steps": 13130, "total_steps": 14493, "loss": 0.2695, "lr": 3.28769541717665e-05, "epoch": 2.717993840739111, "percentage": 90.6, "elapsed_time": "1 day, 17:48:14", "remaining_time": "4:20:22", "throughput": 29498.41, "total_tokens": 4439341312} +{"current_steps": 13140, "total_steps": 14493, "loss": 0.2701, "lr": 3.2869849174347775e-05, "epoch": 2.72006418053363, "percentage": 90.66, "elapsed_time": "1 day, 17:49:55", "remaining_time": "4:18:26", "throughput": 29500.78, "total_tokens": 4442677376} +{"current_steps": 13150, "total_steps": 14493, "loss": 0.2713, "lr": 3.2862748781295294e-05, "epoch": 2.722134520328149, "percentage": 90.73, "elapsed_time": "1 day, 17:51:41", "remaining_time": "4:16:30", "throughput": 29502.62, "total_tokens": 4446077440} +{"current_steps": 13160, "total_steps": 14493, "loss": 0.2733, "lr": 3.2855652987638146e-05, "epoch": 2.7242048601226676, "percentage": 90.8, "elapsed_time": "1 day, 17:53:27", "remaining_time": "4:14:35", "throughput": 29504.05, "total_tokens": 4449442816} +{"current_steps": 13170, "total_steps": 14493, "loss": 0.2709, "lr": 3.284856178841291e-05, "epoch": 2.726275199917186, "percentage": 90.87, "elapsed_time": "1 day, 17:55:14", "remaining_time": "4:12:40", "throughput": 29505.45, "total_tokens": 
4452803328} +{"current_steps": 13180, "total_steps": 14493, "loss": 0.2734, "lr": 3.284147517866367e-05, "epoch": 2.728345539711705, "percentage": 90.94, "elapsed_time": "1 day, 17:57:04", "remaining_time": "4:10:45", "throughput": 29506.73, "total_tokens": 4456231808} +{"current_steps": 13190, "total_steps": 14493, "loss": 0.2699, "lr": 3.2834393153441976e-05, "epoch": 2.730415879506224, "percentage": 91.01, "elapsed_time": "1 day, 17:58:51", "remaining_time": "4:08:49", "throughput": 29508.49, "total_tokens": 4459656832} +{"current_steps": 13200, "total_steps": 14493, "loss": 0.2708, "lr": 3.282731570780689e-05, "epoch": 2.7324862193007426, "percentage": 91.08, "elapsed_time": "1 day, 18:00:35", "remaining_time": "4:06:54", "throughput": 29510.53, "total_tokens": 4463031040} +{"current_steps": 13210, "total_steps": 14493, "loss": 0.2708, "lr": 3.2820242836824875e-05, "epoch": 2.7345565590952616, "percentage": 91.15, "elapsed_time": "1 day, 18:02:18", "remaining_time": "4:04:58", "throughput": 29512.57, "total_tokens": 4466385152} +{"current_steps": 13220, "total_steps": 14493, "loss": 0.269, "lr": 3.2813174535569854e-05, "epoch": 2.73662689888978, "percentage": 91.22, "elapsed_time": "1 day, 18:04:04", "remaining_time": "4:03:03", "throughput": 29514.19, "total_tokens": 4469772672} +{"current_steps": 13230, "total_steps": 14493, "loss": 0.2709, "lr": 3.280611079912318e-05, "epoch": 2.738697238684299, "percentage": 91.29, "elapsed_time": "1 day, 18:05:51", "remaining_time": "4:01:07", "throughput": 29516.0, "total_tokens": 4473180288} +{"current_steps": 13240, "total_steps": 14493, "loss": 0.2738, "lr": 3.279905162257358e-05, "epoch": 2.7407675784788177, "percentage": 91.35, "elapsed_time": "1 day, 18:07:37", "remaining_time": "3:59:12", "throughput": 29517.25, "total_tokens": 4476518912} +{"current_steps": 13250, "total_steps": 14493, "loss": 0.2676, "lr": 3.279199700101723e-05, "epoch": 2.7428379182733367, "percentage": 91.42, "elapsed_time": "1 day, 18:09:17", 
"remaining_time": "3:57:16", "throughput": 29520.05, "total_tokens": 4479880320} +{"current_steps": 13260, "total_steps": 14493, "loss": 0.2695, "lr": 3.2784946929557644e-05, "epoch": 2.7449082580678557, "percentage": 91.49, "elapsed_time": "1 day, 18:11:03", "remaining_time": "3:55:21", "throughput": 29521.74, "total_tokens": 4483264000} +{"current_steps": 13270, "total_steps": 14493, "loss": 0.2699, "lr": 3.277790140330571e-05, "epoch": 2.746978597862374, "percentage": 91.56, "elapsed_time": "1 day, 18:12:47", "remaining_time": "3:53:25", "throughput": 29523.26, "total_tokens": 4486577536} +{"current_steps": 13280, "total_steps": 14493, "loss": 0.2707, "lr": 3.277086041737968e-05, "epoch": 2.7490489376568927, "percentage": 91.63, "elapsed_time": "1 day, 18:14:31", "remaining_time": "3:51:30", "throughput": 29525.16, "total_tokens": 4489940864} +{"current_steps": 13290, "total_steps": 14493, "loss": 0.2678, "lr": 3.276382396690513e-05, "epoch": 2.7511192774514117, "percentage": 91.7, "elapsed_time": "1 day, 18:16:20", "remaining_time": "3:49:35", "throughput": 29526.15, "total_tokens": 4493302272} +{"current_steps": 13300, "total_steps": 14493, "loss": 0.2686, "lr": 3.275679204701496e-05, "epoch": 2.7531896172459307, "percentage": 91.77, "elapsed_time": "1 day, 18:18:08", "remaining_time": "3:47:40", "throughput": 29527.64, "total_tokens": 4496730496} +{"current_steps": 13310, "total_steps": 14493, "loss": 0.2689, "lr": 3.274976465284939e-05, "epoch": 2.7552599570404492, "percentage": 91.84, "elapsed_time": "1 day, 18:19:54", "remaining_time": "3:45:44", "throughput": 29529.21, "total_tokens": 4500094592} +{"current_steps": 13320, "total_steps": 14493, "loss": 0.2709, "lr": 3.274274177955593e-05, "epoch": 2.7573302968349678, "percentage": 91.91, "elapsed_time": "1 day, 18:21:38", "remaining_time": "3:43:49", "throughput": 29531.07, "total_tokens": 4503450112} +{"current_steps": 13330, "total_steps": 14493, "loss": 0.2703, "lr": 3.273572342228937e-05, "epoch": 
2.7594006366294868, "percentage": 91.98, "elapsed_time": "1 day, 18:23:19", "remaining_time": "3:41:53", "throughput": 29532.93, "total_tokens": 4506718080} +{"current_steps": 13340, "total_steps": 14493, "loss": 0.2691, "lr": 3.272870957621176e-05, "epoch": 2.7614709764240057, "percentage": 92.04, "elapsed_time": "1 day, 18:25:05", "remaining_time": "3:39:58", "throughput": 29534.68, "total_tokens": 4510113408} +{"current_steps": 13350, "total_steps": 14493, "loss": 0.2709, "lr": 3.2721700236492414e-05, "epoch": 2.7635413162185243, "percentage": 92.11, "elapsed_time": "1 day, 18:26:53", "remaining_time": "3:38:03", "throughput": 29535.95, "total_tokens": 4513499136} +{"current_steps": 13360, "total_steps": 14493, "loss": 0.2692, "lr": 3.271469539830788e-05, "epoch": 2.7656116560130433, "percentage": 92.18, "elapsed_time": "1 day, 18:28:32", "remaining_time": "3:36:07", "throughput": 29538.37, "total_tokens": 4516797824} +{"current_steps": 13370, "total_steps": 14493, "loss": 0.2685, "lr": 3.270769505684193e-05, "epoch": 2.767681995807562, "percentage": 92.25, "elapsed_time": "1 day, 18:30:19", "remaining_time": "3:34:12", "throughput": 29539.65, "total_tokens": 4520146304} +{"current_steps": 13380, "total_steps": 14493, "loss": 0.2736, "lr": 3.2700699207285544e-05, "epoch": 2.769752335602081, "percentage": 92.32, "elapsed_time": "1 day, 18:31:59", "remaining_time": "3:32:17", "throughput": 29542.1, "total_tokens": 4523468032} +{"current_steps": 13390, "total_steps": 14493, "loss": 0.2716, "lr": 3.269370784483691e-05, "epoch": 2.7718226753965993, "percentage": 92.39, "elapsed_time": "1 day, 18:33:44", "remaining_time": "3:30:21", "throughput": 29543.82, "total_tokens": 4526840320} +{"current_steps": 13400, "total_steps": 14493, "loss": 0.2684, "lr": 3.268672096470138e-05, "epoch": 2.7738930151911183, "percentage": 92.46, "elapsed_time": "1 day, 18:35:31", "remaining_time": "3:28:26", "throughput": 29545.39, "total_tokens": 4530247936} +{"current_steps": 13410, 
"total_steps": 14493, "loss": 0.2691, "lr": 3.2679738562091506e-05, "epoch": 2.7759633549856373, "percentage": 92.53, "elapsed_time": "1 day, 18:37:26", "remaining_time": "3:26:32", "throughput": 29545.66, "total_tokens": 4533664896} +{"current_steps": 13420, "total_steps": 14493, "loss": 0.2683, "lr": 3.2672760632226964e-05, "epoch": 2.778033694780156, "percentage": 92.6, "elapsed_time": "1 day, 18:39:11", "remaining_time": "3:24:37", "throughput": 29547.41, "total_tokens": 4537057792} +{"current_steps": 13430, "total_steps": 14493, "loss": 0.2736, "lr": 3.266578717033458e-05, "epoch": 2.7801040345746744, "percentage": 92.67, "elapsed_time": "1 day, 18:41:02", "remaining_time": "3:22:42", "throughput": 29548.26, "total_tokens": 4540466048} +{"current_steps": 13440, "total_steps": 14493, "loss": 0.2714, "lr": 3.265881817164833e-05, "epoch": 2.7821743743691933, "percentage": 92.73, "elapsed_time": "1 day, 18:42:48", "remaining_time": "3:20:47", "throughput": 29549.79, "total_tokens": 4543830400} +{"current_steps": 13450, "total_steps": 14493, "loss": 0.2691, "lr": 3.265185363140928e-05, "epoch": 2.7842447141637123, "percentage": 92.8, "elapsed_time": "1 day, 18:44:30", "remaining_time": "3:18:52", "throughput": 29551.7, "total_tokens": 4547137920} +{"current_steps": 13460, "total_steps": 14493, "loss": 0.27, "lr": 3.26448935448656e-05, "epoch": 2.786315053958231, "percentage": 92.87, "elapsed_time": "1 day, 18:46:21", "remaining_time": "3:16:57", "throughput": 29552.77, "total_tokens": 4550589568} +{"current_steps": 13470, "total_steps": 14493, "loss": 0.2708, "lr": 3.263793790727256e-05, "epoch": 2.7883853937527494, "percentage": 92.94, "elapsed_time": "1 day, 18:48:09", "remaining_time": "3:15:02", "throughput": 29553.96, "total_tokens": 4553942272} +{"current_steps": 13480, "total_steps": 14493, "loss": 0.2693, "lr": 3.2630986713892495e-05, "epoch": 2.7904557335472684, "percentage": 93.01, "elapsed_time": "1 day, 18:49:57", "remaining_time": "3:13:07", 
"throughput": 29555.2, "total_tokens": 4557331456} +{"current_steps": 13490, "total_steps": 14493, "loss": 0.2686, "lr": 3.26240399599948e-05, "epoch": 2.7925260733417874, "percentage": 93.08, "elapsed_time": "1 day, 18:51:39", "remaining_time": "3:11:12", "throughput": 29557.41, "total_tokens": 4560694528} +{"current_steps": 13500, "total_steps": 14493, "loss": 0.269, "lr": 3.2617097640855914e-05, "epoch": 2.794596413136306, "percentage": 93.15, "elapsed_time": "1 day, 18:53:26", "remaining_time": "3:09:17", "throughput": 29558.89, "total_tokens": 4564078848} +{"current_steps": 13510, "total_steps": 14493, "loss": 0.2688, "lr": 3.2610159751759314e-05, "epoch": 2.796666752930825, "percentage": 93.22, "elapsed_time": "1 day, 18:55:13", "remaining_time": "3:07:22", "throughput": 29560.36, "total_tokens": 4567471104} +{"current_steps": 13520, "total_steps": 14493, "loss": 0.2704, "lr": 3.26032262879955e-05, "epoch": 2.7987370927253434, "percentage": 93.29, "elapsed_time": "1 day, 18:57:00", "remaining_time": "3:05:27", "throughput": 29561.9, "total_tokens": 4570861568} +{"current_steps": 13530, "total_steps": 14493, "loss": 0.267, "lr": 3.259629724486198e-05, "epoch": 2.8008074325198624, "percentage": 93.36, "elapsed_time": "1 day, 18:58:45", "remaining_time": "3:03:32", "throughput": 29563.65, "total_tokens": 4574249216} +{"current_steps": 13540, "total_steps": 14493, "loss": 0.2684, "lr": 3.258937261766323e-05, "epoch": 2.802877772314381, "percentage": 93.42, "elapsed_time": "1 day, 19:00:30", "remaining_time": "3:01:37", "throughput": 29565.12, "total_tokens": 4577585408} +{"current_steps": 13550, "total_steps": 14493, "loss": 0.2704, "lr": 3.258245240171074e-05, "epoch": 2.8049481121089, "percentage": 93.49, "elapsed_time": "1 day, 19:02:15", "remaining_time": "2:59:42", "throughput": 29566.99, "total_tokens": 4580976896} +{"current_steps": 13560, "total_steps": 14493, "loss": 0.267, "lr": 3.2575536592322935e-05, "epoch": 2.807018451903419, "percentage": 93.56, 
"elapsed_time": "1 day, 19:04:01", "remaining_time": "2:57:47", "throughput": 29568.8, "total_tokens": 4584384640} +{"current_steps": 13570, "total_steps": 14493, "loss": 0.2692, "lr": 3.256862518482523e-05, "epoch": 2.8090887916979375, "percentage": 93.63, "elapsed_time": "1 day, 19:05:49", "remaining_time": "2:55:52", "throughput": 29570.29, "total_tokens": 4587817600} +{"current_steps": 13580, "total_steps": 14493, "loss": 0.2706, "lr": 3.256171817454994e-05, "epoch": 2.811159131492456, "percentage": 93.7, "elapsed_time": "1 day, 19:07:32", "remaining_time": "2:53:57", "throughput": 29572.53, "total_tokens": 4591202816} +{"current_steps": 13590, "total_steps": 14493, "loss": 0.2704, "lr": 3.255481555683633e-05, "epoch": 2.813229471286975, "percentage": 93.77, "elapsed_time": "1 day, 19:09:14", "remaining_time": "2:52:02", "throughput": 29574.43, "total_tokens": 4594527232} +{"current_steps": 13600, "total_steps": 14493, "loss": 0.2681, "lr": 3.254791732703057e-05, "epoch": 2.815299811081494, "percentage": 93.84, "elapsed_time": "1 day, 19:11:01", "remaining_time": "2:50:07", "throughput": 29576.14, "total_tokens": 4597945344} +{"current_steps": 13610, "total_steps": 14493, "loss": 0.2699, "lr": 3.254102348048575e-05, "epoch": 2.8173701508760125, "percentage": 93.91, "elapsed_time": "1 day, 19:12:49", "remaining_time": "2:48:13", "throughput": 29577.49, "total_tokens": 4601346048} +{"current_steps": 13620, "total_steps": 14493, "loss": 0.2692, "lr": 3.25341340125618e-05, "epoch": 2.819440490670531, "percentage": 93.98, "elapsed_time": "1 day, 19:14:40", "remaining_time": "2:46:18", "throughput": 29578.65, "total_tokens": 4604830848} +{"current_steps": 13630, "total_steps": 14493, "loss": 0.2692, "lr": 3.2527248918625575e-05, "epoch": 2.82151083046505, "percentage": 94.05, "elapsed_time": "1 day, 19:16:23", "remaining_time": "2:44:23", "throughput": 29580.35, "total_tokens": 4608124416} +{"current_steps": 13640, "total_steps": 14493, "loss": 0.2694, "lr": 
3.252036819405075e-05, "epoch": 2.823581170259569, "percentage": 94.11, "elapsed_time": "1 day, 19:18:03", "remaining_time": "2:42:28", "throughput": 29582.88, "total_tokens": 4611473408} +{"current_steps": 13650, "total_steps": 14493, "loss": 0.268, "lr": 3.251349183421788e-05, "epoch": 2.8256515100540875, "percentage": 94.18, "elapsed_time": "1 day, 19:19:46", "remaining_time": "2:40:33", "throughput": 29584.71, "total_tokens": 4614825984} +{"current_steps": 13660, "total_steps": 14493, "loss": 0.2699, "lr": 3.250661983451434e-05, "epoch": 2.8277218498486065, "percentage": 94.25, "elapsed_time": "1 day, 19:21:30", "remaining_time": "2:38:38", "throughput": 29586.44, "total_tokens": 4618166016} +{"current_steps": 13670, "total_steps": 14493, "loss": 0.2695, "lr": 3.2499752190334326e-05, "epoch": 2.829792189643125, "percentage": 94.32, "elapsed_time": "1 day, 19:23:16", "remaining_time": "2:36:43", "throughput": 29588.01, "total_tokens": 4621534208} +{"current_steps": 13680, "total_steps": 14493, "loss": 0.2685, "lr": 3.2492888897078834e-05, "epoch": 2.831862529437644, "percentage": 94.39, "elapsed_time": "1 day, 19:25:02", "remaining_time": "2:34:49", "throughput": 29589.37, "total_tokens": 4624901248} +{"current_steps": 13690, "total_steps": 14493, "loss": 0.271, "lr": 3.248602995015567e-05, "epoch": 2.8339328692321626, "percentage": 94.46, "elapsed_time": "1 day, 19:26:53", "remaining_time": "2:32:54", "throughput": 29590.35, "total_tokens": 4628327168} +{"current_steps": 13700, "total_steps": 14493, "loss": 0.2715, "lr": 3.247917534497943e-05, "epoch": 2.8360032090266816, "percentage": 94.53, "elapsed_time": "1 day, 19:28:40", "remaining_time": "2:30:59", "throughput": 29591.74, "total_tokens": 4631710976} +{"current_steps": 13710, "total_steps": 14493, "loss": 0.2703, "lr": 3.247232507697145e-05, "epoch": 2.8380735488212006, "percentage": 94.6, "elapsed_time": "1 day, 19:30:25", "remaining_time": "2:29:05", "throughput": 29594.01, "total_tokens": 4635181952} 
+{"current_steps": 13720, "total_steps": 14493, "loss": 0.2705, "lr": 3.246547914155985e-05, "epoch": 2.840143888615719, "percentage": 94.67, "elapsed_time": "1 day, 19:32:13", "remaining_time": "2:27:10", "throughput": 29595.49, "total_tokens": 4638603264} +{"current_steps": 13730, "total_steps": 14493, "loss": 0.2673, "lr": 3.245863753417949e-05, "epoch": 2.8422142284102376, "percentage": 94.74, "elapsed_time": "1 day, 19:33:58", "remaining_time": "2:25:15", "throughput": 29597.02, "total_tokens": 4641947776} +{"current_steps": 13740, "total_steps": 14493, "loss": 0.2713, "lr": 3.2451800250271944e-05, "epoch": 2.8442845682047566, "percentage": 94.8, "elapsed_time": "1 day, 19:35:47", "remaining_time": "2:23:21", "throughput": 29597.99, "total_tokens": 4645344896} +{"current_steps": 13750, "total_steps": 14493, "loss": 0.2717, "lr": 3.244496728528553e-05, "epoch": 2.8463549079992756, "percentage": 94.87, "elapsed_time": "1 day, 19:37:40", "remaining_time": "2:21:26", "throughput": 29598.91, "total_tokens": 4648814720} +{"current_steps": 13760, "total_steps": 14493, "loss": 0.2697, "lr": 3.243813863467525e-05, "epoch": 2.848425247793794, "percentage": 94.94, "elapsed_time": "1 day, 19:39:28", "remaining_time": "2:19:32", "throughput": 29600.2, "total_tokens": 4652220032} +{"current_steps": 13770, "total_steps": 14493, "loss": 0.2686, "lr": 3.243131429390281e-05, "epoch": 2.8504955875883127, "percentage": 95.01, "elapsed_time": "1 day, 19:41:10", "remaining_time": "2:17:37", "throughput": 29602.57, "total_tokens": 4655608320} +{"current_steps": 13780, "total_steps": 14493, "loss": 0.2712, "lr": 3.2424494258436594e-05, "epoch": 2.8525659273828317, "percentage": 95.08, "elapsed_time": "1 day, 19:43:01", "remaining_time": "2:15:43", "throughput": 29603.34, "total_tokens": 4659012608} +{"current_steps": 13790, "total_steps": 14493, "loss": 0.2685, "lr": 3.241767852375166e-05, "epoch": 2.8546362671773506, "percentage": 95.15, "elapsed_time": "1 day, 19:44:52", 
"remaining_time": "2:13:48", "throughput": 29604.39, "total_tokens": 4662478336} +{"current_steps": 13800, "total_steps": 14493, "loss": 0.2681, "lr": 3.241086708532971e-05, "epoch": 2.856706606971869, "percentage": 95.22, "elapsed_time": "1 day, 19:46:41", "remaining_time": "2:11:54", "throughput": 29605.46, "total_tokens": 4665869184} +{"current_steps": 13810, "total_steps": 14493, "loss": 0.2699, "lr": 3.24040599386591e-05, "epoch": 2.858776946766388, "percentage": 95.29, "elapsed_time": "1 day, 19:48:28", "remaining_time": "2:09:59", "throughput": 29606.84, "total_tokens": 4669241344} +{"current_steps": 13820, "total_steps": 14493, "loss": 0.2685, "lr": 3.23972570792348e-05, "epoch": 2.8608472865609067, "percentage": 95.36, "elapsed_time": "1 day, 19:50:10", "remaining_time": "2:08:05", "throughput": 29608.46, "total_tokens": 4672539648} +{"current_steps": 13830, "total_steps": 14493, "loss": 0.2701, "lr": 3.239045850255842e-05, "epoch": 2.8629176263554257, "percentage": 95.43, "elapsed_time": "1 day, 19:52:02", "remaining_time": "2:06:10", "throughput": 29609.35, "total_tokens": 4675985408} +{"current_steps": 13840, "total_steps": 14493, "loss": 0.2685, "lr": 3.238366420413817e-05, "epoch": 2.864987966149944, "percentage": 95.49, "elapsed_time": "1 day, 19:53:49", "remaining_time": "2:04:16", "throughput": 29610.83, "total_tokens": 4679389952} +{"current_steps": 13850, "total_steps": 14493, "loss": 0.2722, "lr": 3.237687417948882e-05, "epoch": 2.867058305944463, "percentage": 95.56, "elapsed_time": "1 day, 19:55:37", "remaining_time": "2:02:21", "throughput": 29611.82, "total_tokens": 4682747264} +{"current_steps": 13860, "total_steps": 14493, "loss": 0.2697, "lr": 3.2370088424131776e-05, "epoch": 2.869128645738982, "percentage": 95.63, "elapsed_time": "1 day, 19:57:29", "remaining_time": "2:00:27", "throughput": 29612.68, "total_tokens": 4686179584} +{"current_steps": 13870, "total_steps": 14493, "loss": 0.2717, "lr": 3.236330693359497e-05, "epoch": 
2.8711989855335007, "percentage": 95.7, "elapsed_time": "1 day, 19:59:11", "remaining_time": "1:58:32", "throughput": 29614.5, "total_tokens": 4689512448} +{"current_steps": 13880, "total_steps": 14493, "loss": 0.2712, "lr": 3.2356529703412894e-05, "epoch": 2.8732693253280193, "percentage": 95.77, "elapsed_time": "1 day, 20:00:59", "remaining_time": "1:56:38", "throughput": 29615.85, "total_tokens": 4692921344} +{"current_steps": 13890, "total_steps": 14493, "loss": 0.2704, "lr": 3.234975672912661e-05, "epoch": 2.8753396651225382, "percentage": 95.84, "elapsed_time": "1 day, 20:02:51", "remaining_time": "1:54:43", "throughput": 29617.1, "total_tokens": 4696414336} +{"current_steps": 13900, "total_steps": 14493, "loss": 0.2712, "lr": 3.234298800628368e-05, "epoch": 2.8774100049170572, "percentage": 95.91, "elapsed_time": "1 day, 20:04:44", "remaining_time": "1:52:49", "throughput": 29617.85, "total_tokens": 4699888128} +{"current_steps": 13910, "total_steps": 14493, "loss": 0.2695, "lr": 3.2336223530438195e-05, "epoch": 2.8794803447115758, "percentage": 95.98, "elapsed_time": "1 day, 20:06:24", "remaining_time": "1:50:55", "throughput": 29620.25, "total_tokens": 4703223168} +{"current_steps": 13920, "total_steps": 14493, "loss": 0.2668, "lr": 3.232946329715076e-05, "epoch": 2.8815506845060943, "percentage": 96.05, "elapsed_time": "1 day, 20:08:10", "remaining_time": "1:49:00", "throughput": 29621.64, "total_tokens": 4706600448} +{"current_steps": 13930, "total_steps": 14493, "loss": 0.2672, "lr": 3.2322707301988456e-05, "epoch": 2.8836210243006133, "percentage": 96.12, "elapsed_time": "1 day, 20:09:58", "remaining_time": "1:47:06", "throughput": 29622.85, "total_tokens": 4709989376} +{"current_steps": 13940, "total_steps": 14493, "loss": 0.2686, "lr": 3.231595554052488e-05, "epoch": 2.8856913640951323, "percentage": 96.18, "elapsed_time": "1 day, 20:11:44", "remaining_time": "1:45:11", "throughput": 29624.34, "total_tokens": 4713357824} +{"current_steps": 13950, 
"total_steps": 14493, "loss": 0.2704, "lr": 3.230920800834005e-05, "epoch": 2.887761703889651, "percentage": 96.25, "elapsed_time": "1 day, 20:13:32", "remaining_time": "1:43:17", "throughput": 29625.63, "total_tokens": 4716783360} +{"current_steps": 13960, "total_steps": 14493, "loss": 0.2695, "lr": 3.2302464701020486e-05, "epoch": 2.88983204368417, "percentage": 96.32, "elapsed_time": "1 day, 20:15:20", "remaining_time": "1:41:22", "throughput": 29626.97, "total_tokens": 4720172672} +{"current_steps": 13970, "total_steps": 14493, "loss": 0.27, "lr": 3.2295725614159126e-05, "epoch": 2.8919023834786883, "percentage": 96.39, "elapsed_time": "1 day, 20:17:05", "remaining_time": "1:39:28", "throughput": 29628.39, "total_tokens": 4723516160} +{"current_steps": 13980, "total_steps": 14493, "loss": 0.2682, "lr": 3.228899074335536e-05, "epoch": 2.8939727232732073, "percentage": 96.46, "elapsed_time": "1 day, 20:18:50", "remaining_time": "1:37:34", "throughput": 29630.04, "total_tokens": 4726898176} +{"current_steps": 13990, "total_steps": 14493, "loss": 0.2697, "lr": 3.228226008421498e-05, "epoch": 2.896043063067726, "percentage": 96.53, "elapsed_time": "1 day, 20:20:30", "remaining_time": "1:35:39", "throughput": 29632.44, "total_tokens": 4730227968} +{"current_steps": 14000, "total_steps": 14493, "loss": 0.2697, "lr": 3.2275533632350193e-05, "epoch": 2.898113402862245, "percentage": 96.6, "elapsed_time": "1 day, 20:22:10", "remaining_time": "1:33:44", "throughput": 29634.72, "total_tokens": 4733556992} +{"current_steps": 14010, "total_steps": 14493, "loss": 0.2671, "lr": 3.226881138337963e-05, "epoch": 2.9001837426567634, "percentage": 96.67, "elapsed_time": "1 day, 20:23:57", "remaining_time": "1:31:50", "throughput": 29635.98, "total_tokens": 4736946944} +{"current_steps": 14020, "total_steps": 14493, "loss": 0.2725, "lr": 3.2262093332928256e-05, "epoch": 2.9022540824512824, "percentage": 96.74, "elapsed_time": "1 day, 20:25:49", "remaining_time": "1:29:56", 
"throughput": 29636.51, "total_tokens": 4740333056} +{"current_steps": 14030, "total_steps": 14493, "loss": 0.2707, "lr": 3.225537947662746e-05, "epoch": 2.904324422245801, "percentage": 96.81, "elapsed_time": "1 day, 20:27:36", "remaining_time": "1:28:01", "throughput": 29637.76, "total_tokens": 4743713408} +{"current_steps": 14040, "total_steps": 14493, "loss": 0.2678, "lr": 3.224866981011494e-05, "epoch": 2.90639476204032, "percentage": 96.87, "elapsed_time": "1 day, 20:29:23", "remaining_time": "1:26:07", "throughput": 29639.12, "total_tokens": 4747108352} +{"current_steps": 14050, "total_steps": 14493, "loss": 0.2709, "lr": 3.22419643290348e-05, "epoch": 2.908465101834839, "percentage": 96.94, "elapsed_time": "1 day, 20:31:15", "remaining_time": "1:24:13", "throughput": 29640.13, "total_tokens": 4750592000} +{"current_steps": 14060, "total_steps": 14493, "loss": 0.2682, "lr": 3.2235263029037446e-05, "epoch": 2.9105354416293574, "percentage": 97.01, "elapsed_time": "1 day, 20:33:00", "remaining_time": "1:22:19", "throughput": 29641.54, "total_tokens": 4753936128} +{"current_steps": 14070, "total_steps": 14493, "loss": 0.2714, "lr": 3.222856590577962e-05, "epoch": 2.912605781423876, "percentage": 97.08, "elapsed_time": "1 day, 20:34:52", "remaining_time": "1:20:25", "throughput": 29642.55, "total_tokens": 4757401088} +{"current_steps": 14080, "total_steps": 14493, "loss": 0.2687, "lr": 3.222187295492436e-05, "epoch": 2.914676121218395, "percentage": 97.15, "elapsed_time": "1 day, 20:36:31", "remaining_time": "1:18:30", "throughput": 29644.71, "total_tokens": 4760692608} +{"current_steps": 14090, "total_steps": 14493, "loss": 0.2711, "lr": 3.221518417214104e-05, "epoch": 2.916746461012914, "percentage": 97.22, "elapsed_time": "1 day, 20:38:13", "remaining_time": "1:16:36", "throughput": 29646.86, "total_tokens": 4764062464} +{"current_steps": 14100, "total_steps": 14493, "loss": 0.2676, "lr": 3.22084995531053e-05, "epoch": 2.9188168008074324, "percentage": 97.29, 
"elapsed_time": "1 day, 20:39:57", "remaining_time": "1:14:41", "throughput": 29648.72, "total_tokens": 4767445632} +{"current_steps": 14110, "total_steps": 14493, "loss": 0.2684, "lr": 3.220181909349907e-05, "epoch": 2.9208871406019514, "percentage": 97.36, "elapsed_time": "1 day, 20:41:45", "remaining_time": "1:12:47", "throughput": 29650.16, "total_tokens": 4770881664} +{"current_steps": 14120, "total_steps": 14493, "loss": 0.2661, "lr": 3.219514278901053e-05, "epoch": 2.92295748039647, "percentage": 97.43, "elapsed_time": "1 day, 20:43:33", "remaining_time": "1:10:53", "throughput": 29651.5, "total_tokens": 4774284160} +{"current_steps": 14130, "total_steps": 14493, "loss": 0.2684, "lr": 3.218847063533413e-05, "epoch": 2.925027820190989, "percentage": 97.5, "elapsed_time": "1 day, 20:45:20", "remaining_time": "1:08:59", "throughput": 29652.86, "total_tokens": 4777670400} +{"current_steps": 14140, "total_steps": 14493, "loss": 0.2715, "lr": 3.218180262817055e-05, "epoch": 2.9270981599855075, "percentage": 97.56, "elapsed_time": "1 day, 20:47:07", "remaining_time": "1:07:04", "throughput": 29653.96, "total_tokens": 4781025664} +{"current_steps": 14150, "total_steps": 14493, "loss": 0.2691, "lr": 3.217513876322674e-05, "epoch": 2.9291684997800265, "percentage": 97.63, "elapsed_time": "1 day, 20:48:53", "remaining_time": "1:05:10", "throughput": 29655.55, "total_tokens": 4784430976} +{"current_steps": 14160, "total_steps": 14493, "loss": 0.2677, "lr": 3.216847903621581e-05, "epoch": 2.931238839574545, "percentage": 97.7, "elapsed_time": "1 day, 20:50:39", "remaining_time": "1:03:16", "throughput": 29657.12, "total_tokens": 4787831680} +{"current_steps": 14170, "total_steps": 14493, "loss": 0.2724, "lr": 3.216182344285713e-05, "epoch": 2.933309179369064, "percentage": 97.77, "elapsed_time": "1 day, 20:52:23", "remaining_time": "1:01:22", "throughput": 29658.34, "total_tokens": 4791111808} +{"current_steps": 14180, "total_steps": 14493, "loss": 0.2686, "lr": 
3.215517197887625e-05, "epoch": 2.9353795191635825, "percentage": 97.84, "elapsed_time": "1 day, 20:54:14", "remaining_time": "0:59:28", "throughput": 29659.13, "total_tokens": 4794531456} +{"current_steps": 14190, "total_steps": 14493, "loss": 0.268, "lr": 3.214852464000488e-05, "epoch": 2.9374498589581015, "percentage": 97.91, "elapsed_time": "1 day, 20:56:00", "remaining_time": "0:57:34", "throughput": 29660.38, "total_tokens": 4797864704} +{"current_steps": 14200, "total_steps": 14493, "loss": 0.267, "lr": 3.2141881421980945e-05, "epoch": 2.9395201987526205, "percentage": 97.98, "elapsed_time": "1 day, 20:57:48", "remaining_time": "0:55:39", "throughput": 29661.57, "total_tokens": 4801268608} +{"current_steps": 14210, "total_steps": 14493, "loss": 0.2697, "lr": 3.213524232054851e-05, "epoch": 2.941590538547139, "percentage": 98.05, "elapsed_time": "1 day, 20:59:33", "remaining_time": "0:53:45", "throughput": 29663.01, "total_tokens": 4804613376} +{"current_steps": 14220, "total_steps": 14493, "loss": 0.2713, "lr": 3.21286073314578e-05, "epoch": 2.9436608783416576, "percentage": 98.12, "elapsed_time": "1 day, 21:01:18", "remaining_time": "0:51:51", "throughput": 29664.47, "total_tokens": 4807970048} +{"current_steps": 14230, "total_steps": 14493, "loss": 0.2722, "lr": 3.2121976450465155e-05, "epoch": 2.9457312181361766, "percentage": 98.19, "elapsed_time": "1 day, 21:03:07", "remaining_time": "0:49:57", "throughput": 29665.46, "total_tokens": 4811376256} +{"current_steps": 14240, "total_steps": 14493, "loss": 0.2705, "lr": 3.211534967333308e-05, "epoch": 2.9478015579306955, "percentage": 98.25, "elapsed_time": "1 day, 21:04:46", "remaining_time": "0:48:03", "throughput": 29667.91, "total_tokens": 4814713472} +{"current_steps": 14250, "total_steps": 14493, "loss": 0.2702, "lr": 3.210872699583019e-05, "epoch": 2.949871897725214, "percentage": 98.32, "elapsed_time": "1 day, 21:06:35", "remaining_time": "0:46:09", "throughput": 29668.95, "total_tokens": 4818105344} 
+{"current_steps": 14260, "total_steps": 14493, "loss": 0.2679, "lr": 3.210210841373118e-05, "epoch": 2.951942237519733, "percentage": 98.39, "elapsed_time": "1 day, 21:08:22", "remaining_time": "0:44:15", "throughput": 29670.22, "total_tokens": 4821496320} +{"current_steps": 14270, "total_steps": 14493, "loss": 0.2667, "lr": 3.2095493922816855e-05, "epoch": 2.9540125773142516, "percentage": 98.46, "elapsed_time": "1 day, 21:10:08", "remaining_time": "0:42:21", "throughput": 29671.33, "total_tokens": 4824797056} +{"current_steps": 14280, "total_steps": 14493, "loss": 0.2701, "lr": 3.2088883518874105e-05, "epoch": 2.9560829171087706, "percentage": 98.53, "elapsed_time": "1 day, 21:11:52", "remaining_time": "0:40:27", "throughput": 29672.69, "total_tokens": 4828126080} +{"current_steps": 14290, "total_steps": 14493, "loss": 0.2713, "lr": 3.208227719769589e-05, "epoch": 2.958153256903289, "percentage": 98.6, "elapsed_time": "1 day, 21:13:44", "remaining_time": "0:38:33", "throughput": 29673.59, "total_tokens": 4831575040} +{"current_steps": 14300, "total_steps": 14493, "loss": 0.2683, "lr": 3.207567495508124e-05, "epoch": 2.960223596697808, "percentage": 98.67, "elapsed_time": "1 day, 21:15:25", "remaining_time": "0:36:38", "throughput": 29675.43, "total_tokens": 4834884224} +{"current_steps": 14310, "total_steps": 14493, "loss": 0.2704, "lr": 3.2069076786835205e-05, "epoch": 2.9622939364923266, "percentage": 98.74, "elapsed_time": "1 day, 21:17:09", "remaining_time": "0:34:44", "throughput": 29677.23, "total_tokens": 4838253824} +{"current_steps": 14320, "total_steps": 14493, "loss": 0.2686, "lr": 3.2062482688768904e-05, "epoch": 2.9643642762868456, "percentage": 98.81, "elapsed_time": "1 day, 21:18:51", "remaining_time": "0:32:50", "throughput": 29679.05, "total_tokens": 4841579136} +{"current_steps": 14330, "total_steps": 14493, "loss": 0.2694, "lr": 3.205589265669947e-05, "epoch": 2.966434616081364, "percentage": 98.88, "elapsed_time": "1 day, 21:20:38", 
"remaining_time": "0:30:56", "throughput": 29680.0, "total_tokens": 4844923904} +{"current_steps": 14340, "total_steps": 14493, "loss": 0.2695, "lr": 3.204930668645005e-05, "epoch": 2.968504955875883, "percentage": 98.94, "elapsed_time": "1 day, 21:22:27", "remaining_time": "0:29:02", "throughput": 29681.34, "total_tokens": 4848359936} +{"current_steps": 14350, "total_steps": 14493, "loss": 0.2704, "lr": 3.20427247738498e-05, "epoch": 2.970575295670402, "percentage": 99.01, "elapsed_time": "1 day, 21:24:10", "remaining_time": "0:27:08", "throughput": 29682.97, "total_tokens": 4851697408} +{"current_steps": 14360, "total_steps": 14493, "loss": 0.27, "lr": 3.2036146914733854e-05, "epoch": 2.9726456354649207, "percentage": 99.08, "elapsed_time": "1 day, 21:25:51", "remaining_time": "0:25:14", "throughput": 29684.78, "total_tokens": 4854981888} +{"current_steps": 14370, "total_steps": 14493, "loss": 0.2682, "lr": 3.202957310494336e-05, "epoch": 2.974715975259439, "percentage": 99.15, "elapsed_time": "1 day, 21:27:36", "remaining_time": "0:23:20", "throughput": 29686.04, "total_tokens": 4858321792} +{"current_steps": 14380, "total_steps": 14493, "loss": 0.2713, "lr": 3.202300334032542e-05, "epoch": 2.976786315053958, "percentage": 99.22, "elapsed_time": "1 day, 21:29:23", "remaining_time": "0:21:26", "throughput": 29687.4, "total_tokens": 4861717376} +{"current_steps": 14390, "total_steps": 14493, "loss": 0.2692, "lr": 3.201643761673308e-05, "epoch": 2.978856654848477, "percentage": 99.29, "elapsed_time": "1 day, 21:31:05", "remaining_time": "0:19:32", "throughput": 29689.36, "total_tokens": 4865056384} +{"current_steps": 14400, "total_steps": 14493, "loss": 0.2684, "lr": 3.200987593002536e-05, "epoch": 2.9809269946429957, "percentage": 99.36, "elapsed_time": "1 day, 21:32:47", "remaining_time": "0:17:38", "throughput": 29690.95, "total_tokens": 4868344704} +{"current_steps": 14410, "total_steps": 14493, "loss": 0.2725, "lr": 3.200331827606721e-05, "epoch": 
2.9829973344375147, "percentage": 99.43, "elapsed_time": "1 day, 21:34:28", "remaining_time": "0:15:45", "throughput": 29693.03, "total_tokens": 4871676544} +{"current_steps": 14420, "total_steps": 14493, "loss": 0.2677, "lr": 3.199676465072951e-05, "epoch": 2.9850676742320332, "percentage": 99.5, "elapsed_time": "1 day, 21:36:16", "remaining_time": "0:13:51", "throughput": 29693.98, "total_tokens": 4875049984} +{"current_steps": 14430, "total_steps": 14493, "loss": 0.2702, "lr": 3.1990215049889046e-05, "epoch": 2.987138014026552, "percentage": 99.57, "elapsed_time": "1 day, 21:38:01", "remaining_time": "0:11:57", "throughput": 29694.97, "total_tokens": 4878346624} +{"current_steps": 14440, "total_steps": 14493, "loss": 0.2693, "lr": 3.198366946942851e-05, "epoch": 2.9892083538210708, "percentage": 99.63, "elapsed_time": "1 day, 21:39:53", "remaining_time": "0:10:03", "throughput": 29695.7, "total_tokens": 4881795072} +{"current_steps": 14450, "total_steps": 14493, "loss": 0.2725, "lr": 3.1977127905236514e-05, "epoch": 2.9912786936155897, "percentage": 99.7, "elapsed_time": "1 day, 21:41:37", "remaining_time": "0:08:09", "throughput": 29697.41, "total_tokens": 4885157504} +{"current_steps": 14460, "total_steps": 14493, "loss": 0.2698, "lr": 3.197059035320752e-05, "epoch": 2.9933490334101083, "percentage": 99.77, "elapsed_time": "1 day, 21:43:28", "remaining_time": "0:06:15", "throughput": 29698.16, "total_tokens": 4888571392} +{"current_steps": 14470, "total_steps": 14493, "loss": 0.2675, "lr": 3.196405680924189e-05, "epoch": 2.9954193732046273, "percentage": 99.84, "elapsed_time": "1 day, 21:45:19", "remaining_time": "0:04:21", "throughput": 29698.93, "total_tokens": 4892002048} +{"current_steps": 14480, "total_steps": 14493, "loss": 0.2711, "lr": 3.195752726924582e-05, "epoch": 2.997489712999146, "percentage": 99.91, "elapsed_time": "1 day, 21:47:07", "remaining_time": "0:02:27", "throughput": 29700.04, "total_tokens": 4895379712} +{"current_steps": 14490, 
"total_steps": 14493, "loss": 0.2687, "lr": 3.195100172913139e-05, "epoch": 2.9995600527936648, "percentage": 99.98, "elapsed_time": "1 day, 21:48:52", "remaining_time": "0:00:34", "throughput": 29701.67, "total_tokens": 4898771456} +{"current_steps": 14493, "total_steps": 14493, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1 day, 21:50:49", "remaining_time": "0:00:00", "throughput": 29685.06, "total_tokens": 4899505792}