commit d8f4b296591f787ee1de59afef8a02482b126d33 Author: ModelHub XC Date: Sat Jun 20 11:02:16 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: baban/QwenTranslate_Telugu_English Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 
100644 index 0000000..b74502b --- /dev/null +++ b/README.md @@ -0,0 +1,63 @@ +--- +library_name: transformers +license: other +base_model: Qwen/Qwen2.5-3B-Instruct +tags: +- llama-factory +- full +- generated_from_trainer +model-index: +- name: MT_Telugu_En + results: [] +--- + + + +# MT_Telugu_En + +This model is a fine-tuned version of [Qwen/Qwen2.5-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct) on the MT_Telugu_En dataset. +It achieves the following results on the evaluation set: +- Loss: 1.0262 +- Num Input Tokens Seen: 4065497216 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-05 +- train_batch_size: 8 +- eval_batch_size: 16 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 16 +- total_train_batch_size: 1024 +- total_eval_batch_size: 128 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: inverse_sqrt +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- Transformers 4.55.0 +- Pytorch 2.5.1+cu124 +- Datasets 3.6.0 +- Tokenizers 0.21.1 diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..bdf7919 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' 
}} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n<tool_call>\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n</tool_call>' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n<tool_response>\n' }} + {{- message.content }} + {{- '\n</tool_response>' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/config.json b/config.json new file mode 100644 index 0000000..a302053 --- /dev/null +++ b/config.json @@ -0,0 +1,66 @@ 
+{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.55.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..0147d6e --- /dev/null +++ b/generation_config.json @@ -0,0 +1,14 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "4.55.0" +} diff --git a/model-00001-of-00002.safetensors b/model-00001-of-00002.safetensors new file mode 100644 index 0000000..75080ee --- /dev/null +++ 
b/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62fcc13b68beaf0fecf5145996e0c106e824d18f6035afae3c5093011749e933 +size 4957560304 diff --git a/model-00002-of-00002.safetensors b/model-00002-of-00002.safetensors new file mode 100644 index 0000000..63efe2c --- /dev/null +++ b/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac43a685c3941e9b65826a2edc45a6a460976d2e99d768c97648ff4a7ac8bbdd +size 1214366696 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..d3896a7 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,442 @@ +{ + "metadata": { + "total_parameters": 3085938688, + "total_size": 6171877376 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": 
"model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + 
"model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.bias": 
"model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.28.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.q_proj.bias": 
"model-00001-of-00002.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + 
"model.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.v_proj.bias": 
"model-00002-of-00002.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + 
"model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + 
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..ac23c0a --- 
/dev/null +++ b/special_tokens_map.json @@ -0,0 +1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..51ebb3b --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa +size 11421896 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..fb44f02 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,208 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": {}, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..11d9cde --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,1452 @@ +{"current_steps": 10, "total_steps": 14493, "loss": 2.4551, "lr": 4.9977515176118345e-05, "epoch": 0.0020703397945187755, "percentage": 0.07, "elapsed_time": "0:02:00", "remaining_time": "2 days, 0:40:10", "throughput": 23581.95, "total_tokens": 2852864} +{"current_steps": 20, "total_steps": 14493, "loss": 1.7054, "lr": 4.9952567580506e-05, "epoch": 0.004140679589037551, "percentage": 0.14, "elapsed_time": "0:03:53", "remaining_time": "1 day, 22:55:11", "throughput": 24112.45, "total_tokens": 5628224} +{"current_steps": 30, "total_steps": 14493, "loss": 1.5895, "lr": 4.992765730738634e-05, "epoch": 0.006211019383556326, "percentage": 0.21, "elapsed_time": "0:05:45", "remaining_time": "1 day, 22:15:28", 
"throughput": 24411.8, "total_tokens": 8432384} +{"current_steps": 40, "total_steps": 14493, "loss": 1.5269, "lr": 4.9902784263792476e-05, "epoch": 0.008281359178075102, "percentage": 0.28, "elapsed_time": "0:07:32", "remaining_time": "1 day, 21:22:59", "throughput": 24899.91, "total_tokens": 11258944} +{"current_steps": 50, "total_steps": 14493, "loss": 1.4905, "lr": 4.987794835708133e-05, "epoch": 0.010351698972593876, "percentage": 0.34, "elapsed_time": "0:09:16", "remaining_time": "1 day, 20:39:07", "throughput": 25321.29, "total_tokens": 14091008} +{"current_steps": 60, "total_steps": 14493, "loss": 1.4541, "lr": 4.985314949493234e-05, "epoch": 0.012422038767112653, "percentage": 0.41, "elapsed_time": "0:11:04", "remaining_time": "1 day, 20:23:24", "throughput": 25484.56, "total_tokens": 16930112} +{"current_steps": 70, "total_steps": 14493, "loss": 1.446, "lr": 4.982838758534584e-05, "epoch": 0.014492378561631427, "percentage": 0.48, "elapsed_time": "0:12:57", "remaining_time": "1 day, 20:29:39", "throughput": 25332.6, "total_tokens": 19693824} +{"current_steps": 80, "total_steps": 14493, "loss": 1.4077, "lr": 4.980366253664179e-05, "epoch": 0.016562718356150204, "percentage": 0.55, "elapsed_time": "0:14:56", "remaining_time": "1 day, 20:53:09", "throughput": 25084.22, "total_tokens": 22498304} +{"current_steps": 90, "total_steps": 14493, "loss": 1.4015, "lr": 4.977897425745825e-05, "epoch": 0.018633058150668978, "percentage": 0.62, "elapsed_time": "0:16:49", "remaining_time": "1 day, 20:52:26", "throughput": 25074.5, "total_tokens": 25311616} +{"current_steps": 100, "total_steps": 14493, "loss": 1.389, "lr": 4.975432265674997e-05, "epoch": 0.020703397945187753, "percentage": 0.69, "elapsed_time": "0:18:48", "remaining_time": "1 day, 21:08:02", "throughput": 24901.29, "total_tokens": 28110976} +{"current_steps": 110, "total_steps": 14493, "loss": 1.3741, "lr": 4.972970764378705e-05, "epoch": 0.02277373773970653, "percentage": 0.76, "elapsed_time": "0:20:32", 
"remaining_time": "1 day, 20:46:10", "throughput": 25065.1, "total_tokens": 30895808} +{"current_steps": 120, "total_steps": 14493, "loss": 1.3711, "lr": 4.970512912815344e-05, "epoch": 0.024844077534225305, "percentage": 0.83, "elapsed_time": "0:22:18", "remaining_time": "1 day, 20:31:32", "throughput": 25211.75, "total_tokens": 33740352} +{"current_steps": 130, "total_steps": 14493, "loss": 1.3429, "lr": 4.968058701974564e-05, "epoch": 0.02691441732874408, "percentage": 0.9, "elapsed_time": "0:24:05", "remaining_time": "1 day, 20:22:09", "throughput": 25299.11, "total_tokens": 36575424} +{"current_steps": 140, "total_steps": 14493, "loss": 1.3417, "lr": 4.96560812287712e-05, "epoch": 0.028984757123262855, "percentage": 0.97, "elapsed_time": "0:25:53", "remaining_time": "1 day, 20:14:01", "throughput": 25384.53, "total_tokens": 39428544} +{"current_steps": 150, "total_steps": 14493, "loss": 1.3367, "lr": 4.963161166574748e-05, "epoch": 0.03105509691778163, "percentage": 1.03, "elapsed_time": "0:27:41", "remaining_time": "1 day, 20:07:15", "throughput": 25438.26, "total_tokens": 42255872} +{"current_steps": 160, "total_steps": 14493, "loss": 1.3419, "lr": 4.960717824150013e-05, "epoch": 0.03312543671230041, "percentage": 1.1, "elapsed_time": "0:29:28", "remaining_time": "1 day, 20:00:06", "throughput": 25513.88, "total_tokens": 45116096} +{"current_steps": 170, "total_steps": 14493, "loss": 1.311, "lr": 4.9582780867161893e-05, "epoch": 0.03519577650681918, "percentage": 1.17, "elapsed_time": "0:31:14", "remaining_time": "1 day, 19:52:17", "throughput": 25546.79, "total_tokens": 47889088} +{"current_steps": 180, "total_steps": 14493, "loss": 1.331, "lr": 4.955841945417105e-05, "epoch": 0.037266116301337956, "percentage": 1.24, "elapsed_time": "0:32:57", "remaining_time": "1 day, 19:41:14", "throughput": 25630.7, "total_tokens": 50694592} +{"current_steps": 190, "total_steps": 14493, "loss": 1.3085, "lr": 4.953409391427024e-05, "epoch": 0.03933645609585673, 
"percentage": 1.31, "elapsed_time": "0:34:42", "remaining_time": "1 day, 19:33:05", "throughput": 25683.78, "total_tokens": 53492160} +{"current_steps": 200, "total_steps": 14493, "loss": 1.303, "lr": 4.950980415950502e-05, "epoch": 0.041406795890375506, "percentage": 1.38, "elapsed_time": "0:36:48", "remaining_time": "1 day, 19:50:41", "throughput": 25511.29, "total_tokens": 56345664} +{"current_steps": 210, "total_steps": 14493, "loss": 1.3138, "lr": 4.9485550102222575e-05, "epoch": 0.04347713568489428, "percentage": 1.45, "elapsed_time": "0:38:59", "remaining_time": "1 day, 20:12:16", "throughput": 25292.02, "total_tokens": 59177024} +{"current_steps": 220, "total_steps": 14493, "loss": 1.2884, "lr": 4.946133165507037e-05, "epoch": 0.04554747547941306, "percentage": 1.52, "elapsed_time": "0:41:52", "remaining_time": "1 day, 21:16:47", "throughput": 24679.55, "total_tokens": 62008704} +{"current_steps": 230, "total_steps": 14493, "loss": 1.2823, "lr": 4.943714873099483e-05, "epoch": 0.047617815273931836, "percentage": 1.59, "elapsed_time": "0:43:46", "remaining_time": "1 day, 21:14:40", "throughput": 24677.6, "total_tokens": 64817152} +{"current_steps": 240, "total_steps": 14493, "loss": 1.2913, "lr": 4.9413001243240024e-05, "epoch": 0.04968815506845061, "percentage": 1.66, "elapsed_time": "0:45:29", "remaining_time": "1 day, 21:01:56", "throughput": 24760.06, "total_tokens": 67590208} +{"current_steps": 250, "total_steps": 14493, "loss": 1.2781, "lr": 4.938888910534637e-05, "epoch": 0.051758494862969386, "percentage": 1.72, "elapsed_time": "0:47:13", "remaining_time": "1 day, 20:50:39", "throughput": 24845.38, "total_tokens": 70403392} +{"current_steps": 260, "total_steps": 14493, "loss": 1.2832, "lr": 4.936481223114932e-05, "epoch": 0.05382883465748816, "percentage": 1.79, "elapsed_time": "0:48:57", "remaining_time": "1 day, 20:39:43", "throughput": 24913.08, "total_tokens": 73172032} +{"current_steps": 270, "total_steps": 14493, "loss": 1.2859, "lr": 
4.934077053477808e-05, "epoch": 0.055899174452006935, "percentage": 1.86, "elapsed_time": "0:50:39", "remaining_time": "1 day, 20:28:59", "throughput": 24974.8, "total_tokens": 75922944} +{"current_steps": 280, "total_steps": 14493, "loss": 1.2713, "lr": 4.931676393065431e-05, "epoch": 0.05796951424652571, "percentage": 1.93, "elapsed_time": "0:52:22", "remaining_time": "1 day, 20:18:19", "throughput": 25048.14, "total_tokens": 78706112} +{"current_steps": 290, "total_steps": 14493, "loss": 1.2708, "lr": 4.929279233349088e-05, "epoch": 0.060039854041044484, "percentage": 2.0, "elapsed_time": "0:54:07", "remaining_time": "1 day, 20:10:27", "throughput": 25108.08, "total_tokens": 81527616} +{"current_steps": 300, "total_steps": 14493, "loss": 1.2696, "lr": 4.926885565829051e-05, "epoch": 0.06211019383556326, "percentage": 2.07, "elapsed_time": "0:55:46", "remaining_time": "1 day, 19:59:02", "throughput": 25186.8, "total_tokens": 84297856} +{"current_steps": 310, "total_steps": 14493, "loss": 1.2686, "lr": 4.924495382034461e-05, "epoch": 0.06418053363008204, "percentage": 2.14, "elapsed_time": "0:57:27", "remaining_time": "1 day, 19:48:59", "throughput": 25259.21, "total_tokens": 87086912} +{"current_steps": 320, "total_steps": 14493, "loss": 1.2659, "lr": 4.9221086735231975e-05, "epoch": 0.06625087342460081, "percentage": 2.21, "elapsed_time": "0:59:17", "remaining_time": "1 day, 19:46:09", "throughput": 25260.16, "total_tokens": 89866304} +{"current_steps": 330, "total_steps": 14493, "loss": 1.2586, "lr": 4.919725431881751e-05, "epoch": 0.06832121321911959, "percentage": 2.28, "elapsed_time": "1:01:17", "remaining_time": "1 day, 19:50:13", "throughput": 25201.22, "total_tokens": 92667072} +{"current_steps": 340, "total_steps": 14493, "loss": 1.2673, "lr": 4.917345648725101e-05, "epoch": 0.07039155301363836, "percentage": 2.35, "elapsed_time": "1:03:14", "remaining_time": "1 day, 19:52:49", "throughput": 25170.59, "total_tokens": 95520320} +{"current_steps": 350, 
"total_steps": 14493, "loss": 1.2546, "lr": 4.914969315696596e-05, "epoch": 0.07246189280815714, "percentage": 2.41, "elapsed_time": "1:05:11", "remaining_time": "1 day, 19:54:01", "throughput": 25132.42, "total_tokens": 98295360} +{"current_steps": 360, "total_steps": 14493, "loss": 1.2519, "lr": 4.912596424467818e-05, "epoch": 0.07453223260267591, "percentage": 2.48, "elapsed_time": "1:06:57", "remaining_time": "1 day, 19:48:27", "throughput": 25172.07, "total_tokens": 101120256} +{"current_steps": 370, "total_steps": 14493, "loss": 1.2476, "lr": 4.910226966738475e-05, "epoch": 0.07660257239719469, "percentage": 2.55, "elapsed_time": "1:08:41", "remaining_time": "1 day, 19:41:47", "throughput": 25217.5, "total_tokens": 103926656} +{"current_steps": 380, "total_steps": 14493, "loss": 1.2478, "lr": 4.9078609342362666e-05, "epoch": 0.07867291219171346, "percentage": 2.62, "elapsed_time": "1:10:28", "remaining_time": "1 day, 19:37:33", "throughput": 25255.53, "total_tokens": 106799040} +{"current_steps": 390, "total_steps": 14493, "loss": 1.2283, "lr": 4.905498318716775e-05, "epoch": 0.08074325198623224, "percentage": 2.69, "elapsed_time": "1:12:05", "remaining_time": "1 day, 19:27:01", "throughput": 25319.96, "total_tokens": 109524736} +{"current_steps": 400, "total_steps": 14493, "loss": 1.2476, "lr": 4.9031391119633295e-05, "epoch": 0.08281359178075101, "percentage": 2.76, "elapsed_time": "1:13:52", "remaining_time": "1 day, 19:22:48", "throughput": 25347.5, "total_tokens": 112353024} +{"current_steps": 410, "total_steps": 14493, "loss": 1.2343, "lr": 4.9007833057869e-05, "epoch": 0.08488393157526979, "percentage": 2.83, "elapsed_time": "1:15:36", "remaining_time": "1 day, 19:16:59", "throughput": 25392.9, "total_tokens": 115191808} +{"current_steps": 420, "total_steps": 14493, "loss": 1.2433, "lr": 4.898430892025967e-05, "epoch": 0.08695427136978856, "percentage": 2.9, "elapsed_time": "1:17:16", "remaining_time": "1 day, 19:09:06", "throughput": 25452.25, 
"total_tokens": 118002176} +{"current_steps": 430, "total_steps": 14493, "loss": 1.2341, "lr": 4.896081862546415e-05, "epoch": 0.08902461116430734, "percentage": 2.97, "elapsed_time": "1:19:01", "remaining_time": "1 day, 19:04:26", "throughput": 25482.11, "total_tokens": 120821312} +{"current_steps": 440, "total_steps": 14493, "loss": 1.2162, "lr": 4.8937362092414e-05, "epoch": 0.09109495095882612, "percentage": 3.04, "elapsed_time": "1:20:46", "remaining_time": "1 day, 18:59:44", "throughput": 25501.26, "total_tokens": 123587072} +{"current_steps": 450, "total_steps": 14493, "loss": 1.2206, "lr": 4.891393924031244e-05, "epoch": 0.0931652907533449, "percentage": 3.1, "elapsed_time": "1:22:29", "remaining_time": "1 day, 18:54:16", "throughput": 25523.19, "total_tokens": 126326400} +{"current_steps": 460, "total_steps": 14493, "loss": 1.2254, "lr": 4.8890549988633095e-05, "epoch": 0.09523563054786367, "percentage": 3.17, "elapsed_time": "1:24:22", "remaining_time": "1 day, 18:54:06", "throughput": 25507.87, "total_tokens": 129139328} +{"current_steps": 470, "total_steps": 14493, "loss": 1.2213, "lr": 4.8867194257118907e-05, "epoch": 0.09730597034238245, "percentage": 3.24, "elapsed_time": "1:26:07", "remaining_time": "1 day, 18:49:35", "throughput": 25536.0, "total_tokens": 131955072} +{"current_steps": 480, "total_steps": 14493, "loss": 1.2118, "lr": 4.884387196578093e-05, "epoch": 0.09937631013690122, "percentage": 3.31, "elapsed_time": "1:27:48", "remaining_time": "1 day, 18:43:24", "throughput": 25574.34, "total_tokens": 134735744} +{"current_steps": 490, "total_steps": 14493, "loss": 1.209, "lr": 4.882058303489718e-05, "epoch": 0.10144664993142, "percentage": 3.38, "elapsed_time": "1:29:32", "remaining_time": "1 day, 18:38:55", "throughput": 25594.51, "total_tokens": 137508800} +{"current_steps": 500, "total_steps": 14493, "loss": 1.2223, "lr": 4.8797327385011496e-05, "epoch": 0.10351698972593877, "percentage": 3.45, "elapsed_time": "1:31:17", "remaining_time": 
"1 day, 18:34:58", "throughput": 25619.09, "total_tokens": 140333632} +{"current_steps": 510, "total_steps": 14493, "loss": 1.2043, "lr": 4.8774104936932425e-05, "epoch": 0.10558732952045755, "percentage": 3.52, "elapsed_time": "1:33:14", "remaining_time": "1 day, 18:36:40", "throughput": 25581.24, "total_tokens": 143125952} +{"current_steps": 520, "total_steps": 14493, "loss": 1.1907, "lr": 4.8750915611732076e-05, "epoch": 0.10765766931497632, "percentage": 3.59, "elapsed_time": "1:34:57", "remaining_time": "1 day, 18:31:34", "throughput": 25599.38, "total_tokens": 145848704} +{"current_steps": 530, "total_steps": 14493, "loss": 1.218, "lr": 4.8727759330744986e-05, "epoch": 0.1097280091094951, "percentage": 3.66, "elapsed_time": "1:36:34", "remaining_time": "1 day, 18:24:19", "throughput": 25645.88, "total_tokens": 148607168} +{"current_steps": 540, "total_steps": 14493, "loss": 1.2064, "lr": 4.870463601556696e-05, "epoch": 0.11179834890401387, "percentage": 3.73, "elapsed_time": "1:38:19", "remaining_time": "1 day, 18:20:27", "throughput": 25668.79, "total_tokens": 151424448} +{"current_steps": 550, "total_steps": 14493, "loss": 1.211, "lr": 4.8681545588054075e-05, "epoch": 0.11386868869853264, "percentage": 3.79, "elapsed_time": "1:40:01", "remaining_time": "1 day, 18:15:47", "throughput": 25699.13, "total_tokens": 154237632} +{"current_steps": 560, "total_steps": 14493, "loss": 1.1954, "lr": 4.8658487970321404e-05, "epoch": 0.11593902849305142, "percentage": 3.86, "elapsed_time": "1:41:44", "remaining_time": "1 day, 18:11:27", "throughput": 25727.73, "total_tokens": 157060800} +{"current_steps": 570, "total_steps": 14493, "loss": 1.192, "lr": 4.863546308474209e-05, "epoch": 0.1180093682875702, "percentage": 3.93, "elapsed_time": "1:43:25", "remaining_time": "1 day, 18:06:18", "throughput": 25758.51, "total_tokens": 159845248} +{"current_steps": 580, "total_steps": 14493, "loss": 1.2161, "lr": 4.86124708539461e-05, "epoch": 0.12007970808208897, "percentage": 
4.0, "elapsed_time": "1:45:07", "remaining_time": "1 day, 18:01:48", "throughput": 25784.22, "total_tokens": 162639040} +{"current_steps": 590, "total_steps": 14493, "loss": 1.2154, "lr": 4.8589511200819216e-05, "epoch": 0.12215004787660774, "percentage": 4.07, "elapsed_time": "1:46:48", "remaining_time": "1 day, 17:56:51", "throughput": 25816.16, "total_tokens": 165441408} +{"current_steps": 600, "total_steps": 14493, "loss": 1.18, "lr": 4.8566584048501926e-05, "epoch": 0.12422038767112652, "percentage": 4.14, "elapsed_time": "1:48:28", "remaining_time": "1 day, 17:51:51", "throughput": 25846.56, "total_tokens": 168229888} +{"current_steps": 610, "total_steps": 14493, "loss": 1.1901, "lr": 4.854368932038835e-05, "epoch": 0.1262907274656453, "percentage": 4.21, "elapsed_time": "1:50:12", "remaining_time": "1 day, 17:48:23", "throughput": 25874.95, "total_tokens": 171108736} +{"current_steps": 620, "total_steps": 14493, "loss": 1.1798, "lr": 4.8520826940125144e-05, "epoch": 0.12836106726016408, "percentage": 4.28, "elapsed_time": "1:51:52", "remaining_time": "1 day, 17:43:16", "throughput": 25906.85, "total_tokens": 173898432} +{"current_steps": 630, "total_steps": 14493, "loss": 1.189, "lr": 4.849799683161046e-05, "epoch": 0.13043140705468284, "percentage": 4.35, "elapsed_time": "1:53:33", "remaining_time": "1 day, 17:38:59", "throughput": 25941.14, "total_tokens": 176761664} +{"current_steps": 640, "total_steps": 14493, "loss": 1.172, "lr": 4.8475198918992835e-05, "epoch": 0.13250174684920163, "percentage": 4.42, "elapsed_time": "1:55:16", "remaining_time": "1 day, 17:35:16", "throughput": 25965.84, "total_tokens": 179600896} +{"current_steps": 650, "total_steps": 14493, "loss": 1.1793, "lr": 4.845243312667023e-05, "epoch": 0.1345720866437204, "percentage": 4.48, "elapsed_time": "1:56:54", "remaining_time": "1 day, 17:29:55", "throughput": 25995.95, "total_tokens": 182358976} +{"current_steps": 660, "total_steps": 14493, "loss": 1.1857, "lr": 
4.842969937928884e-05, "epoch": 0.13664242643823918, "percentage": 4.55, "elapsed_time": "1:58:33", "remaining_time": "1 day, 17:25:02", "throughput": 26026.66, "total_tokens": 185153408} +{"current_steps": 670, "total_steps": 14493, "loss": 1.194, "lr": 4.840699760174217e-05, "epoch": 0.13871276623275794, "percentage": 4.62, "elapsed_time": "2:00:16", "remaining_time": "1 day, 17:21:28", "throughput": 26048.1, "total_tokens": 187979136} +{"current_steps": 680, "total_steps": 14493, "loss": 1.1883, "lr": 4.8384327719169906e-05, "epoch": 0.14078310602727673, "percentage": 4.69, "elapsed_time": "2:01:59", "remaining_time": "1 day, 17:17:55", "throughput": 26068.93, "total_tokens": 190802688} +{"current_steps": 690, "total_steps": 14493, "loss": 1.1726, "lr": 4.836168965695694e-05, "epoch": 0.1428534458217955, "percentage": 4.76, "elapsed_time": "2:03:43", "remaining_time": "1 day, 17:14:56", "throughput": 26079.57, "total_tokens": 193593536} +{"current_steps": 700, "total_steps": 14493, "loss": 1.1744, "lr": 4.8339083340732304e-05, "epoch": 0.14492378561631428, "percentage": 4.83, "elapsed_time": "2:05:23", "remaining_time": "1 day, 17:10:46", "throughput": 26104.83, "total_tokens": 196401408} +{"current_steps": 710, "total_steps": 14493, "loss": 1.1896, "lr": 4.8316508696368154e-05, "epoch": 0.14699412541083307, "percentage": 4.9, "elapsed_time": "2:07:05", "remaining_time": "1 day, 17:07:15", "throughput": 26123.54, "total_tokens": 199210816} +{"current_steps": 720, "total_steps": 14493, "loss": 1.1892, "lr": 4.8293965649978714e-05, "epoch": 0.14906446520535183, "percentage": 4.97, "elapsed_time": "2:08:49", "remaining_time": "1 day, 17:04:11", "throughput": 26138.26, "total_tokens": 202026112} +{"current_steps": 730, "total_steps": 14493, "loss": 1.18, "lr": 4.8271454127919364e-05, "epoch": 0.15113480499987061, "percentage": 5.04, "elapsed_time": "2:10:29", "remaining_time": "1 day, 17:00:20", "throughput": 26158.92, "total_tokens": 204822016} +{"current_steps": 
740, "total_steps": 14493, "loss": 1.1929, "lr": 4.824897405678549e-05, "epoch": 0.15320514479438938, "percentage": 5.11, "elapsed_time": "2:12:14", "remaining_time": "1 day, 16:57:38", "throughput": 26167.98, "total_tokens": 207622400} +{"current_steps": 750, "total_steps": 14493, "loss": 1.1748, "lr": 4.8226525363411576e-05, "epoch": 0.15527548458890816, "percentage": 5.17, "elapsed_time": "2:13:56", "remaining_time": "1 day, 16:54:26", "throughput": 26179.58, "total_tokens": 210400704} +{"current_steps": 760, "total_steps": 14493, "loss": 1.1732, "lr": 4.820410797487017e-05, "epoch": 0.15734582438342692, "percentage": 5.24, "elapsed_time": "2:15:36", "remaining_time": "1 day, 16:50:24", "throughput": 26201.45, "total_tokens": 213188288} +{"current_steps": 770, "total_steps": 14493, "loss": 1.1708, "lr": 4.818172181847091e-05, "epoch": 0.1594161641779457, "percentage": 5.31, "elapsed_time": "2:17:16", "remaining_time": "1 day, 16:46:28", "throughput": 26225.98, "total_tokens": 216005888} +{"current_steps": 780, "total_steps": 14493, "loss": 1.1856, "lr": 4.81593668217595e-05, "epoch": 0.16148650397246447, "percentage": 5.38, "elapsed_time": "2:18:58", "remaining_time": "1 day, 16:43:24", "throughput": 26241.98, "total_tokens": 218829824} +{"current_steps": 790, "total_steps": 14493, "loss": 1.1573, "lr": 4.813704291251675e-05, "epoch": 0.16355684376698326, "percentage": 5.45, "elapsed_time": "2:20:38", "remaining_time": "1 day, 16:39:35", "throughput": 26260.96, "total_tokens": 221610560} +{"current_steps": 800, "total_steps": 14493, "loss": 1.1809, "lr": 4.811475001875759e-05, "epoch": 0.16562718356150202, "percentage": 5.52, "elapsed_time": "2:22:22", "remaining_time": "1 day, 16:36:54", "throughput": 26273.97, "total_tokens": 224444096} +{"current_steps": 810, "total_steps": 14493, "loss": 1.1736, "lr": 4.8092488068730105e-05, "epoch": 0.1676975233560208, "percentage": 5.59, "elapsed_time": "2:24:05", "remaining_time": "1 day, 16:34:06", "throughput": 
26287.53, "total_tokens": 227271808} +{"current_steps": 820, "total_steps": 14493, "loss": 1.1685, "lr": 4.807025699091452e-05, "epoch": 0.16976786315053957, "percentage": 5.66, "elapsed_time": "2:25:41", "remaining_time": "1 day, 16:29:11", "throughput": 26315.08, "total_tokens": 230020480} +{"current_steps": 830, "total_steps": 14493, "loss": 1.1607, "lr": 4.8048056714022325e-05, "epoch": 0.17183820294505836, "percentage": 5.73, "elapsed_time": "2:27:22", "remaining_time": "1 day, 16:25:59", "throughput": 26329.69, "total_tokens": 232818240} +{"current_steps": 840, "total_steps": 14493, "loss": 1.1562, "lr": 4.802588716699519e-05, "epoch": 0.17390854273957712, "percentage": 5.8, "elapsed_time": "2:29:01", "remaining_time": "1 day, 16:22:04", "throughput": 26349.85, "total_tokens": 235596672} +{"current_steps": 850, "total_steps": 14493, "loss": 1.1773, "lr": 4.8003748279004156e-05, "epoch": 0.1759788825340959, "percentage": 5.86, "elapsed_time": "2:30:39", "remaining_time": "1 day, 16:18:03", "throughput": 26370.34, "total_tokens": 238364608} +{"current_steps": 860, "total_steps": 14493, "loss": 1.1569, "lr": 4.798163997944854e-05, "epoch": 0.17804922232861467, "percentage": 5.93, "elapsed_time": "2:32:20", "remaining_time": "1 day, 16:14:56", "throughput": 26386.2, "total_tokens": 241180160} +{"current_steps": 870, "total_steps": 14493, "loss": 1.1592, "lr": 4.79595621979551e-05, "epoch": 0.18011956212313346, "percentage": 6.0, "elapsed_time": "2:34:01", "remaining_time": "1 day, 16:11:53", "throughput": 26398.84, "total_tokens": 243972032} +{"current_steps": 880, "total_steps": 14493, "loss": 1.1675, "lr": 4.793751486437702e-05, "epoch": 0.18218990191765225, "percentage": 6.07, "elapsed_time": "2:35:40", "remaining_time": "1 day, 16:08:03", "throughput": 26419.19, "total_tokens": 246756160} +{"current_steps": 890, "total_steps": 14493, "loss": 1.1521, "lr": 4.7915497908793064e-05, "epoch": 0.184260241712171, "percentage": 6.14, "elapsed_time": "2:37:20", 
"remaining_time": "1 day, 16:04:48", "throughput": 26431.46, "total_tokens": 249520896} +{"current_steps": 900, "total_steps": 14493, "loss": 1.1859, "lr": 4.7893511261506516e-05, "epoch": 0.1863305815066898, "percentage": 6.21, "elapsed_time": "2:38:58", "remaining_time": "1 day, 16:01:07", "throughput": 26449.93, "total_tokens": 252299648} +{"current_steps": 910, "total_steps": 14493, "loss": 1.1643, "lr": 4.787155485304435e-05, "epoch": 0.18840092130120856, "percentage": 6.28, "elapsed_time": "2:40:57", "remaining_time": "1 day, 16:02:27", "throughput": 26417.92, "total_tokens": 255123776} +{"current_steps": 920, "total_steps": 14493, "loss": 1.1505, "lr": 4.784962861415629e-05, "epoch": 0.19047126109572735, "percentage": 6.35, "elapsed_time": "2:42:42", "remaining_time": "1 day, 16:00:29", "throughput": 26427.0, "total_tokens": 257994240} +{"current_steps": 930, "total_steps": 14493, "loss": 1.1405, "lr": 4.7827732475813884e-05, "epoch": 0.1925416008902461, "percentage": 6.42, "elapsed_time": "2:44:25", "remaining_time": "1 day, 15:58:03", "throughput": 26437.55, "total_tokens": 260831488} +{"current_steps": 940, "total_steps": 14493, "loss": 1.1621, "lr": 4.7805866369209576e-05, "epoch": 0.1946119406847649, "percentage": 6.49, "elapsed_time": "2:46:10", "remaining_time": "1 day, 15:56:01", "throughput": 26449.11, "total_tokens": 263721600} +{"current_steps": 950, "total_steps": 14493, "loss": 1.1555, "lr": 4.778403022575583e-05, "epoch": 0.19668228047928366, "percentage": 6.55, "elapsed_time": "2:47:50", "remaining_time": "1 day, 15:52:49", "throughput": 26466.63, "total_tokens": 266545088} +{"current_steps": 960, "total_steps": 14493, "loss": 1.1703, "lr": 4.7762223977084195e-05, "epoch": 0.19875262027380244, "percentage": 6.62, "elapsed_time": "2:49:25", "remaining_time": "1 day, 15:48:17", "throughput": 26495.41, "total_tokens": 269331072} +{"current_steps": 970, "total_steps": 14493, "loss": 1.1561, "lr": 4.774044755504444e-05, "epoch": 0.2008229600683212, 
"percentage": 6.69, "elapsed_time": "2:51:06", "remaining_time": "1 day, 15:45:27", "throughput": 26508.33, "total_tokens": 272147264} +{"current_steps": 980, "total_steps": 14493, "loss": 1.1487, "lr": 4.7718700891703616e-05, "epoch": 0.20289329986284, "percentage": 6.76, "elapsed_time": "2:52:53", "remaining_time": "1 day, 15:43:52", "throughput": 26509.88, "total_tokens": 274989632} +{"current_steps": 990, "total_steps": 14493, "loss": 1.1417, "lr": 4.7696983919345215e-05, "epoch": 0.20496363965735875, "percentage": 6.83, "elapsed_time": "2:54:31", "remaining_time": "1 day, 15:40:26", "throughput": 26526.34, "total_tokens": 277772864} +{"current_steps": 1000, "total_steps": 14493, "loss": 1.1418, "lr": 4.7675296570468216e-05, "epoch": 0.20703397945187754, "percentage": 6.9, "elapsed_time": "2:56:16", "remaining_time": "1 day, 15:38:24", "throughput": 26531.78, "total_tokens": 280605504} +{"current_steps": 1010, "total_steps": 14493, "loss": 1.1495, "lr": 4.76536387777863e-05, "epoch": 0.2091043192463963, "percentage": 6.97, "elapsed_time": "2:58:00", "remaining_time": "1 day, 15:36:15", "throughput": 26540.77, "total_tokens": 283460608} +{"current_steps": 1020, "total_steps": 14493, "loss": 1.1564, "lr": 4.7632010474226915e-05, "epoch": 0.2111746590409151, "percentage": 7.04, "elapsed_time": "2:59:40", "remaining_time": "1 day, 15:33:18", "throughput": 26556.62, "total_tokens": 286296064} +{"current_steps": 1030, "total_steps": 14493, "loss": 1.1476, "lr": 4.761041159293035e-05, "epoch": 0.21324499883543385, "percentage": 7.11, "elapsed_time": "3:01:20", "remaining_time": "1 day, 15:30:18", "throughput": 26568.02, "total_tokens": 289075648} +{"current_steps": 1040, "total_steps": 14493, "loss": 1.1483, "lr": 4.7588842067249e-05, "epoch": 0.21531533862995264, "percentage": 7.18, "elapsed_time": "3:03:02", "remaining_time": "1 day, 15:27:47", "throughput": 26580.15, "total_tokens": 291920832} +{"current_steps": 1050, "total_steps": 14493, "loss": 1.1457, "lr": 
4.756730183074637e-05, "epoch": 0.21738567842447143, "percentage": 7.24, "elapsed_time": "3:04:44", "remaining_time": "1 day, 15:25:18", "throughput": 26588.59, "total_tokens": 294731072} +{"current_steps": 1060, "total_steps": 14493, "loss": 1.1399, "lr": 4.7545790817196314e-05, "epoch": 0.2194560182189902, "percentage": 7.31, "elapsed_time": "3:06:24", "remaining_time": "1 day, 15:22:11", "throughput": 26603.88, "total_tokens": 297538496} +{"current_steps": 1070, "total_steps": 14493, "loss": 1.1429, "lr": 4.752430896058212e-05, "epoch": 0.22152635801350898, "percentage": 7.38, "elapsed_time": "3:08:06", "remaining_time": "1 day, 15:19:46", "throughput": 26614.24, "total_tokens": 300378880} +{"current_steps": 1080, "total_steps": 14493, "loss": 1.1272, "lr": 4.750285619509567e-05, "epoch": 0.22359669780802774, "percentage": 7.45, "elapsed_time": "3:09:44", "remaining_time": "1 day, 15:16:32", "throughput": 26627.92, "total_tokens": 303153600} +{"current_steps": 1090, "total_steps": 14493, "loss": 1.1315, "lr": 4.7481432455136644e-05, "epoch": 0.22566703760254653, "percentage": 7.52, "elapsed_time": "3:11:26", "remaining_time": "1 day, 15:13:56", "throughput": 26639.39, "total_tokens": 305981952} +{"current_steps": 1100, "total_steps": 14493, "loss": 1.1323, "lr": 4.7460037675311584e-05, "epoch": 0.2277373773970653, "percentage": 7.59, "elapsed_time": "3:13:05", "remaining_time": "1 day, 15:11:04", "throughput": 26650.82, "total_tokens": 308776256} +{"current_steps": 1110, "total_steps": 14493, "loss": 1.1337, "lr": 4.7438671790433126e-05, "epoch": 0.22980771719158408, "percentage": 7.66, "elapsed_time": "3:14:48", "remaining_time": "1 day, 15:08:48", "throughput": 26658.78, "total_tokens": 311608192} +{"current_steps": 1120, "total_steps": 14493, "loss": 1.1294, "lr": 4.741733473551915e-05, "epoch": 0.23187805698610284, "percentage": 7.73, "elapsed_time": "3:16:25", "remaining_time": "1 day, 15:05:19", "throughput": 26677.58, "total_tokens": 314405248} 
+{"current_steps": 1130, "total_steps": 14493, "loss": 1.1418, "lr": 4.7396026445791966e-05, "epoch": 0.23394839678062163, "percentage": 7.8, "elapsed_time": "3:18:08", "remaining_time": "1 day, 15:03:05", "throughput": 26684.89, "total_tokens": 317234880} +{"current_steps": 1140, "total_steps": 14493, "loss": 1.1427, "lr": 4.737474685667742e-05, "epoch": 0.2360187365751404, "percentage": 7.87, "elapsed_time": "3:19:45", "remaining_time": "1 day, 14:59:48", "throughput": 26700.09, "total_tokens": 320015872} +{"current_steps": 1150, "total_steps": 14493, "loss": 1.1441, "lr": 4.7353495903804165e-05, "epoch": 0.23808907636965917, "percentage": 7.93, "elapsed_time": "3:21:24", "remaining_time": "1 day, 14:56:49", "throughput": 26712.06, "total_tokens": 322796096} +{"current_steps": 1160, "total_steps": 14493, "loss": 1.1217, "lr": 4.733227352300277e-05, "epoch": 0.24015941616417794, "percentage": 8.0, "elapsed_time": "3:22:57", "remaining_time": "1 day, 14:52:53", "throughput": 26729.61, "total_tokens": 325512512} +{"current_steps": 1170, "total_steps": 14493, "loss": 1.149, "lr": 4.731107965030496e-05, "epoch": 0.24222975595869672, "percentage": 8.07, "elapsed_time": "3:24:39", "remaining_time": "1 day, 14:50:33", "throughput": 26737.1, "total_tokens": 328329472} +{"current_steps": 1180, "total_steps": 14493, "loss": 1.1268, "lr": 4.728991422194278e-05, "epoch": 0.24430009575321548, "percentage": 8.14, "elapsed_time": "3:26:26", "remaining_time": "1 day, 14:49:08", "throughput": 26738.61, "total_tokens": 331201216} +{"current_steps": 1190, "total_steps": 14493, "loss": 1.1396, "lr": 4.726877717434773e-05, "epoch": 0.24637043554773427, "percentage": 8.21, "elapsed_time": "3:28:09", "remaining_time": "1 day, 14:46:56", "throughput": 26746.82, "total_tokens": 334046016} +{"current_steps": 1200, "total_steps": 14493, "loss": 1.1358, "lr": 4.724766844415013e-05, "epoch": 0.24844077534225303, "percentage": 8.28, "elapsed_time": "3:29:52", "remaining_time": "1 day, 
14:44:54", "throughput": 26750.53, "total_tokens": 336857664} +{"current_steps": 1210, "total_steps": 14493, "loss": 1.1251, "lr": 4.722658796817813e-05, "epoch": 0.2505111151367718, "percentage": 8.35, "elapsed_time": "3:31:32", "remaining_time": "1 day, 14:42:10", "throughput": 26760.47, "total_tokens": 339649024} +{"current_steps": 1220, "total_steps": 14493, "loss": 1.1362, "lr": 4.7205535683457044e-05, "epoch": 0.2525814549312906, "percentage": 8.42, "elapsed_time": "3:33:10", "remaining_time": "1 day, 14:39:19", "throughput": 26769.4, "total_tokens": 342405632} +{"current_steps": 1230, "total_steps": 14493, "loss": 1.1217, "lr": 4.7184511527208484e-05, "epoch": 0.2546517947258094, "percentage": 8.49, "elapsed_time": "3:34:53", "remaining_time": "1 day, 14:37:05", "throughput": 26777.73, "total_tokens": 345247616} +{"current_steps": 1240, "total_steps": 14493, "loss": 1.1194, "lr": 4.7163515436849644e-05, "epoch": 0.25672213452032816, "percentage": 8.56, "elapsed_time": "3:36:34", "remaining_time": "1 day, 14:34:47", "throughput": 26789.22, "total_tokens": 348121856} +{"current_steps": 1250, "total_steps": 14493, "loss": 1.1158, "lr": 4.714254734999245e-05, "epoch": 0.2587924743148469, "percentage": 8.62, "elapsed_time": "3:38:13", "remaining_time": "1 day, 14:31:52", "throughput": 26799.4, "total_tokens": 350884736} +{"current_steps": 1260, "total_steps": 14493, "loss": 1.1206, "lr": 4.712160720444284e-05, "epoch": 0.2608628141093657, "percentage": 8.69, "elapsed_time": "3:39:50", "remaining_time": "1 day, 14:28:47", "throughput": 26808.68, "total_tokens": 353609856} +{"current_steps": 1270, "total_steps": 14493, "loss": 1.1344, "lr": 4.710069493819992e-05, "epoch": 0.2629331539038845, "percentage": 8.76, "elapsed_time": "3:41:30", "remaining_time": "1 day, 14:26:14", "throughput": 26821.59, "total_tokens": 356461760} +{"current_steps": 1280, "total_steps": 14493, "loss": 1.1191, "lr": 4.70798104894553e-05, "epoch": 0.26500349369840326, "percentage": 8.83, 
"elapsed_time": "3:43:14", "remaining_time": "1 day, 14:24:23", "throughput": 26827.26, "total_tokens": 359329472} +{"current_steps": 1290, "total_steps": 14493, "loss": 1.1162, "lr": 4.705895379659219e-05, "epoch": 0.267073833492922, "percentage": 8.9, "elapsed_time": "3:44:52", "remaining_time": "1 day, 14:21:36", "throughput": 26837.59, "total_tokens": 362113344} +{"current_steps": 1300, "total_steps": 14493, "loss": 1.1292, "lr": 4.7038124798184766e-05, "epoch": 0.2691441732874408, "percentage": 8.97, "elapsed_time": "3:46:32", "remaining_time": "1 day, 14:18:59", "throughput": 26845.19, "total_tokens": 364883392} +{"current_steps": 1310, "total_steps": 14493, "loss": 1.1224, "lr": 4.7017323432997304e-05, "epoch": 0.2712145130819596, "percentage": 9.04, "elapsed_time": "3:48:16", "remaining_time": "1 day, 14:17:16", "throughput": 26851.37, "total_tokens": 367778752} +{"current_steps": 1320, "total_steps": 14493, "loss": 1.1244, "lr": 4.6996549639983506e-05, "epoch": 0.27328485287647836, "percentage": 9.11, "elapsed_time": "3:49:55", "remaining_time": "1 day, 14:14:30", "throughput": 26863.73, "total_tokens": 370591552} +{"current_steps": 1330, "total_steps": 14493, "loss": 1.1225, "lr": 4.697580335828569e-05, "epoch": 0.2753551926709971, "percentage": 9.18, "elapsed_time": "3:51:35", "remaining_time": "1 day, 14:12:02", "throughput": 26873.36, "total_tokens": 373416512} +{"current_steps": 1340, "total_steps": 14493, "loss": 1.1238, "lr": 4.6955084527234076e-05, "epoch": 0.2774255324655159, "percentage": 9.25, "elapsed_time": "3:53:17", "remaining_time": "1 day, 14:09:52", "throughput": 26877.64, "total_tokens": 376212992} +{"current_steps": 1350, "total_steps": 14493, "loss": 1.1221, "lr": 4.6934393086346034e-05, "epoch": 0.2794958722600347, "percentage": 9.31, "elapsed_time": "3:54:56", "remaining_time": "1 day, 14:07:18", "throughput": 26887.14, "total_tokens": 379018816} +{"current_steps": 1360, "total_steps": 14493, "loss": 1.1255, "lr": 
4.6913728975325324e-05, "epoch": 0.28156621205455346, "percentage": 9.38, "elapsed_time": "3:56:33", "remaining_time": "1 day, 14:04:20", "throughput": 26898.01, "total_tokens": 381775808} +{"current_steps": 1370, "total_steps": 14493, "loss": 1.1123, "lr": 4.6893092134061393e-05, "epoch": 0.2836365518490722, "percentage": 9.45, "elapsed_time": "3:58:12", "remaining_time": "1 day, 14:01:49", "throughput": 26908.27, "total_tokens": 384597824} +{"current_steps": 1380, "total_steps": 14493, "loss": 1.1036, "lr": 4.687248250262859e-05, "epoch": 0.285706891643591, "percentage": 9.52, "elapsed_time": "3:59:52", "remaining_time": "1 day, 13:59:23", "throughput": 26914.83, "total_tokens": 387382400} +{"current_steps": 1390, "total_steps": 14493, "loss": 1.1247, "lr": 4.685190002128548e-05, "epoch": 0.2877772314381098, "percentage": 9.59, "elapsed_time": "4:01:39", "remaining_time": "1 day, 13:57:58", "throughput": 26916.91, "total_tokens": 390275200} +{"current_steps": 1400, "total_steps": 14493, "loss": 1.1176, "lr": 4.6831344630474114e-05, "epoch": 0.28984757123262855, "percentage": 9.66, "elapsed_time": "4:03:17", "remaining_time": "1 day, 13:55:13", "throughput": 26926.73, "total_tokens": 393050432} +{"current_steps": 1410, "total_steps": 14493, "loss": 1.1055, "lr": 4.6810816270819276e-05, "epoch": 0.2919179110271473, "percentage": 9.73, "elapsed_time": "4:04:58", "remaining_time": "1 day, 13:53:01", "throughput": 26931.95, "total_tokens": 395853440} +{"current_steps": 1420, "total_steps": 14493, "loss": 1.1101, "lr": 4.679031488312777e-05, "epoch": 0.29398825082166613, "percentage": 9.8, "elapsed_time": "4:06:41", "remaining_time": "1 day, 13:51:11", "throughput": 26936.83, "total_tokens": 398715648} +{"current_steps": 1430, "total_steps": 14493, "loss": 1.1156, "lr": 4.6769840408387717e-05, "epoch": 0.2960585906161849, "percentage": 9.87, "elapsed_time": "4:08:23", "remaining_time": "1 day, 13:49:06", "throughput": 26940.82, "total_tokens": 401523968} 
+{"current_steps": 1440, "total_steps": 14493, "loss": 1.1112, "lr": 4.674939278776787e-05, "epoch": 0.29812893041070365, "percentage": 9.94, "elapsed_time": "4:10:02", "remaining_time": "1 day, 13:46:28", "throughput": 26952.37, "total_tokens": 404345792} +{"current_steps": 1450, "total_steps": 14493, "loss": 1.1102, "lr": 4.672897196261683e-05, "epoch": 0.3001992702052224, "percentage": 10.0, "elapsed_time": "4:11:42", "remaining_time": "1 day, 13:44:09", "throughput": 26957.6, "total_tokens": 407128064} +{"current_steps": 1460, "total_steps": 14493, "loss": 1.1101, "lr": 4.670857787446238e-05, "epoch": 0.30226960999974123, "percentage": 10.07, "elapsed_time": "4:13:14", "remaining_time": "1 day, 13:40:40", "throughput": 26972.17, "total_tokens": 409840000} +{"current_steps": 1470, "total_steps": 14493, "loss": 1.09, "lr": 4.668821046501082e-05, "epoch": 0.30433994979426, "percentage": 10.14, "elapsed_time": "4:14:48", "remaining_time": "1 day, 13:37:24", "throughput": 26986.67, "total_tokens": 412587136} +{"current_steps": 1480, "total_steps": 14493, "loss": 1.0991, "lr": 4.6667869676146194e-05, "epoch": 0.30641028958877875, "percentage": 10.21, "elapsed_time": "4:16:25", "remaining_time": "1 day, 13:34:33", "throughput": 26998.12, "total_tokens": 415366912} +{"current_steps": 1490, "total_steps": 14493, "loss": 1.1042, "lr": 4.6647555449929645e-05, "epoch": 0.3084806293832975, "percentage": 10.28, "elapsed_time": "4:18:00", "remaining_time": "1 day, 13:31:34", "throughput": 27012.75, "total_tokens": 418165824} +{"current_steps": 1500, "total_steps": 14493, "loss": 1.1154, "lr": 4.662726772859869e-05, "epoch": 0.3105509691778163, "percentage": 10.35, "elapsed_time": "4:19:42", "remaining_time": "1 day, 13:29:31", "throughput": 27021.08, "total_tokens": 421043328} +{"current_steps": 1510, "total_steps": 14493, "loss": 1.1202, "lr": 4.660700645456655e-05, "epoch": 0.3126213089723351, "percentage": 10.42, "elapsed_time": "4:21:21", "remaining_time": "1 day, 
13:27:12", "throughput": 27028.98, "total_tokens": 423864000} +{"current_steps": 1520, "total_steps": 14493, "loss": 1.1147, "lr": 4.658677157042149e-05, "epoch": 0.31469164876685385, "percentage": 10.49, "elapsed_time": "4:23:00", "remaining_time": "1 day, 13:24:42", "throughput": 27037.09, "total_tokens": 426652992} +{"current_steps": 1530, "total_steps": 14493, "loss": 1.0967, "lr": 4.656656301892605e-05, "epoch": 0.3167619885613726, "percentage": 10.56, "elapsed_time": "4:24:37", "remaining_time": "1 day, 13:22:01", "throughput": 27048.73, "total_tokens": 429461312} +{"current_steps": 1540, "total_steps": 14493, "loss": 1.097, "lr": 4.6546380743016465e-05, "epoch": 0.3188323283558914, "percentage": 10.63, "elapsed_time": "4:26:11", "remaining_time": "1 day, 13:18:59", "throughput": 27059.79, "total_tokens": 432193344} +{"current_steps": 1550, "total_steps": 14493, "loss": 1.0811, "lr": 4.652622468580193e-05, "epoch": 0.3209026681504102, "percentage": 10.69, "elapsed_time": "4:27:51", "remaining_time": "1 day, 13:16:39", "throughput": 27067.09, "total_tokens": 435000704} +{"current_steps": 1560, "total_steps": 14493, "loss": 1.1007, "lr": 4.650609479056392e-05, "epoch": 0.32297300794492895, "percentage": 10.76, "elapsed_time": "4:29:32", "remaining_time": "1 day, 13:14:32", "throughput": 27072.84, "total_tokens": 437824448} +{"current_steps": 1570, "total_steps": 14493, "loss": 1.0911, "lr": 4.648599100075556e-05, "epoch": 0.32504334773944776, "percentage": 10.83, "elapsed_time": "4:31:14", "remaining_time": "1 day, 13:12:40", "throughput": 27074.66, "total_tokens": 440632576} +{"current_steps": 1580, "total_steps": 14493, "loss": 1.1172, "lr": 4.6465913260000945e-05, "epoch": 0.3271136875339665, "percentage": 10.9, "elapsed_time": "4:32:51", "remaining_time": "1 day, 13:10:01", "throughput": 27084.95, "total_tokens": 443422400} +{"current_steps": 1590, "total_steps": 14493, "loss": 1.107, "lr": 4.644586151209444e-05, "epoch": 0.3291840273284853, "percentage": 
10.97, "elapsed_time": "4:34:31", "remaining_time": "1 day, 13:07:51", "throughput": 27093.63, "total_tokens": 446285312} +{"current_steps": 1600, "total_steps": 14493, "loss": 1.0982, "lr": 4.6425835701000084e-05, "epoch": 0.33125436712300405, "percentage": 11.04, "elapsed_time": "4:36:12", "remaining_time": "1 day, 13:05:43", "throughput": 27101.68, "total_tokens": 449143744} +{"current_steps": 1610, "total_steps": 14493, "loss": 1.108, "lr": 4.640583577085084e-05, "epoch": 0.33332470691752286, "percentage": 11.11, "elapsed_time": "4:37:52", "remaining_time": "1 day, 13:03:33", "throughput": 27107.36, "total_tokens": 451954240} +{"current_steps": 1620, "total_steps": 14493, "loss": 1.1053, "lr": 4.638586166594806e-05, "epoch": 0.3353950467120416, "percentage": 11.18, "elapsed_time": "4:39:29", "remaining_time": "1 day, 13:00:52", "throughput": 27118.88, "total_tokens": 454760320} +{"current_steps": 1630, "total_steps": 14493, "loss": 1.1007, "lr": 4.6365913330760726e-05, "epoch": 0.3374653865065604, "percentage": 11.25, "elapsed_time": "4:41:10", "remaining_time": "1 day, 12:58:50", "throughput": 27124.54, "total_tokens": 457600064} +{"current_steps": 1640, "total_steps": 14493, "loss": 1.0768, "lr": 4.6345990709924855e-05, "epoch": 0.33953572630107914, "percentage": 11.32, "elapsed_time": "4:42:45", "remaining_time": "1 day, 12:56:02", "throughput": 27134.6, "total_tokens": 460352832} +{"current_steps": 1650, "total_steps": 14493, "loss": 1.0896, "lr": 4.632609374824284e-05, "epoch": 0.34160606609559796, "percentage": 11.38, "elapsed_time": "4:44:23", "remaining_time": "1 day, 12:53:35", "throughput": 27142.31, "total_tokens": 463141184} +{"current_steps": 1660, "total_steps": 14493, "loss": 1.1005, "lr": 4.630622239068285e-05, "epoch": 0.3436764058901167, "percentage": 11.45, "elapsed_time": "4:46:02", "remaining_time": "1 day, 12:51:21", "throughput": 27151.41, "total_tokens": 465996992} +{"current_steps": 1670, "total_steps": 14493, "loss": 1.09, "lr": 
4.628637658237808e-05, "epoch": 0.3457467456846355, "percentage": 11.52, "elapsed_time": "4:47:40", "remaining_time": "1 day, 12:48:56", "throughput": 27160.18, "total_tokens": 468809344} +{"current_steps": 1680, "total_steps": 14493, "loss": 1.0863, "lr": 4.626655626862625e-05, "epoch": 0.34781708547915424, "percentage": 11.59, "elapsed_time": "4:49:18", "remaining_time": "1 day, 12:46:27", "throughput": 27168.92, "total_tokens": 471605632} +{"current_steps": 1690, "total_steps": 14493, "loss": 1.0965, "lr": 4.624676139488888e-05, "epoch": 0.34988742527367306, "percentage": 11.66, "elapsed_time": "4:51:00", "remaining_time": "1 day, 12:44:34", "throughput": 27174.15, "total_tokens": 474468096} +{"current_steps": 1700, "total_steps": 14493, "loss": 1.0979, "lr": 4.6226991906790686e-05, "epoch": 0.3519577650681918, "percentage": 11.73, "elapsed_time": "4:52:34", "remaining_time": "1 day, 12:41:45", "throughput": 27186.61, "total_tokens": 477256320} +{"current_steps": 1710, "total_steps": 14493, "loss": 1.0872, "lr": 4.620724775011897e-05, "epoch": 0.3540281048627106, "percentage": 11.8, "elapsed_time": "4:54:17", "remaining_time": "1 day, 12:39:55", "throughput": 27190.15, "total_tokens": 480103104} +{"current_steps": 1720, "total_steps": 14493, "loss": 1.0942, "lr": 4.618752887082297e-05, "epoch": 0.35609844465722934, "percentage": 11.87, "elapsed_time": "4:55:53", "remaining_time": "1 day, 12:37:23", "throughput": 27199.96, "total_tokens": 482906048} +{"current_steps": 1730, "total_steps": 14493, "loss": 1.097, "lr": 4.616783521501325e-05, "epoch": 0.35816878445174816, "percentage": 11.94, "elapsed_time": "4:57:35", "remaining_time": "1 day, 12:35:25", "throughput": 27205.32, "total_tokens": 485756544} +{"current_steps": 1740, "total_steps": 14493, "loss": 1.0803, "lr": 4.614816672896108e-05, "epoch": 0.3602391242462669, "percentage": 12.01, "elapsed_time": "4:59:14", "remaining_time": "1 day, 12:33:13", "throughput": 27213.23, "total_tokens": 488596672} 
+{"current_steps": 1750, "total_steps": 14493, "loss": 1.0844, "lr": 4.612852335909782e-05, "epoch": 0.3623094640407857, "percentage": 12.07, "elapsed_time": "5:00:50", "remaining_time": "1 day, 12:30:40", "throughput": 27221.68, "total_tokens": 491370816} +{"current_steps": 1760, "total_steps": 14493, "loss": 1.0751, "lr": 4.6108905052014323e-05, "epoch": 0.3643798038353045, "percentage": 12.14, "elapsed_time": "5:02:28", "remaining_time": "1 day, 12:28:19", "throughput": 27230.31, "total_tokens": 494195904} +{"current_steps": 1770, "total_steps": 14493, "loss": 1.0952, "lr": 4.608931175446027e-05, "epoch": 0.36645014362982326, "percentage": 12.21, "elapsed_time": "5:04:06", "remaining_time": "1 day, 12:25:54", "throughput": 27239.06, "total_tokens": 497004032} +{"current_steps": 1780, "total_steps": 14493, "loss": 1.097, "lr": 4.606974341334367e-05, "epoch": 0.368520483424342, "percentage": 12.28, "elapsed_time": "5:05:42", "remaining_time": "1 day, 12:23:27", "throughput": 27247.48, "total_tokens": 499799168} +{"current_steps": 1790, "total_steps": 14493, "loss": 1.0955, "lr": 4.605019997573011e-05, "epoch": 0.3705908232188608, "percentage": 12.35, "elapsed_time": "5:07:29", "remaining_time": "1 day, 12:22:12", "throughput": 27241.91, "total_tokens": 502610304} +{"current_steps": 1800, "total_steps": 14493, "loss": 1.0955, "lr": 4.603068138884229e-05, "epoch": 0.3726611630133796, "percentage": 12.42, "elapsed_time": "5:09:07", "remaining_time": "1 day, 12:19:50", "throughput": 27247.43, "total_tokens": 505370752} +{"current_steps": 1810, "total_steps": 14493, "loss": 1.0956, "lr": 4.6011187600059345e-05, "epoch": 0.37473150280789835, "percentage": 12.49, "elapsed_time": "5:10:44", "remaining_time": "1 day, 12:17:23", "throughput": 27254.22, "total_tokens": 508134592} +{"current_steps": 1820, "total_steps": 14493, "loss": 1.0827, "lr": 4.599171855691629e-05, "epoch": 0.3768018426024171, "percentage": 12.56, "elapsed_time": "5:12:20", "remaining_time": "1 day, 
12:14:50", "throughput": 27262.66, "total_tokens": 510905536} +{"current_steps": 1830, "total_steps": 14493, "loss": 1.0854, "lr": 4.597227420710335e-05, "epoch": 0.3788721823969359, "percentage": 12.63, "elapsed_time": "5:13:58", "remaining_time": "1 day, 12:12:38", "throughput": 27267.88, "total_tokens": 513696384} +{"current_steps": 1840, "total_steps": 14493, "loss": 1.0851, "lr": 4.595285449846551e-05, "epoch": 0.3809425221914547, "percentage": 12.7, "elapsed_time": "5:15:34", "remaining_time": "1 day, 12:10:06", "throughput": 27276.64, "total_tokens": 516474240} +{"current_steps": 1850, "total_steps": 14493, "loss": 1.0899, "lr": 4.593345937900178e-05, "epoch": 0.38301286198597345, "percentage": 12.76, "elapsed_time": "5:17:16", "remaining_time": "1 day, 12:08:18", "throughput": 27278.32, "total_tokens": 519290432} +{"current_steps": 1860, "total_steps": 14493, "loss": 1.1104, "lr": 4.591408879686472e-05, "epoch": 0.3850832017804922, "percentage": 12.83, "elapsed_time": "5:18:51", "remaining_time": "1 day, 12:05:40", "throughput": 27289.03, "total_tokens": 522082176} +{"current_steps": 1870, "total_steps": 14493, "loss": 1.0892, "lr": 4.5894742700359775e-05, "epoch": 0.387153541575011, "percentage": 12.9, "elapsed_time": "5:20:29", "remaining_time": "1 day, 12:03:26", "throughput": 27294.93, "total_tokens": 524877888} +{"current_steps": 1880, "total_steps": 14493, "loss": 1.0923, "lr": 4.587542103794477e-05, "epoch": 0.3892238813695298, "percentage": 12.97, "elapsed_time": "5:22:07", "remaining_time": "1 day, 12:01:09", "throughput": 27301.52, "total_tokens": 527672704} +{"current_steps": 1890, "total_steps": 14493, "loss": 1.0842, "lr": 4.5856123758229247e-05, "epoch": 0.39129422116404855, "percentage": 13.04, "elapsed_time": "5:23:44", "remaining_time": "1 day, 11:58:46", "throughput": 27310.97, "total_tokens": 530498176} +{"current_steps": 1900, "total_steps": 14493, "loss": 1.0818, "lr": 4.5836850809973993e-05, "epoch": 0.3933645609585673, "percentage": 
13.11, "elapsed_time": "5:25:19", "remaining_time": "1 day, 11:56:11", "throughput": 27319.36, "total_tokens": 533254016} +{"current_steps": 1910, "total_steps": 14493, "loss": 1.0769, "lr": 4.5817602142090385e-05, "epoch": 0.3954349007530861, "percentage": 13.18, "elapsed_time": "5:26:58", "remaining_time": "1 day, 11:54:05", "throughput": 27325.43, "total_tokens": 536080768} +{"current_steps": 1920, "total_steps": 14493, "loss": 1.0912, "lr": 4.579837770363989e-05, "epoch": 0.3975052405476049, "percentage": 13.25, "elapsed_time": "5:28:36", "remaining_time": "1 day, 11:51:49", "throughput": 27332.48, "total_tokens": 538887616} +{"current_steps": 1930, "total_steps": 14493, "loss": 1.0885, "lr": 4.57791774438334e-05, "epoch": 0.39957558034212365, "percentage": 13.32, "elapsed_time": "5:30:09", "remaining_time": "1 day, 11:49:03", "throughput": 27342.4, "total_tokens": 541628288} +{"current_steps": 1940, "total_steps": 14493, "loss": 1.0836, "lr": 4.576000131203078e-05, "epoch": 0.4016459201366424, "percentage": 13.39, "elapsed_time": "5:31:47", "remaining_time": "1 day, 11:46:52", "throughput": 27348.54, "total_tokens": 544433920} +{"current_steps": 1950, "total_steps": 14493, "loss": 1.0884, "lr": 4.574084925774023e-05, "epoch": 0.4037162599311612, "percentage": 13.45, "elapsed_time": "5:33:18", "remaining_time": "1 day, 11:43:58", "throughput": 27361.17, "total_tokens": 547190528} +{"current_steps": 1960, "total_steps": 14493, "loss": 1.0818, "lr": 4.5721721230617795e-05, "epoch": 0.40578659972568, "percentage": 13.52, "elapsed_time": "5:34:57", "remaining_time": "1 day, 11:41:53", "throughput": 27367.81, "total_tokens": 550032512} +{"current_steps": 1970, "total_steps": 14493, "loss": 1.079, "lr": 4.57026171804667e-05, "epoch": 0.40785693952019875, "percentage": 13.59, "elapsed_time": "5:36:42", "remaining_time": "1 day, 11:40:23", "throughput": 27366.03, "total_tokens": 552858368} +{"current_steps": 1980, "total_steps": 14493, "loss": 1.0834, "lr": 
4.568353705723692e-05, "epoch": 0.4099272793147175, "percentage": 13.66, "elapsed_time": "5:38:29", "remaining_time": "1 day, 11:39:07", "throughput": 27365.67, "total_tokens": 555773440} +{"current_steps": 1990, "total_steps": 14493, "loss": 1.0737, "lr": 4.566448081102455e-05, "epoch": 0.4119976191092363, "percentage": 13.73, "elapsed_time": "5:40:11", "remaining_time": "1 day, 11:37:25", "throughput": 27366.44, "total_tokens": 558600064} +{"current_steps": 2000, "total_steps": 14493, "loss": 1.0784, "lr": 4.564544839207128e-05, "epoch": 0.4140679589037551, "percentage": 13.8, "elapsed_time": "5:41:50", "remaining_time": "1 day, 11:35:20", "throughput": 27372.32, "total_tokens": 561428288} +{"current_steps": 2010, "total_steps": 14493, "loss": 1.074, "lr": 4.562643975076387e-05, "epoch": 0.41613829869827385, "percentage": 13.87, "elapsed_time": "5:43:26", "remaining_time": "1 day, 11:32:52", "throughput": 27380.79, "total_tokens": 564209216} +{"current_steps": 2020, "total_steps": 14493, "loss": 1.0739, "lr": 4.560745483763357e-05, "epoch": 0.4182086384927926, "percentage": 13.94, "elapsed_time": "5:45:01", "remaining_time": "1 day, 11:30:24", "throughput": 27389.04, "total_tokens": 566985088} +{"current_steps": 2030, "total_steps": 14493, "loss": 1.0721, "lr": 4.5588493603355595e-05, "epoch": 0.4202789782873114, "percentage": 14.01, "elapsed_time": "5:46:39", "remaining_time": "1 day, 11:28:17", "throughput": 27392.94, "total_tokens": 569764928} +{"current_steps": 2040, "total_steps": 14493, "loss": 1.0807, "lr": 4.556955599874859e-05, "epoch": 0.4223493180818302, "percentage": 14.08, "elapsed_time": "5:48:17", "remaining_time": "1 day, 11:26:09", "throughput": 27398.73, "total_tokens": 572576448} +{"current_steps": 2050, "total_steps": 14493, "loss": 1.0826, "lr": 4.555064197477409e-05, "epoch": 0.42441965787634894, "percentage": 14.14, "elapsed_time": "5:49:59", "remaining_time": "1 day, 11:24:19", "throughput": 27402.85, "total_tokens": 575437248} 
+{"current_steps": 2060, "total_steps": 14493, "loss": 1.0784, "lr": 4.5531751482536e-05, "epoch": 0.4264899976708677, "percentage": 14.21, "elapsed_time": "5:51:40", "remaining_time": "1 day, 11:22:30", "throughput": 27406.88, "total_tokens": 578299776} +{"current_steps": 2070, "total_steps": 14493, "loss": 1.0667, "lr": 4.5512884473280024e-05, "epoch": 0.4285603374653865, "percentage": 14.28, "elapsed_time": "5:53:15", "remaining_time": "1 day, 11:20:06", "throughput": 27414.28, "total_tokens": 581071232} +{"current_steps": 2080, "total_steps": 14493, "loss": 1.0661, "lr": 4.549404089839322e-05, "epoch": 0.4306306772599053, "percentage": 14.35, "elapsed_time": "5:54:51", "remaining_time": "1 day, 11:17:45", "throughput": 27421.86, "total_tokens": 583860864} +{"current_steps": 2090, "total_steps": 14493, "loss": 1.0689, "lr": 4.547522070940335e-05, "epoch": 0.43270101705442404, "percentage": 14.42, "elapsed_time": "5:56:25", "remaining_time": "1 day, 11:15:08", "throughput": 27432.07, "total_tokens": 586637184} +{"current_steps": 2100, "total_steps": 14493, "loss": 1.0807, "lr": 4.545642385797848e-05, "epoch": 0.43477135684894286, "percentage": 14.49, "elapsed_time": "5:57:59", "remaining_time": "1 day, 11:12:42", "throughput": 27439.39, "total_tokens": 589395072} +{"current_steps": 2110, "total_steps": 14493, "loss": 1.0724, "lr": 4.543765029592637e-05, "epoch": 0.4368416966434616, "percentage": 14.56, "elapsed_time": "5:59:36", "remaining_time": "1 day, 11:10:24", "throughput": 27448.37, "total_tokens": 592229760} +{"current_steps": 2120, "total_steps": 14493, "loss": 1.0763, "lr": 4.541889997519403e-05, "epoch": 0.4389120364379804, "percentage": 14.63, "elapsed_time": "6:01:15", "remaining_time": "1 day, 11:08:22", "throughput": 27454.19, "total_tokens": 595071552} +{"current_steps": 2130, "total_steps": 14493, "loss": 1.0857, "lr": 4.5400172847867095e-05, "epoch": 0.44098237623249914, "percentage": 14.7, "elapsed_time": "6:02:50", "remaining_time": "1 day, 
11:05:59", "throughput": 27461.5, "total_tokens": 597844160} +{"current_steps": 2140, "total_steps": 14493, "loss": 1.0806, "lr": 4.5381468866169466e-05, "epoch": 0.44305271602701796, "percentage": 14.77, "elapsed_time": "6:04:26", "remaining_time": "1 day, 11:03:40", "throughput": 27468.38, "total_tokens": 600626944} +{"current_steps": 2150, "total_steps": 14493, "loss": 1.0639, "lr": 4.5362787982462616e-05, "epoch": 0.4451230558215367, "percentage": 14.83, "elapsed_time": "6:05:59", "remaining_time": "1 day, 11:01:10", "throughput": 27478.25, "total_tokens": 603419072} +{"current_steps": 2160, "total_steps": 14493, "loss": 1.0534, "lr": 4.5344130149245275e-05, "epoch": 0.4471933956160555, "percentage": 14.9, "elapsed_time": "6:07:34", "remaining_time": "1 day, 10:58:43", "throughput": 27484.97, "total_tokens": 606157568} +{"current_steps": 2170, "total_steps": 14493, "loss": 1.0573, "lr": 4.5325495319152715e-05, "epoch": 0.44926373541057424, "percentage": 14.97, "elapsed_time": "6:09:10", "remaining_time": "1 day, 10:56:30", "throughput": 27490.76, "total_tokens": 608946624} +{"current_steps": 2180, "total_steps": 14493, "loss": 1.0658, "lr": 4.530688344495644e-05, "epoch": 0.45133407520509305, "percentage": 15.04, "elapsed_time": "6:10:50", "remaining_time": "1 day, 10:54:36", "throughput": 27494.4, "total_tokens": 611774720} +{"current_steps": 2190, "total_steps": 14493, "loss": 1.0688, "lr": 4.528829447956357e-05, "epoch": 0.4534044149996118, "percentage": 15.11, "elapsed_time": "6:12:27", "remaining_time": "1 day, 10:52:22", "throughput": 27500.99, "total_tokens": 614570176} +{"current_steps": 2200, "total_steps": 14493, "loss": 1.0769, "lr": 4.526972837601633e-05, "epoch": 0.4554747547941306, "percentage": 15.18, "elapsed_time": "6:14:03", "remaining_time": "1 day, 10:50:06", "throughput": 27507.17, "total_tokens": 617349504} +{"current_steps": 2210, "total_steps": 14493, "loss": 1.0854, "lr": 4.525118508749165e-05, "epoch": 0.45754509458864934, 
"percentage": 15.25, "elapsed_time": "6:15:45", "remaining_time": "1 day, 10:48:26", "throughput": 27509.44, "total_tokens": 620218240} +{"current_steps": 2220, "total_steps": 14493, "loss": 1.0629, "lr": 4.5232664567300546e-05, "epoch": 0.45961543438316815, "percentage": 15.32, "elapsed_time": "6:17:23", "remaining_time": "1 day, 10:46:19", "throughput": 27514.2, "total_tokens": 623004608} +{"current_steps": 2230, "total_steps": 14493, "loss": 1.0644, "lr": 4.521416676888773e-05, "epoch": 0.4616857741776869, "percentage": 15.39, "elapsed_time": "6:19:02", "remaining_time": "1 day, 10:44:25", "throughput": 27518.18, "total_tokens": 625843008} +{"current_steps": 2240, "total_steps": 14493, "loss": 1.064, "lr": 4.519569164583107e-05, "epoch": 0.4637561139722057, "percentage": 15.46, "elapsed_time": "6:20:39", "remaining_time": "1 day, 10:42:14", "throughput": 27523.8, "total_tokens": 628633472} +{"current_steps": 2250, "total_steps": 14493, "loss": 1.0837, "lr": 4.517723915184109e-05, "epoch": 0.4658264537667245, "percentage": 15.52, "elapsed_time": "6:22:21", "remaining_time": "1 day, 10:40:30", "throughput": 27525.39, "total_tokens": 631465856} +{"current_steps": 2260, "total_steps": 14493, "loss": 1.0575, "lr": 4.5158809240760506e-05, "epoch": 0.46789679356124325, "percentage": 15.59, "elapsed_time": "6:23:55", "remaining_time": "1 day, 10:38:07", "throughput": 27534.28, "total_tokens": 634269376} +{"current_steps": 2270, "total_steps": 14493, "loss": 1.0637, "lr": 4.514040186656375e-05, "epoch": 0.469967133355762, "percentage": 15.66, "elapsed_time": "6:25:30", "remaining_time": "1 day, 10:35:49", "throughput": 27541.02, "total_tokens": 637042816} +{"current_steps": 2280, "total_steps": 14493, "loss": 1.05, "lr": 4.512201698335644e-05, "epoch": 0.4720374731502808, "percentage": 15.73, "elapsed_time": "6:27:05", "remaining_time": "1 day, 10:33:27", "throughput": 27547.8, "total_tokens": 639801728} +{"current_steps": 2290, "total_steps": 14493, "loss": 1.0693, 
"lr": 4.510365454537496e-05, "epoch": 0.4741078129447996, "percentage": 15.8, "elapsed_time": "6:28:38", "remaining_time": "1 day, 10:30:58", "throughput": 27555.96, "total_tokens": 642554752} +{"current_steps": 2300, "total_steps": 14493, "loss": 1.0588, "lr": 4.5085314506985945e-05, "epoch": 0.47617815273931835, "percentage": 15.87, "elapsed_time": "6:30:17", "remaining_time": "1 day, 10:29:03", "throughput": 27558.97, "total_tokens": 645360832} +{"current_steps": 2310, "total_steps": 14493, "loss": 1.0745, "lr": 4.50669968226858e-05, "epoch": 0.4782484925338371, "percentage": 15.94, "elapsed_time": "6:31:56", "remaining_time": "1 day, 10:27:06", "throughput": 27562.91, "total_tokens": 648182208} +{"current_steps": 2320, "total_steps": 14493, "loss": 1.0758, "lr": 4.504870144710027e-05, "epoch": 0.48031883232835587, "percentage": 16.01, "elapsed_time": "6:33:32", "remaining_time": "1 day, 10:24:55", "throughput": 27569.43, "total_tokens": 650987776} +{"current_steps": 2330, "total_steps": 14493, "loss": 1.0589, "lr": 4.5030428334983884e-05, "epoch": 0.4823891721228747, "percentage": 16.08, "elapsed_time": "6:35:13", "remaining_time": "1 day, 10:23:07", "throughput": 27570.55, "total_tokens": 653786624} +{"current_steps": 2340, "total_steps": 14493, "loss": 1.0631, "lr": 4.501217744121959e-05, "epoch": 0.48445951191739345, "percentage": 16.15, "elapsed_time": "6:36:50", "remaining_time": "1 day, 10:21:01", "throughput": 27576.11, "total_tokens": 656596672} +{"current_steps": 2350, "total_steps": 14493, "loss": 1.0567, "lr": 4.499394872081821e-05, "epoch": 0.4865298517119122, "percentage": 16.21, "elapsed_time": "6:38:29", "remaining_time": "1 day, 10:19:08", "throughput": 27579.56, "total_tokens": 659425216} +{"current_steps": 2360, "total_steps": 14493, "loss": 1.0599, "lr": 4.4975742128918e-05, "epoch": 0.48860019150643097, "percentage": 16.28, "elapsed_time": "6:40:08", "remaining_time": "1 day, 10:17:08", "throughput": 27582.89, "total_tokens": 662214976} 
+{"current_steps": 2370, "total_steps": 14493, "loss": 1.0704, "lr": 4.495755762078418e-05, "epoch": 0.4906705313009498, "percentage": 16.35, "elapsed_time": "6:41:46", "remaining_time": "1 day, 10:15:10", "throughput": 27588.56, "total_tokens": 665068288} +{"current_steps": 2380, "total_steps": 14493, "loss": 1.0703, "lr": 4.49393951518085e-05, "epoch": 0.49274087109546855, "percentage": 16.42, "elapsed_time": "6:43:19", "remaining_time": "1 day, 10:12:44", "throughput": 27596.15, "total_tokens": 667821248} +{"current_steps": 2390, "total_steps": 14493, "loss": 1.0614, "lr": 4.4921254677508716e-05, "epoch": 0.4948112108899873, "percentage": 16.49, "elapsed_time": "6:45:00", "remaining_time": "1 day, 10:11:00", "throughput": 27598.46, "total_tokens": 670668736} +{"current_steps": 2400, "total_steps": 14493, "loss": 1.0558, "lr": 4.490313615352821e-05, "epoch": 0.49688155068450607, "percentage": 16.56, "elapsed_time": "6:46:37", "remaining_time": "1 day, 10:08:55", "throughput": 27604.15, "total_tokens": 673483904} +{"current_steps": 2410, "total_steps": 14493, "loss": 1.069, "lr": 4.48850395356355e-05, "epoch": 0.4989518904790249, "percentage": 16.63, "elapsed_time": "6:48:14", "remaining_time": "1 day, 10:06:46", "throughput": 27610.43, "total_tokens": 676297536} +{"current_steps": 2420, "total_steps": 14493, "loss": 1.049, "lr": 4.486696477972375e-05, "epoch": 0.5010222302735436, "percentage": 16.7, "elapsed_time": "6:49:54", "remaining_time": "1 day, 10:04:55", "throughput": 27613.74, "total_tokens": 679132416} +{"current_steps": 2430, "total_steps": 14493, "loss": 1.0654, "lr": 4.484891184181041e-05, "epoch": 0.5030925700680624, "percentage": 16.77, "elapsed_time": "6:51:29", "remaining_time": "1 day, 10:02:42", "throughput": 27620.4, "total_tokens": 681927232} +{"current_steps": 2440, "total_steps": 14493, "loss": 1.0578, "lr": 4.483088067803662e-05, "epoch": 0.5051629098625812, "percentage": 16.84, "elapsed_time": "6:53:04", "remaining_time": "1 day, 
10:00:29", "throughput": 27625.98, "total_tokens": 684699776} +{"current_steps": 2450, "total_steps": 14493, "loss": 1.0662, "lr": 4.481287124466697e-05, "epoch": 0.5072332496570999, "percentage": 16.9, "elapsed_time": "6:54:41", "remaining_time": "1 day, 9:58:23", "throughput": 27630.51, "total_tokens": 687480320} +{"current_steps": 2460, "total_steps": 14493, "loss": 1.0483, "lr": 4.479488349808885e-05, "epoch": 0.5093035894516188, "percentage": 16.97, "elapsed_time": "6:56:17", "remaining_time": "1 day, 9:56:16", "throughput": 27635.91, "total_tokens": 690277312} +{"current_steps": 2470, "total_steps": 14493, "loss": 1.0443, "lr": 4.4776917394812114e-05, "epoch": 0.5113739292461376, "percentage": 17.04, "elapsed_time": "6:57:56", "remaining_time": "1 day, 9:54:20", "throughput": 27639.92, "total_tokens": 693101120} +{"current_steps": 2480, "total_steps": 14493, "loss": 1.0428, "lr": 4.475897289146862e-05, "epoch": 0.5134442690406563, "percentage": 17.11, "elapsed_time": "6:59:31", "remaining_time": "1 day, 9:52:11", "throughput": 27645.06, "total_tokens": 695877696} +{"current_steps": 2490, "total_steps": 14493, "loss": 1.0519, "lr": 4.4741049944811806e-05, "epoch": 0.5155146088351751, "percentage": 17.18, "elapsed_time": "7:01:11", "remaining_time": "1 day, 9:50:22", "throughput": 27648.53, "total_tokens": 698728640} +{"current_steps": 2500, "total_steps": 14493, "loss": 1.049, "lr": 4.472314851171621e-05, "epoch": 0.5175849486296938, "percentage": 17.25, "elapsed_time": "7:02:53", "remaining_time": "1 day, 9:48:41", "throughput": 27648.67, "total_tokens": 701543104} +{"current_steps": 2510, "total_steps": 14493, "loss": 1.0453, "lr": 4.4705268549177084e-05, "epoch": 0.5196552884242126, "percentage": 17.32, "elapsed_time": "7:04:26", "remaining_time": "1 day, 9:46:21", "throughput": 27655.38, "total_tokens": 704294976} +{"current_steps": 2520, "total_steps": 14493, "loss": 1.0536, "lr": 4.468741001430989e-05, "epoch": 0.5217256282187314, "percentage": 17.39, 
"elapsed_time": "7:06:04", "remaining_time": "1 day, 9:44:20", "throughput": 27659.78, "total_tokens": 707099520} +{"current_steps": 2530, "total_steps": 14493, "loss": 1.0492, "lr": 4.466957286434997e-05, "epoch": 0.5237959680132501, "percentage": 17.46, "elapsed_time": "7:07:42", "remaining_time": "1 day, 9:42:24", "throughput": 27663.1, "total_tokens": 709908928} +{"current_steps": 2540, "total_steps": 14493, "loss": 1.0544, "lr": 4.4651757056652e-05, "epoch": 0.525866307807769, "percentage": 17.53, "elapsed_time": "7:09:18", "remaining_time": "1 day, 9:40:17", "throughput": 27667.62, "total_tokens": 712680768} +{"current_steps": 2550, "total_steps": 14493, "loss": 1.0631, "lr": 4.463396254868968e-05, "epoch": 0.5279366476022878, "percentage": 17.59, "elapsed_time": "7:10:56", "remaining_time": "1 day, 9:38:17", "throughput": 27671.19, "total_tokens": 715466368} +{"current_steps": 2560, "total_steps": 14493, "loss": 1.0666, "lr": 4.461618929805519e-05, "epoch": 0.5300069873968065, "percentage": 17.66, "elapsed_time": "7:12:37", "remaining_time": "1 day, 9:36:34", "throughput": 27672.93, "total_tokens": 718309888} +{"current_steps": 2570, "total_steps": 14493, "loss": 1.0678, "lr": 4.459843726245888e-05, "epoch": 0.5320773271913253, "percentage": 17.73, "elapsed_time": "7:14:17", "remaining_time": "1 day, 9:34:50", "throughput": 27676.36, "total_tokens": 721187008} +{"current_steps": 2580, "total_steps": 14493, "loss": 1.0547, "lr": 4.458070639972875e-05, "epoch": 0.534147666985844, "percentage": 17.8, "elapsed_time": "7:15:56", "remaining_time": "1 day, 9:32:55", "throughput": 27679.73, "total_tokens": 724001536} +{"current_steps": 2590, "total_steps": 14493, "loss": 1.0387, "lr": 4.456299666781007e-05, "epoch": 0.5362180067803628, "percentage": 17.87, "elapsed_time": "7:17:34", "remaining_time": "1 day, 9:31:00", "throughput": 27683.14, "total_tokens": 726811968} +{"current_steps": 2600, "total_steps": 14493, "loss": 1.0263, "lr": 4.4545308024764984e-05, 
"epoch": 0.5382883465748816, "percentage": 17.94, "elapsed_time": "7:19:11", "remaining_time": "1 day, 9:28:58", "throughput": 27687.3, "total_tokens": 729604736} +{"current_steps": 2610, "total_steps": 14493, "loss": 1.0363, "lr": 4.452764042877207e-05, "epoch": 0.5403586863694004, "percentage": 18.01, "elapsed_time": "7:20:53", "remaining_time": "1 day, 9:27:21", "throughput": 27688.95, "total_tokens": 732481472} +{"current_steps": 2620, "total_steps": 14493, "loss": 1.0516, "lr": 4.45099938381259e-05, "epoch": 0.5424290261639192, "percentage": 18.08, "elapsed_time": "7:22:33", "remaining_time": "1 day, 9:25:30", "throughput": 27692.07, "total_tokens": 735313280} +{"current_steps": 2630, "total_steps": 14493, "loss": 1.0414, "lr": 4.449236821123667e-05, "epoch": 0.544499365958438, "percentage": 18.15, "elapsed_time": "7:24:09", "remaining_time": "1 day, 9:23:27", "throughput": 27697.32, "total_tokens": 738123520} +{"current_steps": 2640, "total_steps": 14493, "loss": 1.0442, "lr": 4.447476350662976e-05, "epoch": 0.5465697057529567, "percentage": 18.22, "elapsed_time": "7:25:46", "remaining_time": "1 day, 9:21:24", "throughput": 27701.79, "total_tokens": 740919104} +{"current_steps": 2650, "total_steps": 14493, "loss": 1.0581, "lr": 4.4457179682945346e-05, "epoch": 0.5486400455474755, "percentage": 18.28, "elapsed_time": "7:27:20", "remaining_time": "1 day, 9:19:12", "throughput": 27708.5, "total_tokens": 743716544} +{"current_steps": 2660, "total_steps": 14493, "loss": 1.0612, "lr": 4.443961669893798e-05, "epoch": 0.5507103853419942, "percentage": 18.35, "elapsed_time": "7:28:56", "remaining_time": "1 day, 9:17:08", "throughput": 27712.73, "total_tokens": 746493952} +{"current_steps": 2670, "total_steps": 14493, "loss": 1.0472, "lr": 4.4422074513476155e-05, "epoch": 0.552780725136513, "percentage": 18.42, "elapsed_time": "7:30:35", "remaining_time": "1 day, 9:15:15", "throughput": 27716.33, "total_tokens": 749322304} +{"current_steps": 2680, "total_steps": 14493, 
"loss": 1.0521, "lr": 4.4404553085541955e-05, "epoch": 0.5548510649310318, "percentage": 18.49, "elapsed_time": "7:32:10", "remaining_time": "1 day, 9:13:05", "throughput": 27721.32, "total_tokens": 752084096} +{"current_steps": 2690, "total_steps": 14493, "loss": 1.0521, "lr": 4.438705237423063e-05, "epoch": 0.5569214047255506, "percentage": 18.56, "elapsed_time": "7:33:45", "remaining_time": "1 day, 9:10:56", "throughput": 27726.9, "total_tokens": 754865088} +{"current_steps": 2700, "total_steps": 14493, "loss": 1.0543, "lr": 4.436957233875017e-05, "epoch": 0.5589917445200694, "percentage": 18.63, "elapsed_time": "7:35:26", "remaining_time": "1 day, 9:09:15", "throughput": 27728.16, "total_tokens": 757709248} +{"current_steps": 2710, "total_steps": 14493, "loss": 1.0604, "lr": 4.4352112938420956e-05, "epoch": 0.5610620843145881, "percentage": 18.7, "elapsed_time": "7:37:08", "remaining_time": "1 day, 9:07:36", "throughput": 27728.84, "total_tokens": 760548416} +{"current_steps": 2720, "total_steps": 14493, "loss": 1.0426, "lr": 4.433467413267529e-05, "epoch": 0.5631324241091069, "percentage": 18.77, "elapsed_time": "7:38:41", "remaining_time": "1 day, 9:05:22", "throughput": 27735.14, "total_tokens": 763317952} +{"current_steps": 2730, "total_steps": 14493, "loss": 1.0614, "lr": 4.431725588105708e-05, "epoch": 0.5652027639036257, "percentage": 18.84, "elapsed_time": "7:40:19", "remaining_time": "1 day, 9:03:25", "throughput": 27738.78, "total_tokens": 766121664} +{"current_steps": 2740, "total_steps": 14493, "loss": 1.0416, "lr": 4.4299858143221377e-05, "epoch": 0.5672731036981444, "percentage": 18.91, "elapsed_time": "7:41:51", "remaining_time": "1 day, 9:01:04", "throughput": 27744.86, "total_tokens": 768842624} +{"current_steps": 2750, "total_steps": 14493, "loss": 1.0355, "lr": 4.4282480878934065e-05, "epoch": 0.5693434434926632, "percentage": 18.97, "elapsed_time": "7:43:22", "remaining_time": "1 day, 8:58:43", "throughput": 27750.6, "total_tokens": 
771549120} +{"current_steps": 2760, "total_steps": 14493, "loss": 1.0431, "lr": 4.4265124048071346e-05, "epoch": 0.571413783287182, "percentage": 19.04, "elapsed_time": "7:44:59", "remaining_time": "1 day, 8:56:41", "throughput": 27755.4, "total_tokens": 774348480} +{"current_steps": 2770, "total_steps": 14493, "loss": 1.0516, "lr": 4.4247787610619477e-05, "epoch": 0.5734841230817008, "percentage": 19.11, "elapsed_time": "7:46:36", "remaining_time": "1 day, 8:54:44", "throughput": 27759.97, "total_tokens": 777179648} +{"current_steps": 2780, "total_steps": 14493, "loss": 1.0404, "lr": 4.42304715266743e-05, "epoch": 0.5755544628762196, "percentage": 19.18, "elapsed_time": "7:48:10", "remaining_time": "1 day, 8:52:35", "throughput": 27766.37, "total_tokens": 779982208} +{"current_steps": 2790, "total_steps": 14493, "loss": 1.0452, "lr": 4.421317575644092e-05, "epoch": 0.5776248026707383, "percentage": 19.25, "elapsed_time": "7:49:47", "remaining_time": "1 day, 8:50:37", "throughput": 27770.03, "total_tokens": 782780416} +{"current_steps": 2800, "total_steps": 14493, "loss": 1.0443, "lr": 4.419590026023325e-05, "epoch": 0.5796951424652571, "percentage": 19.32, "elapsed_time": "7:51:25", "remaining_time": "1 day, 8:48:41", "throughput": 27774.32, "total_tokens": 785603776} +{"current_steps": 2810, "total_steps": 14493, "loss": 1.0449, "lr": 4.417864499847368e-05, "epoch": 0.5817654822597759, "percentage": 19.39, "elapsed_time": "7:53:06", "remaining_time": "1 day, 8:47:01", "throughput": 27775.31, "total_tokens": 788444864} +{"current_steps": 2820, "total_steps": 14493, "loss": 1.0328, "lr": 4.4161409931692676e-05, "epoch": 0.5838358220542946, "percentage": 19.46, "elapsed_time": "7:54:41", "remaining_time": "1 day, 8:44:53", "throughput": 27779.61, "total_tokens": 791192448} +{"current_steps": 2830, "total_steps": 14493, "loss": 1.0446, "lr": 4.414419502052841e-05, "epoch": 0.5859061618488134, "percentage": 19.53, "elapsed_time": "7:56:19", "remaining_time": "1 day, 
8:43:02", "throughput": 27782.7, "total_tokens": 794021568} +{"current_steps": 2840, "total_steps": 14493, "loss": 1.0561, "lr": 4.412700022572637e-05, "epoch": 0.5879765016433323, "percentage": 19.6, "elapsed_time": "7:57:57", "remaining_time": "1 day, 8:41:07", "throughput": 27786.5, "total_tokens": 796837952} +{"current_steps": 2850, "total_steps": 14493, "loss": 1.0261, "lr": 4.410982550813902e-05, "epoch": 0.590046841437851, "percentage": 19.66, "elapsed_time": "7:59:31", "remaining_time": "1 day, 8:38:59", "throughput": 27792.46, "total_tokens": 799631168} +{"current_steps": 2860, "total_steps": 14493, "loss": 1.0347, "lr": 4.409267082872535e-05, "epoch": 0.5921171812323698, "percentage": 19.73, "elapsed_time": "8:01:06", "remaining_time": "1 day, 8:36:53", "throughput": 27797.41, "total_tokens": 802411520} +{"current_steps": 2870, "total_steps": 14493, "loss": 1.0416, "lr": 4.407553614855059e-05, "epoch": 0.5941875210268885, "percentage": 19.8, "elapsed_time": "8:02:41", "remaining_time": "1 day, 8:34:48", "throughput": 27802.23, "total_tokens": 805190336} +{"current_steps": 2880, "total_steps": 14493, "loss": 1.049, "lr": 4.405842142878579e-05, "epoch": 0.5962578608214073, "percentage": 19.87, "elapsed_time": "8:04:20", "remaining_time": "1 day, 8:32:58", "throughput": 27805.34, "total_tokens": 808026368} +{"current_steps": 2890, "total_steps": 14493, "loss": 1.038, "lr": 4.404132663070745e-05, "epoch": 0.5983282006159261, "percentage": 19.94, "elapsed_time": "8:05:52", "remaining_time": "1 day, 8:30:43", "throughput": 27811.91, "total_tokens": 810786176} +{"current_steps": 2900, "total_steps": 14493, "loss": 1.0372, "lr": 4.402425171569716e-05, "epoch": 0.6003985404104448, "percentage": 20.01, "elapsed_time": "8:07:28", "remaining_time": "1 day, 8:28:44", "throughput": 27816.91, "total_tokens": 813610624} +{"current_steps": 2910, "total_steps": 14493, "loss": 1.046, "lr": 4.400719664524127e-05, "epoch": 0.6024688802049636, "percentage": 20.08, 
"elapsed_time": "8:09:07", "remaining_time": "1 day, 8:26:54", "throughput": 27820.02, "total_tokens": 816442304} +{"current_steps": 2920, "total_steps": 14493, "loss": 1.0441, "lr": 4.399016138093044e-05, "epoch": 0.6045392199994825, "percentage": 20.15, "elapsed_time": "8:10:41", "remaining_time": "1 day, 8:24:47", "throughput": 27825.7, "total_tokens": 819232576} +{"current_steps": 2930, "total_steps": 14493, "loss": 1.0383, "lr": 4.397314588445937e-05, "epoch": 0.6066095597940012, "percentage": 20.22, "elapsed_time": "8:12:18", "remaining_time": "1 day, 8:22:52", "throughput": 27828.06, "total_tokens": 822010816} +{"current_steps": 2940, "total_steps": 14493, "loss": 1.0281, "lr": 4.395615011762637e-05, "epoch": 0.60867989958852, "percentage": 20.29, "elapsed_time": "8:13:57", "remaining_time": "1 day, 8:21:04", "throughput": 27830.39, "total_tokens": 824828480} +{"current_steps": 2950, "total_steps": 14493, "loss": 1.0511, "lr": 4.3939174042333057e-05, "epoch": 0.6107502393830387, "percentage": 20.35, "elapsed_time": "8:15:30", "remaining_time": "1 day, 8:18:53", "throughput": 27837.02, "total_tokens": 827619840} +{"current_steps": 2960, "total_steps": 14493, "loss": 1.0445, "lr": 4.3922217620583904e-05, "epoch": 0.6128205791775575, "percentage": 20.42, "elapsed_time": "8:17:10", "remaining_time": "1 day, 8:17:06", "throughput": 27838.65, "total_tokens": 830429056} +{"current_steps": 2970, "total_steps": 14493, "loss": 1.0356, "lr": 4.3905280814486025e-05, "epoch": 0.6148909189720763, "percentage": 20.49, "elapsed_time": "8:18:47", "remaining_time": "1 day, 8:15:11", "throughput": 27843.33, "total_tokens": 833273472} +{"current_steps": 2980, "total_steps": 14493, "loss": 1.0279, "lr": 4.388836358624867e-05, "epoch": 0.616961258766595, "percentage": 20.56, "elapsed_time": "8:20:26", "remaining_time": "1 day, 8:13:24", "throughput": 27845.49, "total_tokens": 836098176} +{"current_steps": 2990, "total_steps": 14493, "loss": 1.0434, "lr": 4.3871465898182976e-05, 
"epoch": 0.6190315985611139, "percentage": 20.63, "elapsed_time": "8:22:02", "remaining_time": "1 day, 8:11:27", "throughput": 27848.64, "total_tokens": 838878464} +{"current_steps": 3000, "total_steps": 14493, "loss": 1.0443, "lr": 4.385458771270156e-05, "epoch": 0.6211019383556327, "percentage": 20.7, "elapsed_time": "8:23:38", "remaining_time": "1 day, 8:09:28", "throughput": 27852.36, "total_tokens": 841665344} +{"current_steps": 3010, "total_steps": 14493, "loss": 1.0252, "lr": 4.3837728992318205e-05, "epoch": 0.6231722781501514, "percentage": 20.77, "elapsed_time": "8:25:16", "remaining_time": "1 day, 8:07:35", "throughput": 27855.21, "total_tokens": 844467648} +{"current_steps": 3020, "total_steps": 14493, "loss": 1.0299, "lr": 4.382088969964746e-05, "epoch": 0.6252426179446702, "percentage": 20.84, "elapsed_time": "8:26:51", "remaining_time": "1 day, 8:05:34", "throughput": 27860.01, "total_tokens": 847271744} +{"current_steps": 3030, "total_steps": 14493, "loss": 1.0361, "lr": 4.380406979740436e-05, "epoch": 0.6273129577391889, "percentage": 20.91, "elapsed_time": "8:28:27", "remaining_time": "1 day, 8:03:33", "throughput": 27864.23, "total_tokens": 850054336} +{"current_steps": 3040, "total_steps": 14493, "loss": 1.0431, "lr": 4.3787269248403994e-05, "epoch": 0.6293832975337077, "percentage": 20.98, "elapsed_time": "8:29:58", "remaining_time": "1 day, 8:01:19", "throughput": 27871.44, "total_tokens": 852836224} +{"current_steps": 3050, "total_steps": 14493, "loss": 1.0387, "lr": 4.377048801556126e-05, "epoch": 0.6314536373282265, "percentage": 21.04, "elapsed_time": "8:31:32", "remaining_time": "1 day, 7:59:13", "throughput": 27876.14, "total_tokens": 855601280} +{"current_steps": 3060, "total_steps": 14493, "loss": 1.0318, "lr": 4.3753726061890446e-05, "epoch": 0.6335239771227452, "percentage": 21.11, "elapsed_time": "8:33:15", "remaining_time": "1 day, 7:57:40", "throughput": 27875.73, "total_tokens": 858446208} +{"current_steps": 3070, "total_steps": 
14493, "loss": 1.0403, "lr": 4.373698335050488e-05, "epoch": 0.6355943169172641, "percentage": 21.18, "elapsed_time": "8:34:50", "remaining_time": "1 day, 7:55:37", "throughput": 27880.42, "total_tokens": 861229568} +{"current_steps": 3080, "total_steps": 14493, "loss": 1.0387, "lr": 4.372025984461667e-05, "epoch": 0.6376646567117829, "percentage": 21.25, "elapsed_time": "8:36:26", "remaining_time": "1 day, 7:53:41", "throughput": 27884.75, "total_tokens": 864055040} +{"current_steps": 3090, "total_steps": 14493, "loss": 1.0313, "lr": 4.370355550753629e-05, "epoch": 0.6397349965063016, "percentage": 21.32, "elapsed_time": "8:38:02", "remaining_time": "1 day, 7:51:43", "throughput": 27887.18, "total_tokens": 866802496} +{"current_steps": 3100, "total_steps": 14493, "loss": 1.033, "lr": 4.368687030267226e-05, "epoch": 0.6418053363008204, "percentage": 21.39, "elapsed_time": "8:39:41", "remaining_time": "1 day, 7:49:55", "throughput": 27889.88, "total_tokens": 869634560} +{"current_steps": 3110, "total_steps": 14493, "loss": 1.0376, "lr": 4.367020419353081e-05, "epoch": 0.6438756760953391, "percentage": 21.46, "elapsed_time": "8:41:17", "remaining_time": "1 day, 7:47:58", "throughput": 27894.02, "total_tokens": 872444352} +{"current_steps": 3120, "total_steps": 14493, "loss": 1.0296, "lr": 4.365355714371558e-05, "epoch": 0.6459460158898579, "percentage": 21.53, "elapsed_time": "8:42:56", "remaining_time": "1 day, 7:46:11", "throughput": 27895.33, "total_tokens": 875246656} +{"current_steps": 3130, "total_steps": 14493, "loss": 1.0418, "lr": 4.3636929116927235e-05, "epoch": 0.6480163556843767, "percentage": 21.6, "elapsed_time": "8:44:31", "remaining_time": "1 day, 7:44:13", "throughput": 27899.79, "total_tokens": 878058560} +{"current_steps": 3140, "total_steps": 14493, "loss": 1.0408, "lr": 4.362032007696314e-05, "epoch": 0.6500866954788955, "percentage": 21.67, "elapsed_time": "8:46:07", "remaining_time": "1 day, 7:42:15", "throughput": 27904.69, "total_tokens": 
880878336} +{"current_steps": 3150, "total_steps": 14493, "loss": 1.0341, "lr": 4.360372998771707e-05, "epoch": 0.6521570352734143, "percentage": 21.73, "elapsed_time": "8:47:43", "remaining_time": "1 day, 7:40:19", "throughput": 27907.82, "total_tokens": 883665152} +{"current_steps": 3160, "total_steps": 14493, "loss": 1.0345, "lr": 4.358715881317884e-05, "epoch": 0.654227375067933, "percentage": 21.8, "elapsed_time": "8:49:19", "remaining_time": "1 day, 7:38:20", "throughput": 27912.17, "total_tokens": 886466496} +{"current_steps": 3170, "total_steps": 14493, "loss": 1.0378, "lr": 4.357060651743399e-05, "epoch": 0.6562977148624518, "percentage": 21.87, "elapsed_time": "8:50:54", "remaining_time": "1 day, 7:36:21", "throughput": 27915.33, "total_tokens": 889228864} +{"current_steps": 3180, "total_steps": 14493, "loss": 1.0309, "lr": 4.3554073064663454e-05, "epoch": 0.6583680546569706, "percentage": 21.94, "elapsed_time": "8:52:34", "remaining_time": "1 day, 7:34:41", "throughput": 27916.2, "total_tokens": 892059392} +{"current_steps": 3190, "total_steps": 14493, "loss": 1.0127, "lr": 4.353755841914325e-05, "epoch": 0.6604383944514893, "percentage": 22.01, "elapsed_time": "8:54:11", "remaining_time": "1 day, 7:32:45", "throughput": 27919.28, "total_tokens": 894846016} +{"current_steps": 3200, "total_steps": 14493, "loss": 1.0448, "lr": 4.3521062545244116e-05, "epoch": 0.6625087342460081, "percentage": 22.08, "elapsed_time": "8:55:51", "remaining_time": "1 day, 7:31:06", "throughput": 27920.5, "total_tokens": 897698240} +{"current_steps": 3210, "total_steps": 14493, "loss": 1.0339, "lr": 4.350458540743126e-05, "epoch": 0.6645790740405269, "percentage": 22.15, "elapsed_time": "8:57:30", "remaining_time": "1 day, 7:29:18", "throughput": 27922.68, "total_tokens": 900515456} +{"current_steps": 3220, "total_steps": 14493, "loss": 1.0233, "lr": 4.3488126970263955e-05, "epoch": 0.6666494138350457, "percentage": 22.22, "elapsed_time": "8:59:07", "remaining_time": "1 day, 
7:27:27", "throughput": 27924.29, "total_tokens": 903293824} +{"current_steps": 3230, "total_steps": 14493, "loss": 1.0338, "lr": 4.347168719839527e-05, "epoch": 0.6687197536295645, "percentage": 22.29, "elapsed_time": "9:00:48", "remaining_time": "1 day, 7:25:46", "throughput": 27924.63, "total_tokens": 906098944} +{"current_steps": 3240, "total_steps": 14493, "loss": 1.0312, "lr": 4.345526605657173e-05, "epoch": 0.6707900934240832, "percentage": 22.36, "elapsed_time": "9:02:25", "remaining_time": "1 day, 7:23:53", "throughput": 27928.04, "total_tokens": 908919104} +{"current_steps": 3250, "total_steps": 14493, "loss": 1.032, "lr": 4.343886350963304e-05, "epoch": 0.672860433218602, "percentage": 22.42, "elapsed_time": "9:04:02", "remaining_time": "1 day, 7:22:04", "throughput": 27930.52, "total_tokens": 911732352} +{"current_steps": 3260, "total_steps": 14493, "loss": 1.0442, "lr": 4.3422479522511697e-05, "epoch": 0.6749307730131208, "percentage": 22.49, "elapsed_time": "9:05:44", "remaining_time": "1 day, 7:20:26", "throughput": 27931.26, "total_tokens": 914585856} +{"current_steps": 3270, "total_steps": 14493, "loss": 1.0194, "lr": 4.340611406023272e-05, "epoch": 0.6770011128076395, "percentage": 22.56, "elapsed_time": "9:07:20", "remaining_time": "1 day, 7:18:30", "throughput": 27934.53, "total_tokens": 917372672} +{"current_steps": 3280, "total_steps": 14493, "loss": 1.0141, "lr": 4.338976708791336e-05, "epoch": 0.6790714526021583, "percentage": 22.63, "elapsed_time": "9:08:57", "remaining_time": "1 day, 7:16:38", "throughput": 27938.42, "total_tokens": 920207744} +{"current_steps": 3290, "total_steps": 14493, "loss": 1.026, "lr": 4.337343857076272e-05, "epoch": 0.6811417923966772, "percentage": 22.7, "elapsed_time": "9:10:33", "remaining_time": "1 day, 7:14:44", "throughput": 27941.01, "total_tokens": 922991104} +{"current_steps": 3300, "total_steps": 14493, "loss": 1.0233, "lr": 4.33571284740815e-05, "epoch": 0.6832121321911959, "percentage": 22.77, 
"elapsed_time": "9:12:10", "remaining_time": "1 day, 7:12:53", "throughput": 27943.61, "total_tokens": 925795200} +{"current_steps": 3310, "total_steps": 14493, "loss": 1.029, "lr": 4.3340836763261675e-05, "epoch": 0.6852824719857147, "percentage": 22.84, "elapsed_time": "9:13:46", "remaining_time": "1 day, 7:10:58", "throughput": 27947.66, "total_tokens": 928612736} +{"current_steps": 3320, "total_steps": 14493, "loss": 1.0123, "lr": 4.332456340378618e-05, "epoch": 0.6873528117802334, "percentage": 22.91, "elapsed_time": "9:15:26", "remaining_time": "1 day, 7:09:16", "throughput": 27948.9, "total_tokens": 931448384} +{"current_steps": 3330, "total_steps": 14493, "loss": 1.024, "lr": 4.3308308361228586e-05, "epoch": 0.6894231515747522, "percentage": 22.98, "elapsed_time": "9:17:03", "remaining_time": "1 day, 7:07:23", "throughput": 27952.5, "total_tokens": 934263424} +{"current_steps": 3340, "total_steps": 14493, "loss": 1.037, "lr": 4.329207160125282e-05, "epoch": 0.691493491369271, "percentage": 23.05, "elapsed_time": "9:18:35", "remaining_time": "1 day, 7:05:15", "throughput": 27958.38, "total_tokens": 937041280} +{"current_steps": 3350, "total_steps": 14493, "loss": 1.0304, "lr": 4.327585308961287e-05, "epoch": 0.6935638311637897, "percentage": 23.11, "elapsed_time": "9:20:14", "remaining_time": "1 day, 7:03:30", "throughput": 27960.55, "total_tokens": 939877184} +{"current_steps": 3360, "total_steps": 14493, "loss": 1.0244, "lr": 4.325965279215243e-05, "epoch": 0.6956341709583085, "percentage": 23.18, "elapsed_time": "9:21:53", "remaining_time": "1 day, 7:01:47", "throughput": 27960.97, "total_tokens": 942675072} +{"current_steps": 3370, "total_steps": 14493, "loss": 1.0128, "lr": 4.3243470674804686e-05, "epoch": 0.6977045107528274, "percentage": 23.25, "elapsed_time": "9:23:33", "remaining_time": "1 day, 7:00:04", "throughput": 27962.25, "total_tokens": 945503936} +{"current_steps": 3380, "total_steps": 14493, "loss": 1.0325, "lr": 4.3227306703591904e-05, 
"epoch": 0.6997748505473461, "percentage": 23.32, "elapsed_time": "9:25:08", "remaining_time": "1 day, 6:58:07", "throughput": 27966.05, "total_tokens": 948290944} +{"current_steps": 3390, "total_steps": 14493, "loss": 1.0256, "lr": 4.32111608446252e-05, "epoch": 0.7018451903418649, "percentage": 23.39, "elapsed_time": "9:26:44", "remaining_time": "1 day, 6:56:13", "throughput": 27969.08, "total_tokens": 951084928} +{"current_steps": 3400, "total_steps": 14493, "loss": 1.0163, "lr": 4.319503306410426e-05, "epoch": 0.7039155301363836, "percentage": 23.46, "elapsed_time": "9:28:25", "remaining_time": "1 day, 6:54:34", "throughput": 27970.4, "total_tokens": 953946944} +{"current_steps": 3410, "total_steps": 14493, "loss": 1.0016, "lr": 4.317892332831699e-05, "epoch": 0.7059858699309024, "percentage": 23.53, "elapsed_time": "9:30:02", "remaining_time": "1 day, 6:52:42", "throughput": 27973.29, "total_tokens": 956750400} +{"current_steps": 3420, "total_steps": 14493, "loss": 1.0293, "lr": 4.316283160363922e-05, "epoch": 0.7080562097254212, "percentage": 23.6, "elapsed_time": "9:31:37", "remaining_time": "1 day, 6:50:45", "throughput": 27977.76, "total_tokens": 959561536} +{"current_steps": 3430, "total_steps": 14493, "loss": 1.0296, "lr": 4.314675785653447e-05, "epoch": 0.7101265495199399, "percentage": 23.67, "elapsed_time": "9:33:16", "remaining_time": "1 day, 6:49:00", "throughput": 27978.9, "total_tokens": 962366016} +{"current_steps": 3440, "total_steps": 14493, "loss": 1.0194, "lr": 4.3130702053553606e-05, "epoch": 0.7121968893144587, "percentage": 23.74, "elapsed_time": "9:34:51", "remaining_time": "1 day, 6:47:04", "throughput": 27982.36, "total_tokens": 965157568} +{"current_steps": 3450, "total_steps": 14493, "loss": 1.0216, "lr": 4.3114664161334546e-05, "epoch": 0.7142672291089776, "percentage": 23.8, "elapsed_time": "9:36:30", "remaining_time": "1 day, 6:45:19", "throughput": 27983.77, "total_tokens": 967970368} +{"current_steps": 3460, "total_steps": 14493, 
"loss": 1.0295, "lr": 4.3098644146601984e-05, "epoch": 0.7163375689034963, "percentage": 23.87, "elapsed_time": "9:38:06", "remaining_time": "1 day, 6:43:24", "throughput": 27988.28, "total_tokens": 970805952} +{"current_steps": 3470, "total_steps": 14493, "loss": 1.0401, "lr": 4.30826419761671e-05, "epoch": 0.7184079086980151, "percentage": 23.94, "elapsed_time": "9:39:43", "remaining_time": "1 day, 6:41:35", "throughput": 27991.08, "total_tokens": 973627200} +{"current_steps": 3480, "total_steps": 14493, "loss": 1.0265, "lr": 4.30666576169273e-05, "epoch": 0.7204782484925338, "percentage": 24.01, "elapsed_time": "9:41:22", "remaining_time": "1 day, 6:39:50", "throughput": 27992.82, "total_tokens": 976452544} +{"current_steps": 3490, "total_steps": 14493, "loss": 1.0154, "lr": 4.305069103586585e-05, "epoch": 0.7225485882870526, "percentage": 24.08, "elapsed_time": "9:43:00", "remaining_time": "1 day, 6:38:03", "throughput": 27993.86, "total_tokens": 979234816} +{"current_steps": 3500, "total_steps": 14493, "loss": 1.0254, "lr": 4.303474220005164e-05, "epoch": 0.7246189280815714, "percentage": 24.15, "elapsed_time": "9:44:35", "remaining_time": "1 day, 6:36:07", "throughput": 27997.56, "total_tokens": 982031808} +{"current_steps": 3510, "total_steps": 14493, "loss": 0.9984, "lr": 4.3018811076638944e-05, "epoch": 0.7266892678760901, "percentage": 24.22, "elapsed_time": "9:46:13", "remaining_time": "1 day, 6:34:21", "throughput": 27998.65, "total_tokens": 984819776} +{"current_steps": 3520, "total_steps": 14493, "loss": 1.008, "lr": 4.300289763286704e-05, "epoch": 0.728759607670609, "percentage": 24.29, "elapsed_time": "9:47:49", "remaining_time": "1 day, 6:32:27", "throughput": 28002.68, "total_tokens": 987650752} +{"current_steps": 3530, "total_steps": 14493, "loss": 1.0223, "lr": 4.298700183606e-05, "epoch": 0.7308299474651277, "percentage": 24.36, "elapsed_time": "9:49:26", "remaining_time": "1 day, 6:30:36", "throughput": 28005.7, "total_tokens": 990462976} 
+{"current_steps": 3540, "total_steps": 14493, "loss": 1.0163, "lr": 4.297112365362637e-05, "epoch": 0.7329002872596465, "percentage": 24.43, "elapsed_time": "9:51:03", "remaining_time": "1 day, 6:28:47", "throughput": 28008.43, "total_tokens": 993288768} +{"current_steps": 3550, "total_steps": 14493, "loss": 1.0218, "lr": 4.295526305305891e-05, "epoch": 0.7349706270541653, "percentage": 24.49, "elapsed_time": "9:52:39", "remaining_time": "1 day, 6:26:52", "throughput": 28011.52, "total_tokens": 996067072} +{"current_steps": 3560, "total_steps": 14493, "loss": 1.0253, "lr": 4.293942000193429e-05, "epoch": 0.737040966848684, "percentage": 24.56, "elapsed_time": "9:54:20", "remaining_time": "1 day, 6:25:14", "throughput": 28012.44, "total_tokens": 998928000} +{"current_steps": 3570, "total_steps": 14493, "loss": 1.0286, "lr": 4.2923594467912866e-05, "epoch": 0.7391113066432028, "percentage": 24.63, "elapsed_time": "9:55:53", "remaining_time": "1 day, 6:23:14", "throughput": 28016.27, "total_tokens": 1001688960} +{"current_steps": 3580, "total_steps": 14493, "loss": 1.0163, "lr": 4.290778641873832e-05, "epoch": 0.7411816464377216, "percentage": 24.7, "elapsed_time": "9:57:28", "remaining_time": "1 day, 6:21:17", "throughput": 28020.37, "total_tokens": 1004489472} +{"current_steps": 3590, "total_steps": 14493, "loss": 1.0264, "lr": 4.2891995822237455e-05, "epoch": 0.7432519862322403, "percentage": 24.77, "elapsed_time": "9:59:03", "remaining_time": "1 day, 6:19:21", "throughput": 28025.04, "total_tokens": 1007311040} +{"current_steps": 3600, "total_steps": 14493, "loss": 1.0093, "lr": 4.28762226463199e-05, "epoch": 0.7453223260267592, "percentage": 24.84, "elapsed_time": "10:00:43", "remaining_time": "1 day, 6:17:40", "throughput": 28026.23, "total_tokens": 1010152832} +{"current_steps": 3610, "total_steps": 14493, "loss": 1.0193, "lr": 4.286046685897781e-05, "epoch": 0.747392665821278, "percentage": 24.91, "elapsed_time": "10:02:21", "remaining_time": "1 day, 
6:15:54", "throughput": 28028.27, "total_tokens": 1012981504} +{"current_steps": 3620, "total_steps": 14493, "loss": 1.0197, "lr": 4.284472842828562e-05, "epoch": 0.7494630056157967, "percentage": 24.98, "elapsed_time": "10:04:00", "remaining_time": "1 day, 6:14:11", "throughput": 28029.16, "total_tokens": 1015792768} +{"current_steps": 3630, "total_steps": 14493, "loss": 1.0122, "lr": 4.282900732239977e-05, "epoch": 0.7515333454103155, "percentage": 25.05, "elapsed_time": "10:05:38", "remaining_time": "1 day, 6:12:24", "throughput": 28031.18, "total_tokens": 1018606400} +{"current_steps": 3640, "total_steps": 14493, "loss": 1.0217, "lr": 4.281330350955845e-05, "epoch": 0.7536036852048342, "percentage": 25.12, "elapsed_time": "10:07:18", "remaining_time": "1 day, 6:10:43", "throughput": 28032.05, "total_tokens": 1021432192} +{"current_steps": 3650, "total_steps": 14493, "loss": 1.0299, "lr": 4.279761695808125e-05, "epoch": 0.755674024999353, "percentage": 25.18, "elapsed_time": "10:08:52", "remaining_time": "1 day, 6:08:46", "throughput": 28035.19, "total_tokens": 1024199744} +{"current_steps": 3660, "total_steps": 14493, "loss": 1.0381, "lr": 4.278194763636904e-05, "epoch": 0.7577443647938717, "percentage": 25.25, "elapsed_time": "10:10:35", "remaining_time": "1 day, 6:07:15", "throughput": 28035.52, "total_tokens": 1027097920} +{"current_steps": 3670, "total_steps": 14493, "loss": 1.0387, "lr": 4.276629551290354e-05, "epoch": 0.7598147045883906, "percentage": 25.32, "elapsed_time": "10:12:16", "remaining_time": "1 day, 6:05:37", "throughput": 28036.47, "total_tokens": 1029957312} +{"current_steps": 3680, "total_steps": 14493, "loss": 1.0157, "lr": 4.2750660556247175e-05, "epoch": 0.7618850443829094, "percentage": 25.39, "elapsed_time": "10:13:49", "remaining_time": "1 day, 6:03:37", "throughput": 28040.95, "total_tokens": 1032739200} +{"current_steps": 3690, "total_steps": 14493, "loss": 1.017, "lr": 4.273504273504274e-05, "epoch": 0.7639553841774281, 
"percentage": 25.46, "elapsed_time": "10:15:25", "remaining_time": "1 day, 6:01:45", "throughput": 28044.86, "total_tokens": 1035576896} +{"current_steps": 3700, "total_steps": 14493, "loss": 1.0144, "lr": 4.271944201801317e-05, "epoch": 0.7660257239719469, "percentage": 25.53, "elapsed_time": "10:17:02", "remaining_time": "1 day, 5:59:56", "throughput": 28047.86, "total_tokens": 1038408768} +{"current_steps": 3710, "total_steps": 14493, "loss": 1.0096, "lr": 4.270385837396127e-05, "epoch": 0.7680960637664657, "percentage": 25.6, "elapsed_time": "10:18:35", "remaining_time": "1 day, 5:57:54", "throughput": 28052.46, "total_tokens": 1041173184} +{"current_steps": 3720, "total_steps": 14493, "loss": 1.0192, "lr": 4.268829177176945e-05, "epoch": 0.7701664035609844, "percentage": 25.67, "elapsed_time": "10:20:11", "remaining_time": "1 day, 5:56:04", "throughput": 28054.87, "total_tokens": 1043971520} +{"current_steps": 3730, "total_steps": 14493, "loss": 1.0029, "lr": 4.2672742180399455e-05, "epoch": 0.7722367433555032, "percentage": 25.74, "elapsed_time": "10:21:48", "remaining_time": "1 day, 5:54:15", "throughput": 28056.4, "total_tokens": 1046751232} +{"current_steps": 3740, "total_steps": 14493, "loss": 1.0189, "lr": 4.265720956889213e-05, "epoch": 0.774307083150022, "percentage": 25.81, "elapsed_time": "10:23:24", "remaining_time": "1 day, 5:52:22", "throughput": 28060.44, "total_tokens": 1049577920} +{"current_steps": 3750, "total_steps": 14493, "loss": 1.0194, "lr": 4.2641693906367113e-05, "epoch": 0.7763774229445408, "percentage": 25.87, "elapsed_time": "10:25:00", "remaining_time": "1 day, 5:50:32", "throughput": 28062.87, "total_tokens": 1052382848} +{"current_steps": 3760, "total_steps": 14493, "loss": 1.0217, "lr": 4.2626195162022646e-05, "epoch": 0.7784477627390596, "percentage": 25.94, "elapsed_time": "10:26:39", "remaining_time": "1 day, 5:48:48", "throughput": 28064.01, "total_tokens": 1055188288} +{"current_steps": 3770, "total_steps": 14493, "loss": 
1.0074, "lr": 4.2610713305135255e-05, "epoch": 0.7805181025335783, "percentage": 26.01, "elapsed_time": "10:28:12", "remaining_time": "1 day, 5:46:48", "throughput": 28068.25, "total_tokens": 1057958464} +{"current_steps": 3780, "total_steps": 14493, "loss": 0.998, "lr": 4.2595248305059546e-05, "epoch": 0.7825884423280971, "percentage": 26.08, "elapsed_time": "10:29:50", "remaining_time": "1 day, 5:45:03", "throughput": 28070.09, "total_tokens": 1060789696} +{"current_steps": 3790, "total_steps": 14493, "loss": 1.0157, "lr": 4.2579800131227916e-05, "epoch": 0.7846587821226159, "percentage": 26.15, "elapsed_time": "10:31:28", "remaining_time": "1 day, 5:43:16", "throughput": 28072.06, "total_tokens": 1063598528} +{"current_steps": 3800, "total_steps": 14493, "loss": 1.002, "lr": 4.256436875315028e-05, "epoch": 0.7867291219171346, "percentage": 26.22, "elapsed_time": "10:33:04", "remaining_time": "1 day, 5:41:26", "throughput": 28074.83, "total_tokens": 1066411136} +{"current_steps": 3810, "total_steps": 14493, "loss": 1.0114, "lr": 4.2548954140413895e-05, "epoch": 0.7887994617116534, "percentage": 26.29, "elapsed_time": "10:34:41", "remaining_time": "1 day, 5:39:39", "throughput": 28076.91, "total_tokens": 1069221056} +{"current_steps": 3820, "total_steps": 14493, "loss": 1.0186, "lr": 4.253355626268302e-05, "epoch": 0.7908698015061723, "percentage": 26.36, "elapsed_time": "10:36:23", "remaining_time": "1 day, 5:38:03", "throughput": 28076.69, "total_tokens": 1072058176} +{"current_steps": 3830, "total_steps": 14493, "loss": 1.0163, "lr": 4.2518175089698716e-05, "epoch": 0.792940141300691, "percentage": 26.43, "elapsed_time": "10:38:00", "remaining_time": "1 day, 5:36:16", "throughput": 28078.8, "total_tokens": 1074878272} +{"current_steps": 3840, "total_steps": 14493, "loss": 1.0185, "lr": 4.25028105912786e-05, "epoch": 0.7950104810952098, "percentage": 26.5, "elapsed_time": "10:39:38", "remaining_time": "1 day, 5:34:30", "throughput": 28081.14, "total_tokens": 
1077712640} +{"current_steps": 3850, "total_steps": 14493, "loss": 1.0084, "lr": 4.2487462737316565e-05, "epoch": 0.7970808208897285, "percentage": 26.56, "elapsed_time": "10:41:14", "remaining_time": "1 day, 5:32:40", "throughput": 28083.7, "total_tokens": 1080517760} +{"current_steps": 3860, "total_steps": 14493, "loss": 1.0041, "lr": 4.2472131497782555e-05, "epoch": 0.7991511606842473, "percentage": 26.63, "elapsed_time": "10:42:53", "remaining_time": "1 day, 5:30:57", "throughput": 28084.84, "total_tokens": 1083339392} +{"current_steps": 3870, "total_steps": 14493, "loss": 1.0234, "lr": 4.245681684272231e-05, "epoch": 0.8012215004787661, "percentage": 26.7, "elapsed_time": "10:44:34", "remaining_time": "1 day, 5:29:19", "throughput": 28085.12, "total_tokens": 1086169536} +{"current_steps": 3880, "total_steps": 14493, "loss": 1.0008, "lr": 4.244151874225712e-05, "epoch": 0.8032918402732848, "percentage": 26.77, "elapsed_time": "10:46:11", "remaining_time": "1 day, 5:27:32", "throughput": 28087.47, "total_tokens": 1088997312} +{"current_steps": 3890, "total_steps": 14493, "loss": 1.0139, "lr": 4.2426237166583596e-05, "epoch": 0.8053621800678036, "percentage": 26.84, "elapsed_time": "10:47:48", "remaining_time": "1 day, 5:25:43", "throughput": 28089.11, "total_tokens": 1091772224} +{"current_steps": 3900, "total_steps": 14493, "loss": 0.9906, "lr": 4.241097208597339e-05, "epoch": 0.8074325198623225, "percentage": 26.91, "elapsed_time": "10:49:27", "remaining_time": "1 day, 5:24:02", "throughput": 28089.81, "total_tokens": 1094596288} +{"current_steps": 3910, "total_steps": 14493, "loss": 1.0071, "lr": 4.2395723470773005e-05, "epoch": 0.8095028596568412, "percentage": 26.98, "elapsed_time": "10:51:00", "remaining_time": "1 day, 5:22:02", "throughput": 28093.89, "total_tokens": 1097353024} +{"current_steps": 3920, "total_steps": 14493, "loss": 1.0109, "lr": 4.238049129140347e-05, "epoch": 0.81157319945136, "percentage": 27.05, "elapsed_time": "10:52:35", 
"remaining_time": "1 day, 5:20:10", "throughput": 28096.25, "total_tokens": 1100126336} +{"current_steps": 3930, "total_steps": 14493, "loss": 0.9929, "lr": 4.236527551836022e-05, "epoch": 0.8136435392458787, "percentage": 27.12, "elapsed_time": "10:54:11", "remaining_time": "1 day, 5:18:18", "throughput": 28098.86, "total_tokens": 1102909440} +{"current_steps": 3940, "total_steps": 14493, "loss": 0.9885, "lr": 4.235007612221274e-05, "epoch": 0.8157138790403975, "percentage": 27.19, "elapsed_time": "10:55:48", "remaining_time": "1 day, 5:16:30", "throughput": 28100.67, "total_tokens": 1105705600} +{"current_steps": 3950, "total_steps": 14493, "loss": 1.0132, "lr": 4.2334893073604386e-05, "epoch": 0.8177842188349163, "percentage": 27.25, "elapsed_time": "10:57:23", "remaining_time": "1 day, 5:14:37", "throughput": 28103.81, "total_tokens": 1108499008} +{"current_steps": 3960, "total_steps": 14493, "loss": 0.9957, "lr": 4.231972634325214e-05, "epoch": 0.819854558629435, "percentage": 27.32, "elapsed_time": "10:58:58", "remaining_time": "1 day, 5:12:47", "throughput": 28105.55, "total_tokens": 1111260608} +{"current_steps": 3970, "total_steps": 14493, "loss": 1.0052, "lr": 4.230457590194635e-05, "epoch": 0.8219248984239539, "percentage": 27.39, "elapsed_time": "11:00:35", "remaining_time": "1 day, 5:11:00", "throughput": 28107.84, "total_tokens": 1114080256} +{"current_steps": 3980, "total_steps": 14493, "loss": 1.0199, "lr": 4.228944172055053e-05, "epoch": 0.8239952382184726, "percentage": 27.46, "elapsed_time": "11:02:17", "remaining_time": "1 day, 5:09:24", "throughput": 28107.81, "total_tokens": 1116928576} +{"current_steps": 3990, "total_steps": 14493, "loss": 1.0007, "lr": 4.22743237700011e-05, "epoch": 0.8260655780129914, "percentage": 27.53, "elapsed_time": "11:03:53", "remaining_time": "1 day, 5:07:34", "throughput": 28110.75, "total_tokens": 1119739392} +{"current_steps": 4000, "total_steps": 14493, "loss": 1.0113, "lr": 4.225922202130716e-05, "epoch": 
0.8281359178075102, "percentage": 27.6, "elapsed_time": "11:05:28", "remaining_time": "1 day, 5:05:42", "throughput": 28113.21, "total_tokens": 1122514624} +{"current_steps": 4010, "total_steps": 14493, "loss": 1.0121, "lr": 4.224413644555024e-05, "epoch": 0.8302062576020289, "percentage": 27.67, "elapsed_time": "11:07:03", "remaining_time": "1 day, 5:03:49", "throughput": 28115.67, "total_tokens": 1125283328} +{"current_steps": 4020, "total_steps": 14493, "loss": 1.0029, "lr": 4.222906701388411e-05, "epoch": 0.8322765973965477, "percentage": 27.74, "elapsed_time": "11:08:40", "remaining_time": "1 day, 5:02:01", "throughput": 28118.51, "total_tokens": 1128115584} +{"current_steps": 4030, "total_steps": 14493, "loss": 1.0144, "lr": 4.2214013697534466e-05, "epoch": 0.8343469371910665, "percentage": 27.81, "elapsed_time": "11:10:23", "remaining_time": "1 day, 5:00:30", "throughput": 28116.78, "total_tokens": 1130949184} +{"current_steps": 4040, "total_steps": 14493, "loss": 1.0094, "lr": 4.219897646779882e-05, "epoch": 0.8364172769855852, "percentage": 27.88, "elapsed_time": "11:11:56", "remaining_time": "1 day, 4:58:32", "throughput": 28121.16, "total_tokens": 1133733568} +{"current_steps": 4050, "total_steps": 14493, "loss": 0.9993, "lr": 4.2183955296046145e-05, "epoch": 0.8384876167801041, "percentage": 27.94, "elapsed_time": "11:13:29", "remaining_time": "1 day, 4:56:37", "throughput": 28123.98, "total_tokens": 1136485824} +{"current_steps": 4060, "total_steps": 14493, "loss": 1.0158, "lr": 4.2168950153716746e-05, "epoch": 0.8405579565746228, "percentage": 28.01, "elapsed_time": "11:15:06", "remaining_time": "1 day, 4:54:48", "throughput": 28126.31, "total_tokens": 1139284416} +{"current_steps": 4070, "total_steps": 14493, "loss": 0.9966, "lr": 4.215396101232197e-05, "epoch": 0.8426282963691416, "percentage": 28.08, "elapsed_time": "11:16:41", "remaining_time": "1 day, 4:52:57", "throughput": 28129.15, "total_tokens": 1142084544} +{"current_steps": 4080, 
"total_steps": 14493, "loss": 1.0007, "lr": 4.213898784344398e-05, "epoch": 0.8446986361636604, "percentage": 28.15, "elapsed_time": "11:18:23", "remaining_time": "1 day, 4:51:22", "throughput": 28128.77, "total_tokens": 1144926464} +{"current_steps": 4090, "total_steps": 14493, "loss": 0.9986, "lr": 4.21240306187356e-05, "epoch": 0.8467689759581791, "percentage": 28.22, "elapsed_time": "11:19:56", "remaining_time": "1 day, 4:49:27", "throughput": 28132.14, "total_tokens": 1147703872} +{"current_steps": 4100, "total_steps": 14493, "loss": 1.0164, "lr": 4.2109089309919967e-05, "epoch": 0.8488393157526979, "percentage": 28.29, "elapsed_time": "11:21:32", "remaining_time": "1 day, 4:47:37", "throughput": 28135.02, "total_tokens": 1150516864} +{"current_steps": 4110, "total_steps": 14493, "loss": 1.0071, "lr": 4.2094163888790445e-05, "epoch": 0.8509096555472166, "percentage": 28.36, "elapsed_time": "11:23:09", "remaining_time": "1 day, 4:45:50", "throughput": 28135.93, "total_tokens": 1153272640} +{"current_steps": 4120, "total_steps": 14493, "loss": 1.0095, "lr": 4.2079254327210294e-05, "epoch": 0.8529799953417354, "percentage": 28.43, "elapsed_time": "11:24:47", "remaining_time": "1 day, 4:44:05", "throughput": 28138.36, "total_tokens": 1156123776} +{"current_steps": 4130, "total_steps": 14493, "loss": 0.9939, "lr": 4.206436059711249e-05, "epoch": 0.8550503351362543, "percentage": 28.5, "elapsed_time": "11:26:20", "remaining_time": "1 day, 4:42:10", "throughput": 28141.55, "total_tokens": 1158882944} +{"current_steps": 4140, "total_steps": 14493, "loss": 1.0058, "lr": 4.20494826704995e-05, "epoch": 0.857120674930773, "percentage": 28.57, "elapsed_time": "11:27:55", "remaining_time": "1 day, 4:40:19", "throughput": 28143.41, "total_tokens": 1161640384} +{"current_steps": 4150, "total_steps": 14493, "loss": 0.9929, "lr": 4.203462051944307e-05, "epoch": 0.8591910147252918, "percentage": 28.63, "elapsed_time": "11:29:31", "remaining_time": "1 day, 4:38:29", "throughput": 
28145.92, "total_tokens": 1164431104} +{"current_steps": 4160, "total_steps": 14493, "loss": 0.9924, "lr": 4.201977411608398e-05, "epoch": 0.8612613545198106, "percentage": 28.7, "elapsed_time": "11:31:10", "remaining_time": "1 day, 4:36:48", "throughput": 28147.31, "total_tokens": 1167285568} +{"current_steps": 4170, "total_steps": 14493, "loss": 1.0095, "lr": 4.200494343263185e-05, "epoch": 0.8633316943143293, "percentage": 28.77, "elapsed_time": "11:32:47", "remaining_time": "1 day, 4:35:02", "throughput": 28149.63, "total_tokens": 1170117504} +{"current_steps": 4180, "total_steps": 14493, "loss": 1.0123, "lr": 4.1990128441364914e-05, "epoch": 0.8654020341088481, "percentage": 28.84, "elapsed_time": "11:34:20", "remaining_time": "1 day, 4:33:06", "throughput": 28154.04, "total_tokens": 1172918848} +{"current_steps": 4190, "total_steps": 14493, "loss": 1.0086, "lr": 4.197532911462977e-05, "epoch": 0.8674723739033668, "percentage": 28.91, "elapsed_time": "11:35:54", "remaining_time": "1 day, 4:31:12", "throughput": 28156.84, "total_tokens": 1175677248} +{"current_steps": 4200, "total_steps": 14493, "loss": 1.0065, "lr": 4.196054542484125e-05, "epoch": 0.8695427136978857, "percentage": 28.98, "elapsed_time": "11:37:35", "remaining_time": "1 day, 4:29:34", "throughput": 28156.71, "total_tokens": 1178502400} +{"current_steps": 4210, "total_steps": 14493, "loss": 0.9958, "lr": 4.1945777344482084e-05, "epoch": 0.8716130534924045, "percentage": 29.05, "elapsed_time": "11:39:10", "remaining_time": "1 day, 4:27:45", "throughput": 28158.97, "total_tokens": 1181292608} +{"current_steps": 4220, "total_steps": 14493, "loss": 1.0071, "lr": 4.19310248461028e-05, "epoch": 0.8736833932869232, "percentage": 29.12, "elapsed_time": "11:40:46", "remaining_time": "1 day, 4:25:55", "throughput": 28160.91, "total_tokens": 1184063168} +{"current_steps": 4230, "total_steps": 14493, "loss": 0.9936, "lr": 4.1916287902321405e-05, "epoch": 0.875753733081442, "percentage": 29.19, 
"elapsed_time": "11:42:25", "remaining_time": "1 day, 4:24:14", "throughput": 28161.72, "total_tokens": 1186881536} +{"current_steps": 4240, "total_steps": 14493, "loss": 0.9977, "lr": 4.190156648582328e-05, "epoch": 0.8778240728759608, "percentage": 29.26, "elapsed_time": "11:44:00", "remaining_time": "1 day, 4:22:25", "throughput": 28164.15, "total_tokens": 1189676800} +{"current_steps": 4250, "total_steps": 14493, "loss": 1.0107, "lr": 4.188686056936087e-05, "epoch": 0.8798944126704795, "percentage": 29.32, "elapsed_time": "11:45:36", "remaining_time": "1 day, 4:20:35", "throughput": 28166.71, "total_tokens": 1192469632} +{"current_steps": 4260, "total_steps": 14493, "loss": 0.9919, "lr": 4.187217012575352e-05, "epoch": 0.8819647524649983, "percentage": 29.39, "elapsed_time": "11:47:11", "remaining_time": "1 day, 4:18:46", "throughput": 28169.79, "total_tokens": 1195298752} +{"current_steps": 4270, "total_steps": 14493, "loss": 0.9824, "lr": 4.185749512788727e-05, "epoch": 0.884035092259517, "percentage": 29.46, "elapsed_time": "11:48:49", "remaining_time": "1 day, 4:17:02", "throughput": 28171.91, "total_tokens": 1198147712} +{"current_steps": 4280, "total_steps": 14493, "loss": 0.9983, "lr": 4.184283554871462e-05, "epoch": 0.8861054320540359, "percentage": 29.53, "elapsed_time": "11:50:24", "remaining_time": "1 day, 4:15:11", "throughput": 28173.39, "total_tokens": 1200884352} +{"current_steps": 4290, "total_steps": 14493, "loss": 1.0103, "lr": 4.1828191361254344e-05, "epoch": 0.8881757718485547, "percentage": 29.6, "elapsed_time": "11:52:02", "remaining_time": "1 day, 4:13:27", "throughput": 28175.34, "total_tokens": 1203714048} +{"current_steps": 4300, "total_steps": 14493, "loss": 1.0107, "lr": 4.181356253859127e-05, "epoch": 0.8902461116430734, "percentage": 29.67, "elapsed_time": "11:53:36", "remaining_time": "1 day, 4:11:35", "throughput": 28177.78, "total_tokens": 1206481792} +{"current_steps": 4310, "total_steps": 14493, "loss": 1.0178, "lr": 
4.179894905387606e-05, "epoch": 0.8923164514375922, "percentage": 29.74, "elapsed_time": "11:55:13", "remaining_time": "1 day, 4:09:49", "throughput": 28179.92, "total_tokens": 1209298880} +{"current_steps": 4320, "total_steps": 14493, "loss": 1.0012, "lr": 4.178435088032502e-05, "epoch": 0.894386791232111, "percentage": 29.81, "elapsed_time": "11:56:49", "remaining_time": "1 day, 4:08:01", "throughput": 28182.03, "total_tokens": 1212093824} +{"current_steps": 4330, "total_steps": 14493, "loss": 0.9991, "lr": 4.176976799121989e-05, "epoch": 0.8964571310266297, "percentage": 29.88, "elapsed_time": "11:58:25", "remaining_time": "1 day, 4:06:14", "throughput": 28184.17, "total_tokens": 1214901568} +{"current_steps": 4340, "total_steps": 14493, "loss": 1.0036, "lr": 4.1755200359907657e-05, "epoch": 0.8985274708211485, "percentage": 29.95, "elapsed_time": "12:00:00", "remaining_time": "1 day, 4:04:24", "throughput": 28186.55, "total_tokens": 1217683776} +{"current_steps": 4350, "total_steps": 14493, "loss": 1.0162, "lr": 4.174064795980028e-05, "epoch": 0.9005978106156673, "percentage": 30.01, "elapsed_time": "12:01:35", "remaining_time": "1 day, 4:02:34", "throughput": 28188.25, "total_tokens": 1220434368} +{"current_steps": 4360, "total_steps": 14493, "loss": 1.0092, "lr": 4.17261107643746e-05, "epoch": 0.9026681504101861, "percentage": 30.08, "elapsed_time": "12:03:12", "remaining_time": "1 day, 4:00:46", "throughput": 28189.4, "total_tokens": 1223198720} +{"current_steps": 4370, "total_steps": 14493, "loss": 1.0021, "lr": 4.171158874717204e-05, "epoch": 0.9047384902047049, "percentage": 30.15, "elapsed_time": "12:04:50", "remaining_time": "1 day, 3:59:05", "throughput": 28190.7, "total_tokens": 1226032768} +{"current_steps": 4380, "total_steps": 14493, "loss": 0.9994, "lr": 4.169708188179844e-05, "epoch": 0.9068088299992236, "percentage": 30.22, "elapsed_time": "12:06:26", "remaining_time": "1 day, 3:57:16", "throughput": 28193.08, "total_tokens": 1228827136} 
+{"current_steps": 4390, "total_steps": 14493, "loss": 0.9922, "lr": 4.1682590141923846e-05, "epoch": 0.9088791697937424, "percentage": 30.29, "elapsed_time": "12:08:06", "remaining_time": "1 day, 3:55:39", "throughput": 28192.46, "total_tokens": 1231635264} +{"current_steps": 4400, "total_steps": 14493, "loss": 1.0045, "lr": 4.1668113501282335e-05, "epoch": 0.9109495095882612, "percentage": 30.36, "elapsed_time": "12:09:39", "remaining_time": "1 day, 3:53:44", "throughput": 28195.27, "total_tokens": 1234383808} +{"current_steps": 4410, "total_steps": 14493, "loss": 0.9971, "lr": 4.165365193367178e-05, "epoch": 0.9130198493827799, "percentage": 30.43, "elapsed_time": "12:11:15", "remaining_time": "1 day, 3:51:55", "throughput": 28198.88, "total_tokens": 1237230400} +{"current_steps": 4420, "total_steps": 14493, "loss": 0.9887, "lr": 4.163920541295369e-05, "epoch": 0.9150901891772987, "percentage": 30.5, "elapsed_time": "12:12:48", "remaining_time": "1 day, 3:50:01", "throughput": 28201.78, "total_tokens": 1239981696} +{"current_steps": 4430, "total_steps": 14493, "loss": 0.9882, "lr": 4.1624773913052946e-05, "epoch": 0.9171605289718175, "percentage": 30.57, "elapsed_time": "12:14:24", "remaining_time": "1 day, 3:48:15", "throughput": 28203.69, "total_tokens": 1242783296} +{"current_steps": 4440, "total_steps": 14493, "loss": 0.9984, "lr": 4.161035740795769e-05, "epoch": 0.9192308687663363, "percentage": 30.64, "elapsed_time": "12:16:02", "remaining_time": "1 day, 3:46:31", "throughput": 28204.87, "total_tokens": 1245584832} +{"current_steps": 4450, "total_steps": 14493, "loss": 0.983, "lr": 4.1595955871719055e-05, "epoch": 0.9213012085608551, "percentage": 30.7, "elapsed_time": "12:17:36", "remaining_time": "1 day, 3:44:40", "throughput": 28207.57, "total_tokens": 1248367808} +{"current_steps": 4460, "total_steps": 14493, "loss": 1.004, "lr": 4.158156927845101e-05, "epoch": 0.9233715483553738, "percentage": 30.77, "elapsed_time": "12:19:14", "remaining_time": "1 
day, 3:42:57", "throughput": 28208.95, "total_tokens": 1251189632} +{"current_steps": 4470, "total_steps": 14493, "loss": 0.9904, "lr": 4.156719760233016e-05, "epoch": 0.9254418881498926, "percentage": 30.84, "elapsed_time": "12:20:51", "remaining_time": "1 day, 3:41:12", "throughput": 28211.4, "total_tokens": 1254040576} +{"current_steps": 4480, "total_steps": 14493, "loss": 1.0002, "lr": 4.155284081759552e-05, "epoch": 0.9275122279444113, "percentage": 30.91, "elapsed_time": "12:22:25", "remaining_time": "1 day, 3:39:22", "throughput": 28214.01, "total_tokens": 1256818496} +{"current_steps": 4490, "total_steps": 14493, "loss": 1.0131, "lr": 4.1538498898548356e-05, "epoch": 0.9295825677389301, "percentage": 30.98, "elapsed_time": "12:23:58", "remaining_time": "1 day, 3:37:27", "throughput": 28217.93, "total_tokens": 1259611456} +{"current_steps": 4500, "total_steps": 14493, "loss": 0.9998, "lr": 4.1524171819552e-05, "epoch": 0.931652907533449, "percentage": 31.05, "elapsed_time": "12:25:34", "remaining_time": "1 day, 3:35:40", "throughput": 28220.16, "total_tokens": 1262415104} +{"current_steps": 4510, "total_steps": 14493, "loss": 0.9901, "lr": 4.15098595550316e-05, "epoch": 0.9337232473279677, "percentage": 31.12, "elapsed_time": "12:27:11", "remaining_time": "1 day, 3:33:56", "throughput": 28221.92, "total_tokens": 1265236416} +{"current_steps": 4520, "total_steps": 14493, "loss": 0.9962, "lr": 4.1495562079474e-05, "epoch": 0.9357935871224865, "percentage": 31.19, "elapsed_time": "12:28:45", "remaining_time": "1 day, 3:32:04", "throughput": 28225.37, "total_tokens": 1268035072} +{"current_steps": 4530, "total_steps": 14493, "loss": 1.017, "lr": 4.148127936742749e-05, "epoch": 0.9378639269170053, "percentage": 31.26, "elapsed_time": "12:30:23", "remaining_time": "1 day, 3:30:21", "throughput": 28227.02, "total_tokens": 1270877696} +{"current_steps": 4540, "total_steps": 14493, "loss": 0.9974, "lr": 4.146701139350166e-05, "epoch": 0.939934266711524, "percentage": 
31.33, "elapsed_time": "12:31:57", "remaining_time": "1 day, 3:28:30", "throughput": 28229.56, "total_tokens": 1273650432} +{"current_steps": 4550, "total_steps": 14493, "loss": 1.0, "lr": 4.1452758132367196e-05, "epoch": 0.9420046065060428, "percentage": 31.39, "elapsed_time": "12:33:37", "remaining_time": "1 day, 3:26:52", "throughput": 28230.49, "total_tokens": 1276510144} +{"current_steps": 4560, "total_steps": 14493, "loss": 1.0082, "lr": 4.1438519558755656e-05, "epoch": 0.9440749463005615, "percentage": 31.46, "elapsed_time": "12:35:16", "remaining_time": "1 day, 3:25:12", "throughput": 28231.47, "total_tokens": 1279357824} +{"current_steps": 4570, "total_steps": 14493, "loss": 0.9831, "lr": 4.1424295647459336e-05, "epoch": 0.9461452860950803, "percentage": 31.53, "elapsed_time": "12:36:50", "remaining_time": "1 day, 3:23:21", "throughput": 28233.37, "total_tokens": 1282093120} +{"current_steps": 4580, "total_steps": 14493, "loss": 0.9987, "lr": 4.141008637333106e-05, "epoch": 0.9482156258895992, "percentage": 31.6, "elapsed_time": "12:38:24", "remaining_time": "1 day, 3:21:29", "throughput": 28236.72, "total_tokens": 1284891968} +{"current_steps": 4590, "total_steps": 14493, "loss": 0.9955, "lr": 4.1395891711283974e-05, "epoch": 0.9502859656841179, "percentage": 31.67, "elapsed_time": "12:40:01", "remaining_time": "1 day, 3:19:46", "throughput": 28238.4, "total_tokens": 1287718144} +{"current_steps": 4600, "total_steps": 14493, "loss": 0.9904, "lr": 4.1381711636291395e-05, "epoch": 0.9523563054786367, "percentage": 31.74, "elapsed_time": "12:41:41", "remaining_time": "1 day, 3:18:07", "throughput": 28238.95, "total_tokens": 1290556992} +{"current_steps": 4610, "total_steps": 14493, "loss": 1.0015, "lr": 4.1367546123386604e-05, "epoch": 0.9544266452731555, "percentage": 31.81, "elapsed_time": "12:43:19", "remaining_time": "1 day, 3:16:24", "throughput": 28240.8, "total_tokens": 1293401984} +{"current_steps": 4620, "total_steps": 14493, "loss": 1.0094, "lr": 
4.1353395147662673e-05, "epoch": 0.9564969850676742, "percentage": 31.88, "elapsed_time": "12:44:58", "remaining_time": "1 day, 3:14:44", "throughput": 28241.3, "total_tokens": 1296221248} +{"current_steps": 4630, "total_steps": 14493, "loss": 1.006, "lr": 4.133925868427225e-05, "epoch": 0.958567324862193, "percentage": 31.95, "elapsed_time": "12:46:35", "remaining_time": "1 day, 3:13:02", "throughput": 28242.09, "total_tokens": 1299018176} +{"current_steps": 4640, "total_steps": 14493, "loss": 0.9877, "lr": 4.132513670842744e-05, "epoch": 0.9606376646567117, "percentage": 32.02, "elapsed_time": "12:48:11", "remaining_time": "1 day, 3:11:15", "throughput": 28243.88, "total_tokens": 1301816832} +{"current_steps": 4650, "total_steps": 14493, "loss": 0.9906, "lr": 4.1311029195399534e-05, "epoch": 0.9627080044512305, "percentage": 32.08, "elapsed_time": "12:49:45", "remaining_time": "1 day, 3:09:24", "throughput": 28246.76, "total_tokens": 1304598528} +{"current_steps": 4660, "total_steps": 14493, "loss": 0.9951, "lr": 4.129693612051892e-05, "epoch": 0.9647783442457494, "percentage": 32.15, "elapsed_time": "12:51:21", "remaining_time": "1 day, 3:07:37", "throughput": 28248.46, "total_tokens": 1307373696} +{"current_steps": 4670, "total_steps": 14493, "loss": 0.997, "lr": 4.1282857459174826e-05, "epoch": 0.9668486840402681, "percentage": 32.22, "elapsed_time": "12:52:56", "remaining_time": "1 day, 3:05:48", "throughput": 28250.63, "total_tokens": 1310155712} +{"current_steps": 4680, "total_steps": 14493, "loss": 0.9864, "lr": 4.1268793186815184e-05, "epoch": 0.9689190238347869, "percentage": 32.29, "elapsed_time": "12:54:31", "remaining_time": "1 day, 3:04:00", "throughput": 28252.38, "total_tokens": 1312919488} +{"current_steps": 4690, "total_steps": 14493, "loss": 0.9933, "lr": 4.1254743278946456e-05, "epoch": 0.9709893636293057, "percentage": 32.36, "elapsed_time": "12:56:06", "remaining_time": "1 day, 3:02:13", "throughput": 28255.33, "total_tokens": 1315760384} 
+{"current_steps": 4700, "total_steps": 14493, "loss": 0.9976, "lr": 4.1240707711133394e-05, "epoch": 0.9730597034238244, "percentage": 32.43, "elapsed_time": "12:57:47", "remaining_time": "1 day, 3:00:36", "throughput": 28255.59, "total_tokens": 1318614016} +{"current_steps": 4710, "total_steps": 14493, "loss": 0.9986, "lr": 4.122668645899893e-05, "epoch": 0.9751300432183432, "percentage": 32.5, "elapsed_time": "12:59:23", "remaining_time": "1 day, 2:58:50", "throughput": 28257.65, "total_tokens": 1321414848} +{"current_steps": 4720, "total_steps": 14493, "loss": 0.9977, "lr": 4.1212679498223975e-05, "epoch": 0.9772003830128619, "percentage": 32.57, "elapsed_time": "13:01:01", "remaining_time": "1 day, 2:57:09", "throughput": 28258.49, "total_tokens": 1324240384} +{"current_steps": 4730, "total_steps": 14493, "loss": 0.9911, "lr": 4.1198686804547215e-05, "epoch": 0.9792707228073808, "percentage": 32.64, "elapsed_time": "13:02:36", "remaining_time": "1 day, 2:55:21", "throughput": 28261.67, "total_tokens": 1327073216} +{"current_steps": 4740, "total_steps": 14493, "loss": 1.003, "lr": 4.118470835376499e-05, "epoch": 0.9813410626018996, "percentage": 32.71, "elapsed_time": "13:04:11", "remaining_time": "1 day, 2:53:33", "throughput": 28264.43, "total_tokens": 1329889344} +{"current_steps": 4750, "total_steps": 14493, "loss": 0.9892, "lr": 4.117074412173107e-05, "epoch": 0.9834114023964183, "percentage": 32.77, "elapsed_time": "13:05:46", "remaining_time": "1 day, 2:51:45", "throughput": 28267.05, "total_tokens": 1332705600} +{"current_steps": 4760, "total_steps": 14493, "loss": 0.9917, "lr": 4.115679408435648e-05, "epoch": 0.9854817421909371, "percentage": 32.84, "elapsed_time": "13:07:24", "remaining_time": "1 day, 2:50:02", "throughput": 28269.07, "total_tokens": 1335547456} +{"current_steps": 4770, "total_steps": 14493, "loss": 0.9867, "lr": 4.114285821760937e-05, "epoch": 0.9875520819854559, "percentage": 32.91, "elapsed_time": "13:08:58", "remaining_time": "1 
day, 2:48:13", "throughput": 28270.6, "total_tokens": 1338298432} +{"current_steps": 4780, "total_steps": 14493, "loss": 0.9993, "lr": 4.11289364975148e-05, "epoch": 0.9896224217799746, "percentage": 32.98, "elapsed_time": "13:10:33", "remaining_time": "1 day, 2:46:25", "throughput": 28273.08, "total_tokens": 1341098752} +{"current_steps": 4790, "total_steps": 14493, "loss": 0.9862, "lr": 4.111502890015456e-05, "epoch": 0.9916927615744934, "percentage": 33.05, "elapsed_time": "13:12:08", "remaining_time": "1 day, 2:44:37", "throughput": 28275.21, "total_tokens": 1343879168} +{"current_steps": 4800, "total_steps": 14493, "loss": 0.9803, "lr": 4.1101135401667056e-05, "epoch": 0.9937631013690121, "percentage": 33.12, "elapsed_time": "13:13:43", "remaining_time": "1 day, 2:42:48", "throughput": 28276.58, "total_tokens": 1346617152} +{"current_steps": 4810, "total_steps": 14493, "loss": 0.9827, "lr": 4.108725597824708e-05, "epoch": 0.995833441163531, "percentage": 33.19, "elapsed_time": "13:15:19", "remaining_time": "1 day, 2:41:04", "throughput": 28278.56, "total_tokens": 1349446080} +{"current_steps": 4820, "total_steps": 14493, "loss": 1.0102, "lr": 4.107339060614564e-05, "epoch": 0.9979037809580498, "percentage": 33.26, "elapsed_time": "13:16:56", "remaining_time": "1 day, 2:39:21", "throughput": 28279.45, "total_tokens": 1352233600} +{"current_steps": 4830, "total_steps": 14493, "loss": 0.9917, "lr": 4.1059539261669825e-05, "epoch": 0.9999741207525685, "percentage": 33.33, "elapsed_time": "13:18:26", "remaining_time": "1 day, 2:37:22", "throughput": 28283.69, "total_tokens": 1354962816} +{"current_steps": 4840, "total_steps": 14493, "loss": 0.9117, "lr": 4.104570192118262e-05, "epoch": 1.001863305815067, "percentage": 33.4, "elapsed_time": "13:19:50", "remaining_time": "1 day, 2:35:12", "throughput": 28286.44, "total_tokens": 1357469504} +{"current_steps": 4850, "total_steps": 14493, "loss": 0.9013, "lr": 4.1031878561102714e-05, "epoch": 1.0039336456095858, 
"percentage": 33.46, "elapsed_time": "13:21:25", "remaining_time": "1 day, 2:33:26", "throughput": 28288.85, "total_tokens": 1360289344} +{"current_steps": 4860, "total_steps": 14493, "loss": 0.8892, "lr": 4.1018069157904385e-05, "epoch": 1.0060039854041045, "percentage": 33.53, "elapsed_time": "13:23:03", "remaining_time": "1 day, 2:31:44", "throughput": 28290.02, "total_tokens": 1363114368} +{"current_steps": 4870, "total_steps": 14493, "loss": 0.8932, "lr": 4.100427368811727e-05, "epoch": 1.0080743251986233, "percentage": 33.6, "elapsed_time": "13:24:38", "remaining_time": "1 day, 2:29:57", "throughput": 28292.7, "total_tokens": 1365939264} +{"current_steps": 4880, "total_steps": 14493, "loss": 0.8966, "lr": 4.099049212832622e-05, "epoch": 1.010144664993142, "percentage": 33.67, "elapsed_time": "13:26:15", "remaining_time": "1 day, 2:28:13", "throughput": 28294.75, "total_tokens": 1368767360} +{"current_steps": 4890, "total_steps": 14493, "loss": 0.898, "lr": 4.0976724455171155e-05, "epoch": 1.0122150047876608, "percentage": 33.74, "elapsed_time": "13:27:52", "remaining_time": "1 day, 2:26:30", "throughput": 28296.97, "total_tokens": 1371619008} +{"current_steps": 4900, "total_steps": 14493, "loss": 0.8919, "lr": 4.096297064534688e-05, "epoch": 1.0142853445821796, "percentage": 33.81, "elapsed_time": "13:29:28", "remaining_time": "1 day, 2:24:44", "throughput": 28298.53, "total_tokens": 1374410176} +{"current_steps": 4910, "total_steps": 14493, "loss": 0.9019, "lr": 4.0949230675602904e-05, "epoch": 1.0163556843766983, "percentage": 33.88, "elapsed_time": "13:31:04", "remaining_time": "1 day, 2:22:59", "throughput": 28301.19, "total_tokens": 1377254848} +{"current_steps": 4920, "total_steps": 14493, "loss": 0.8878, "lr": 4.09355045227433e-05, "epoch": 1.018426024171217, "percentage": 33.95, "elapsed_time": "13:32:38", "remaining_time": "1 day, 2:21:10", "throughput": 28304.12, "total_tokens": 1380064000} +{"current_steps": 4930, "total_steps": 14493, "loss": 
0.8895, "lr": 4.092179216362654e-05, "epoch": 1.0204963639657358, "percentage": 34.02, "elapsed_time": "13:34:17", "remaining_time": "1 day, 2:19:30", "throughput": 28304.79, "total_tokens": 1382891520} +{"current_steps": 4940, "total_steps": 14493, "loss": 0.8892, "lr": 4.090809357516532e-05, "epoch": 1.0225667037602546, "percentage": 34.09, "elapsed_time": "13:35:52", "remaining_time": "1 day, 2:17:43", "throughput": 28307.0, "total_tokens": 1385686272} +{"current_steps": 4950, "total_steps": 14493, "loss": 0.8979, "lr": 4.089440873432638e-05, "epoch": 1.0246370435547734, "percentage": 34.15, "elapsed_time": "13:37:28", "remaining_time": "1 day, 2:15:59", "throughput": 28308.75, "total_tokens": 1388507392} +{"current_steps": 4960, "total_steps": 14493, "loss": 0.877, "lr": 4.088073761813037e-05, "epoch": 1.0267073833492921, "percentage": 34.22, "elapsed_time": "13:39:04", "remaining_time": "1 day, 2:14:14", "throughput": 28310.45, "total_tokens": 1391302720} +{"current_steps": 4970, "total_steps": 14493, "loss": 0.8958, "lr": 4.086708020365172e-05, "epoch": 1.0287777231438109, "percentage": 34.29, "elapsed_time": "13:40:40", "remaining_time": "1 day, 2:12:28", "throughput": 28312.38, "total_tokens": 1394106816} +{"current_steps": 4980, "total_steps": 14493, "loss": 0.9016, "lr": 4.0853436468018354e-05, "epoch": 1.0308480629383296, "percentage": 34.36, "elapsed_time": "13:42:17", "remaining_time": "1 day, 2:10:46", "throughput": 28313.23, "total_tokens": 1396905344} +{"current_steps": 4990, "total_steps": 14493, "loss": 0.8921, "lr": 4.0839806388411686e-05, "epoch": 1.0329184027328486, "percentage": 34.43, "elapsed_time": "13:43:55", "remaining_time": "1 day, 2:09:04", "throughput": 28314.13, "total_tokens": 1399711488} +{"current_steps": 5000, "total_steps": 14493, "loss": 0.8873, "lr": 4.0826189942066346e-05, "epoch": 1.0349887425273674, "percentage": 34.5, "elapsed_time": "13:45:36", "remaining_time": "1 day, 2:07:29", "throughput": 28313.32, "total_tokens": 
1402533632} +{"current_steps": 5000, "total_steps": 14493, "eval_loss": 1.0262424945831299, "epoch": 1.0349887425273674, "percentage": 34.5, "elapsed_time": "13:45:38", "remaining_time": "1 day, 2:07:34", "throughput": 28311.86, "total_tokens": 1402533632} +{"current_steps": 5010, "total_steps": 14493, "loss": 0.8923, "lr": 4.081258710627008e-05, "epoch": 1.0370590823218861, "percentage": 34.57, "elapsed_time": "13:47:44", "remaining_time": "1 day, 2:06:46", "throughput": 28296.59, "total_tokens": 1405346624} +{"current_steps": 5020, "total_steps": 14493, "loss": 0.8866, "lr": 4.0798997858363557e-05, "epoch": 1.039129422116405, "percentage": 34.64, "elapsed_time": "13:49:21", "remaining_time": "1 day, 2:05:02", "throughput": 28297.42, "total_tokens": 1408123008} +{"current_steps": 5030, "total_steps": 14493, "loss": 0.8772, "lr": 4.078542217574024e-05, "epoch": 1.0411997619109237, "percentage": 34.71, "elapsed_time": "13:51:02", "remaining_time": "1 day, 2:03:27", "throughput": 28297.24, "total_tokens": 1410982144} +{"current_steps": 5040, "total_steps": 14493, "loss": 0.9006, "lr": 4.0771860035846196e-05, "epoch": 1.0432701017054424, "percentage": 34.78, "elapsed_time": "13:52:34", "remaining_time": "1 day, 2:01:34", "throughput": 28300.5, "total_tokens": 1413734720} +{"current_steps": 5050, "total_steps": 14493, "loss": 0.8873, "lr": 4.0758311416179965e-05, "epoch": 1.0453404414999612, "percentage": 34.84, "elapsed_time": "13:54:12", "remaining_time": "1 day, 1:59:52", "throughput": 28301.44, "total_tokens": 1416543744} +{"current_steps": 5060, "total_steps": 14493, "loss": 0.879, "lr": 4.0744776294292386e-05, "epoch": 1.04741078129448, "percentage": 34.91, "elapsed_time": "13:55:51", "remaining_time": "1 day, 1:58:13", "throughput": 28301.73, "total_tokens": 1419367360} +{"current_steps": 5070, "total_steps": 14493, "loss": 0.8858, "lr": 4.073125464778646e-05, "epoch": 1.0494811210889987, "percentage": 34.98, "elapsed_time": "13:57:25", "remaining_time": "1 day, 
1:56:24", "throughput": 28304.26, "total_tokens": 1422153216} +{"current_steps": 5080, "total_steps": 14493, "loss": 0.891, "lr": 4.071774645431717e-05, "epoch": 1.0515514608835175, "percentage": 35.05, "elapsed_time": "13:58:59", "remaining_time": "1 day, 1:54:37", "throughput": 28305.84, "total_tokens": 1424913664} +{"current_steps": 5090, "total_steps": 14493, "loss": 0.8755, "lr": 4.070425169159135e-05, "epoch": 1.0536218006780362, "percentage": 35.12, "elapsed_time": "14:00:38", "remaining_time": "1 day, 1:52:56", "throughput": 28306.6, "total_tokens": 1427732096} +{"current_steps": 5100, "total_steps": 14493, "loss": 0.8704, "lr": 4.069077033736751e-05, "epoch": 1.055692140472555, "percentage": 35.19, "elapsed_time": "14:02:17", "remaining_time": "1 day, 1:51:18", "throughput": 28306.81, "total_tokens": 1430561600} +{"current_steps": 5110, "total_steps": 14493, "loss": 0.8858, "lr": 4.06773023694557e-05, "epoch": 1.0577624802670738, "percentage": 35.26, "elapsed_time": "14:03:50", "remaining_time": "1 day, 1:49:27", "throughput": 28309.92, "total_tokens": 1433331520} +{"current_steps": 5120, "total_steps": 14493, "loss": 0.8868, "lr": 4.066384776571732e-05, "epoch": 1.0598328200615925, "percentage": 35.33, "elapsed_time": "14:05:23", "remaining_time": "1 day, 1:47:38", "throughput": 28312.25, "total_tokens": 1436105536} +{"current_steps": 5130, "total_steps": 14493, "loss": 0.8926, "lr": 4.065040650406504e-05, "epoch": 1.0619031598561113, "percentage": 35.4, "elapsed_time": "14:06:56", "remaining_time": "1 day, 1:45:46", "throughput": 28315.14, "total_tokens": 1438865920} +{"current_steps": 5140, "total_steps": 14493, "loss": 0.8899, "lr": 4.0636978562462576e-05, "epoch": 1.0639734996506303, "percentage": 35.47, "elapsed_time": "14:08:30", "remaining_time": "1 day, 1:43:58", "throughput": 28318.67, "total_tokens": 1441710272} +{"current_steps": 5150, "total_steps": 14493, "loss": 0.8902, "lr": 4.062356391892456e-05, "epoch": 1.066043839445149, "percentage": 
35.53, "elapsed_time": "14:10:05", "remaining_time": "1 day, 1:42:12", "throughput": 28320.08, "total_tokens": 1444471616} +{"current_steps": 5160, "total_steps": 14493, "loss": 0.8866, "lr": 4.0610162551516395e-05, "epoch": 1.0681141792396678, "percentage": 35.6, "elapsed_time": "14:11:37", "remaining_time": "1 day, 1:40:21", "throughput": 28323.0, "total_tokens": 1447248192} +{"current_steps": 5170, "total_steps": 14493, "loss": 0.8873, "lr": 4.059677443835412e-05, "epoch": 1.0701845190341865, "percentage": 35.67, "elapsed_time": "14:13:14", "remaining_time": "1 day, 1:38:38", "throughput": 28324.57, "total_tokens": 1450068736} +{"current_steps": 5180, "total_steps": 14493, "loss": 0.9028, "lr": 4.058339955760423e-05, "epoch": 1.0722548588287053, "percentage": 35.74, "elapsed_time": "14:14:55", "remaining_time": "1 day, 1:37:02", "throughput": 28324.66, "total_tokens": 1452914752} +{"current_steps": 5190, "total_steps": 14493, "loss": 0.8949, "lr": 4.0570037887483535e-05, "epoch": 1.074325198623224, "percentage": 35.81, "elapsed_time": "14:16:35", "remaining_time": "1 day, 1:35:25", "throughput": 28324.67, "total_tokens": 1455764032} +{"current_steps": 5200, "total_steps": 14493, "loss": 0.8981, "lr": 4.0556689406259025e-05, "epoch": 1.0763955384177428, "percentage": 35.88, "elapsed_time": "14:18:09", "remaining_time": "1 day, 1:33:37", "throughput": 28327.11, "total_tokens": 1458553600} +{"current_steps": 5210, "total_steps": 14493, "loss": 0.8951, "lr": 4.054335409224771e-05, "epoch": 1.0784658782122616, "percentage": 35.95, "elapsed_time": "14:19:45", "remaining_time": "1 day, 1:31:53", "throughput": 28329.0, "total_tokens": 1461370752} +{"current_steps": 5220, "total_steps": 14493, "loss": 0.8995, "lr": 4.053003192381646e-05, "epoch": 1.0805362180067803, "percentage": 36.02, "elapsed_time": "14:21:26", "remaining_time": "1 day, 1:30:17", "throughput": 28329.39, "total_tokens": 1464235712} +{"current_steps": 5230, "total_steps": 14493, "loss": 0.888, "lr": 
4.051672287938189e-05, "epoch": 1.082606557801299, "percentage": 36.09, "elapsed_time": "14:23:07", "remaining_time": "1 day, 1:28:41", "throughput": 28329.12, "total_tokens": 1467087040} +{"current_steps": 5240, "total_steps": 14493, "loss": 0.8886, "lr": 4.050342693741019e-05, "epoch": 1.0846768975958179, "percentage": 36.16, "elapsed_time": "14:24:46", "remaining_time": "1 day, 1:27:03", "throughput": 28329.49, "total_tokens": 1469926336} +{"current_steps": 5250, "total_steps": 14493, "loss": 0.8962, "lr": 4.049014407641699e-05, "epoch": 1.0867472373903366, "percentage": 36.22, "elapsed_time": "14:26:25", "remaining_time": "1 day, 1:25:23", "throughput": 28330.08, "total_tokens": 1472746944} +{"current_steps": 5260, "total_steps": 14493, "loss": 0.8862, "lr": 4.047687427496717e-05, "epoch": 1.0888175771848554, "percentage": 36.29, "elapsed_time": "14:28:05", "remaining_time": "1 day, 1:23:46", "throughput": 28330.52, "total_tokens": 1475609792} +{"current_steps": 5270, "total_steps": 14493, "loss": 0.8864, "lr": 4.046361751167479e-05, "epoch": 1.0908879169793742, "percentage": 36.36, "elapsed_time": "14:29:45", "remaining_time": "1 day, 1:22:09", "throughput": 28330.76, "total_tokens": 1478453120} +{"current_steps": 5280, "total_steps": 14493, "loss": 0.8884, "lr": 4.045037376520292e-05, "epoch": 1.0929582567738931, "percentage": 36.43, "elapsed_time": "14:31:16", "remaining_time": "1 day, 1:20:16", "throughput": 28333.55, "total_tokens": 1481180096} +{"current_steps": 5290, "total_steps": 14493, "loss": 0.8786, "lr": 4.043714301426344e-05, "epoch": 1.095028596568412, "percentage": 36.5, "elapsed_time": "14:32:52", "remaining_time": "1 day, 1:18:32", "throughput": 28335.38, "total_tokens": 1483997504} +{"current_steps": 5300, "total_steps": 14493, "loss": 0.8869, "lr": 4.042392523761696e-05, "epoch": 1.0970989363629307, "percentage": 36.57, "elapsed_time": "14:34:33", "remaining_time": "1 day, 1:16:56", "throughput": 28335.51, "total_tokens": 1486856640} 
+{"current_steps": 5310, "total_steps": 14493, "loss": 0.8874, "lr": 4.041072041407267e-05, "epoch": 1.0991692761574494, "percentage": 36.64, "elapsed_time": "14:36:08", "remaining_time": "1 day, 1:15:11", "throughput": 28337.46, "total_tokens": 1489669440} +{"current_steps": 5320, "total_steps": 14493, "loss": 0.8965, "lr": 4.039752852248815e-05, "epoch": 1.1012396159519682, "percentage": 36.71, "elapsed_time": "14:37:44", "remaining_time": "1 day, 1:13:27", "throughput": 28339.49, "total_tokens": 1492498112} +{"current_steps": 5330, "total_steps": 14493, "loss": 0.9143, "lr": 4.0384349541769286e-05, "epoch": 1.103309955746487, "percentage": 36.78, "elapsed_time": "14:39:25", "remaining_time": "1 day, 1:11:50", "throughput": 28339.58, "total_tokens": 1495341184} +{"current_steps": 5340, "total_steps": 14493, "loss": 0.904, "lr": 4.037118345087011e-05, "epoch": 1.1053802955410057, "percentage": 36.85, "elapsed_time": "14:41:05", "remaining_time": "1 day, 1:10:13", "throughput": 28339.84, "total_tokens": 1498187456} +{"current_steps": 5350, "total_steps": 14493, "loss": 0.8875, "lr": 4.0358030228792636e-05, "epoch": 1.1074506353355245, "percentage": 36.91, "elapsed_time": "14:42:41", "remaining_time": "1 day, 1:08:30", "throughput": 28341.54, "total_tokens": 1501020096} +{"current_steps": 5360, "total_steps": 14493, "loss": 0.8826, "lr": 4.034488985458673e-05, "epoch": 1.1095209751300432, "percentage": 36.98, "elapsed_time": "14:44:21", "remaining_time": "1 day, 1:06:52", "throughput": 28341.61, "total_tokens": 1503841152} +{"current_steps": 5370, "total_steps": 14493, "loss": 0.9024, "lr": 4.033176230735001e-05, "epoch": 1.111591314924562, "percentage": 37.05, "elapsed_time": "14:45:52", "remaining_time": "1 day, 1:04:59", "throughput": 28344.69, "total_tokens": 1506583424} +{"current_steps": 5380, "total_steps": 14493, "loss": 0.8953, "lr": 4.0318647566227626e-05, "epoch": 1.1136616547190807, "percentage": 37.12, "elapsed_time": "14:47:29", "remaining_time": "1 
day, 1:03:18", "throughput": 28345.64, "total_tokens": 1509405184} +{"current_steps": 5390, "total_steps": 14493, "loss": 0.8917, "lr": 4.0305545610412205e-05, "epoch": 1.1157319945135995, "percentage": 37.19, "elapsed_time": "14:49:09", "remaining_time": "1 day, 1:01:39", "throughput": 28346.11, "total_tokens": 1512244672} +{"current_steps": 5400, "total_steps": 14493, "loss": 0.9011, "lr": 4.029245641914365e-05, "epoch": 1.1178023343081183, "percentage": 37.26, "elapsed_time": "14:50:48", "remaining_time": "1 day, 1:00:01", "throughput": 28346.35, "total_tokens": 1515074176} +{"current_steps": 5410, "total_steps": 14493, "loss": 0.8809, "lr": 4.027937997170904e-05, "epoch": 1.119872674102637, "percentage": 37.33, "elapsed_time": "14:52:23", "remaining_time": "1 day, 0:58:15", "throughput": 28347.81, "total_tokens": 1517839744} +{"current_steps": 5420, "total_steps": 14493, "loss": 0.8914, "lr": 4.026631624744247e-05, "epoch": 1.1219430138971558, "percentage": 37.4, "elapsed_time": "14:54:00", "remaining_time": "1 day, 0:56:33", "throughput": 28348.51, "total_tokens": 1520633280} +{"current_steps": 5430, "total_steps": 14493, "loss": 0.8883, "lr": 4.025326522572493e-05, "epoch": 1.1240133536916745, "percentage": 37.47, "elapsed_time": "14:55:38", "remaining_time": "1 day, 0:54:52", "throughput": 28350.03, "total_tokens": 1523479936} +{"current_steps": 5440, "total_steps": 14493, "loss": 0.8895, "lr": 4.024022688598415e-05, "epoch": 1.1260836934861933, "percentage": 37.54, "elapsed_time": "14:57:18", "remaining_time": "1 day, 0:53:15", "throughput": 28350.04, "total_tokens": 1526330304} +{"current_steps": 5450, "total_steps": 14493, "loss": 0.8858, "lr": 4.0227201207694494e-05, "epoch": 1.1281540332807123, "percentage": 37.6, "elapsed_time": "14:58:56", "remaining_time": "1 day, 0:51:34", "throughput": 28350.92, "total_tokens": 1529141824} +{"current_steps": 5460, "total_steps": 14493, "loss": 0.894, "lr": 4.021418817037677e-05, "epoch": 1.130224373075231, 
"percentage": 37.67, "elapsed_time": "15:00:33", "remaining_time": "1 day, 0:49:52", "throughput": 28351.91, "total_tokens": 1531954560} +{"current_steps": 5470, "total_steps": 14493, "loss": 0.878, "lr": 4.0201187753598174e-05, "epoch": 1.1322947128697498, "percentage": 37.74, "elapsed_time": "15:02:11", "remaining_time": "1 day, 0:48:12", "throughput": 28352.2, "total_tokens": 1534743744} +{"current_steps": 5480, "total_steps": 14493, "loss": 0.897, "lr": 4.018819993697208e-05, "epoch": 1.1343650526642686, "percentage": 37.81, "elapsed_time": "15:03:50", "remaining_time": "1 day, 0:46:32", "throughput": 28352.41, "total_tokens": 1537556736} +{"current_steps": 5490, "total_steps": 14493, "loss": 0.8997, "lr": 4.017522470015793e-05, "epoch": 1.1364353924587873, "percentage": 37.88, "elapsed_time": "15:05:25", "remaining_time": "1 day, 0:44:48", "throughput": 28353.41, "total_tokens": 1540321024} +{"current_steps": 5500, "total_steps": 14493, "loss": 0.8907, "lr": 4.0162262022861144e-05, "epoch": 1.138505732253306, "percentage": 37.95, "elapsed_time": "15:07:04", "remaining_time": "1 day, 0:43:09", "throughput": 28353.94, "total_tokens": 1543161216} +{"current_steps": 5510, "total_steps": 14493, "loss": 0.8778, "lr": 4.0149311884832906e-05, "epoch": 1.1405760720478249, "percentage": 38.02, "elapsed_time": "15:08:42", "remaining_time": "1 day, 0:41:28", "throughput": 28354.5, "total_tokens": 1545959552} +{"current_steps": 5520, "total_steps": 14493, "loss": 0.903, "lr": 4.0136374265870116e-05, "epoch": 1.1426464118423436, "percentage": 38.09, "elapsed_time": "15:10:19", "remaining_time": "1 day, 0:39:47", "throughput": 28355.72, "total_tokens": 1548788864} +{"current_steps": 5530, "total_steps": 14493, "loss": 0.8821, "lr": 4.0123449145815174e-05, "epoch": 1.1447167516368624, "percentage": 38.16, "elapsed_time": "15:11:54", "remaining_time": "1 day, 0:38:01", "throughput": 28357.25, "total_tokens": 1551565632} +{"current_steps": 5540, "total_steps": 14493, "loss": 
0.8944, "lr": 4.011053650455592e-05, "epoch": 1.1467870914313811, "percentage": 38.23, "elapsed_time": "15:13:31", "remaining_time": "1 day, 0:36:18", "throughput": 28358.41, "total_tokens": 1554355072} +{"current_steps": 5550, "total_steps": 14493, "loss": 0.8815, "lr": 4.0097636322025466e-05, "epoch": 1.1488574312259, "percentage": 38.29, "elapsed_time": "15:15:10", "remaining_time": "1 day, 0:34:39", "throughput": 28358.66, "total_tokens": 1557183040} +{"current_steps": 5560, "total_steps": 14493, "loss": 0.8923, "lr": 4.008474857820206e-05, "epoch": 1.1509277710204187, "percentage": 38.36, "elapsed_time": "15:16:48", "remaining_time": "1 day, 0:32:59", "throughput": 28360.13, "total_tokens": 1560045056} +{"current_steps": 5570, "total_steps": 14493, "loss": 0.8882, "lr": 4.007187325310899e-05, "epoch": 1.1529981108149374, "percentage": 38.43, "elapsed_time": "15:18:23", "remaining_time": "1 day, 0:31:14", "throughput": 28361.71, "total_tokens": 1562836032} +{"current_steps": 5580, "total_steps": 14493, "loss": 0.8765, "lr": 4.00590103268144e-05, "epoch": 1.1550684506094564, "percentage": 38.5, "elapsed_time": "15:19:58", "remaining_time": "1 day, 0:29:29", "throughput": 28363.37, "total_tokens": 1565624960} +{"current_steps": 5590, "total_steps": 14493, "loss": 0.878, "lr": 4.004615977943124e-05, "epoch": 1.157138790403975, "percentage": 38.57, "elapsed_time": "15:21:35", "remaining_time": "1 day, 0:27:47", "throughput": 28364.97, "total_tokens": 1568460352} +{"current_steps": 5600, "total_steps": 14493, "loss": 0.8893, "lr": 4.0033321591117025e-05, "epoch": 1.159209130198494, "percentage": 38.64, "elapsed_time": "15:23:11", "remaining_time": "1 day, 0:26:04", "throughput": 28366.67, "total_tokens": 1571283392} +{"current_steps": 5610, "total_steps": 14493, "loss": 0.8862, "lr": 4.002049574207381e-05, "epoch": 1.1612794699930127, "percentage": 38.71, "elapsed_time": "15:24:54", "remaining_time": "1 day, 0:24:31", "throughput": 28366.02, "total_tokens": 
1574163520} +{"current_steps": 5620, "total_steps": 14493, "loss": 0.8788, "lr": 4.000768221254803e-05, "epoch": 1.1633498097875314, "percentage": 38.78, "elapsed_time": "15:26:31", "remaining_time": "1 day, 0:22:48", "throughput": 28367.29, "total_tokens": 1576967424} +{"current_steps": 5630, "total_steps": 14493, "loss": 0.8894, "lr": 3.999488098283034e-05, "epoch": 1.1654201495820502, "percentage": 38.85, "elapsed_time": "15:28:07", "remaining_time": "1 day, 0:21:05", "throughput": 28368.85, "total_tokens": 1579789184} +{"current_steps": 5640, "total_steps": 14493, "loss": 0.8848, "lr": 3.9982092033255506e-05, "epoch": 1.167490489376569, "percentage": 38.92, "elapsed_time": "15:29:45", "remaining_time": "1 day, 0:19:24", "throughput": 28369.22, "total_tokens": 1582581952} +{"current_steps": 5650, "total_steps": 14493, "loss": 0.8959, "lr": 3.996931534420232e-05, "epoch": 1.1695608291710877, "percentage": 38.98, "elapsed_time": "15:31:22", "remaining_time": "1 day, 0:17:43", "throughput": 28370.48, "total_tokens": 1585420224} +{"current_steps": 5660, "total_steps": 14493, "loss": 0.8806, "lr": 3.995655089609339e-05, "epoch": 1.1716311689656065, "percentage": 39.05, "elapsed_time": "15:32:59", "remaining_time": "1 day, 0:16:01", "throughput": 28371.38, "total_tokens": 1588212608} +{"current_steps": 5670, "total_steps": 14493, "loss": 0.893, "lr": 3.994379866939511e-05, "epoch": 1.1737015087601252, "percentage": 39.12, "elapsed_time": "15:34:34", "remaining_time": "1 day, 0:14:17", "throughput": 28372.65, "total_tokens": 1590994240} +{"current_steps": 5680, "total_steps": 14493, "loss": 0.8972, "lr": 3.993105864461745e-05, "epoch": 1.175771848554644, "percentage": 39.19, "elapsed_time": "15:36:09", "remaining_time": "1 day, 0:12:31", "throughput": 28374.64, "total_tokens": 1593794176} +{"current_steps": 5690, "total_steps": 14493, "loss": 0.8938, "lr": 3.9918330802313866e-05, "epoch": 1.1778421883491628, "percentage": 39.26, "elapsed_time": "15:37:49", 
"remaining_time": "1 day, 0:10:55", "throughput": 28375.09, "total_tokens": 1596665728} +{"current_steps": 5700, "total_steps": 14493, "loss": 0.8784, "lr": 3.9905615123081206e-05, "epoch": 1.1799125281436815, "percentage": 39.33, "elapsed_time": "15:39:25", "remaining_time": "1 day, 0:09:11", "throughput": 28376.02, "total_tokens": 1599426624} +{"current_steps": 5710, "total_steps": 14493, "loss": 0.8978, "lr": 3.989291158755953e-05, "epoch": 1.1819828679382003, "percentage": 39.4, "elapsed_time": "15:41:00", "remaining_time": "1 day, 0:07:25", "throughput": 28377.58, "total_tokens": 1602198912} +{"current_steps": 5720, "total_steps": 14493, "loss": 0.8967, "lr": 3.988022017643201e-05, "epoch": 1.184053207732719, "percentage": 39.47, "elapsed_time": "15:42:37", "remaining_time": "1 day, 0:05:44", "throughput": 28378.33, "total_tokens": 1605007680} +{"current_steps": 5730, "total_steps": 14493, "loss": 0.8731, "lr": 3.9867540870424826e-05, "epoch": 1.186123547527238, "percentage": 39.54, "elapsed_time": "15:44:18", "remaining_time": "1 day, 0:04:08", "throughput": 28378.35, "total_tokens": 1607861696} +{"current_steps": 5740, "total_steps": 14493, "loss": 0.8816, "lr": 3.985487365030702e-05, "epoch": 1.1881938873217566, "percentage": 39.61, "elapsed_time": "15:45:53", "remaining_time": "1 day, 0:02:23", "throughput": 28380.29, "total_tokens": 1610676800} +{"current_steps": 5750, "total_steps": 14493, "loss": 0.8852, "lr": 3.984221849689036e-05, "epoch": 1.1902642271162756, "percentage": 39.67, "elapsed_time": "15:47:28", "remaining_time": "1 day, 0:00:39", "throughput": 28381.59, "total_tokens": 1613457408} +{"current_steps": 5760, "total_steps": 14493, "loss": 0.881, "lr": 3.982957539102927e-05, "epoch": 1.1923345669107943, "percentage": 39.74, "elapsed_time": "15:49:12", "remaining_time": "23:59:08", "throughput": 28380.63, "total_tokens": 1616350208} +{"current_steps": 5770, "total_steps": 14493, "loss": 0.8735, "lr": 3.981694431362065e-05, "epoch": 
1.194404906705313, "percentage": 39.81, "elapsed_time": "15:50:51", "remaining_time": "23:57:29", "throughput": 28380.74, "total_tokens": 1619169792} +{"current_steps": 5780, "total_steps": 14493, "loss": 0.8909, "lr": 3.9804325245603786e-05, "epoch": 1.1964752464998318, "percentage": 39.88, "elapsed_time": "15:52:24", "remaining_time": "23:55:42", "throughput": 28382.91, "total_tokens": 1621935040} +{"current_steps": 5790, "total_steps": 14493, "loss": 0.8851, "lr": 3.9791718167960226e-05, "epoch": 1.1985455862943506, "percentage": 39.95, "elapsed_time": "15:54:01", "remaining_time": "23:54:00", "throughput": 28383.79, "total_tokens": 1624734464} +{"current_steps": 5800, "total_steps": 14493, "loss": 0.8925, "lr": 3.9779123061713665e-05, "epoch": 1.2006159260888694, "percentage": 40.02, "elapsed_time": "15:55:37", "remaining_time": "23:52:17", "throughput": 28384.06, "total_tokens": 1627478976} +{"current_steps": 5810, "total_steps": 14493, "loss": 0.8822, "lr": 3.976653990792979e-05, "epoch": 1.2026862658833881, "percentage": 40.09, "elapsed_time": "15:57:14", "remaining_time": "23:50:35", "throughput": 28385.28, "total_tokens": 1630297024} +{"current_steps": 5820, "total_steps": 14493, "loss": 0.8741, "lr": 3.9753968687716206e-05, "epoch": 1.2047566056779069, "percentage": 40.16, "elapsed_time": "15:58:53", "remaining_time": "23:48:56", "throughput": 28386.2, "total_tokens": 1633156096} +{"current_steps": 5830, "total_steps": 14493, "loss": 0.8722, "lr": 3.974140938222232e-05, "epoch": 1.2068269454724256, "percentage": 40.23, "elapsed_time": "16:00:31", "remaining_time": "23:47:16", "throughput": 28387.14, "total_tokens": 1635996800} +{"current_steps": 5840, "total_steps": 14493, "loss": 0.8837, "lr": 3.972886197263915e-05, "epoch": 1.2088972852669444, "percentage": 40.3, "elapsed_time": "16:02:08", "remaining_time": "23:45:35", "throughput": 28387.94, "total_tokens": 1638795136} +{"current_steps": 5850, "total_steps": 14493, "loss": 0.8788, "lr": 
3.97163264401993e-05, "epoch": 1.2109676250614632, "percentage": 40.36, "elapsed_time": "16:03:49", "remaining_time": "23:43:58", "throughput": 28387.74, "total_tokens": 1641636288} +{"current_steps": 5860, "total_steps": 14493, "loss": 0.902, "lr": 3.970380276617677e-05, "epoch": 1.213037964855982, "percentage": 40.43, "elapsed_time": "16:05:27", "remaining_time": "23:42:19", "throughput": 28388.56, "total_tokens": 1644478272} +{"current_steps": 5870, "total_steps": 14493, "loss": 0.8814, "lr": 3.96912909318869e-05, "epoch": 1.2151083046505007, "percentage": 40.5, "elapsed_time": "16:07:06", "remaining_time": "23:40:41", "throughput": 28389.11, "total_tokens": 1647334656} +{"current_steps": 5880, "total_steps": 14493, "loss": 0.8868, "lr": 3.96787909186862e-05, "epoch": 1.2171786444450194, "percentage": 40.57, "elapsed_time": "16:08:45", "remaining_time": "23:39:02", "throughput": 28389.22, "total_tokens": 1650138880} +{"current_steps": 5890, "total_steps": 14493, "loss": 0.8669, "lr": 3.9666302707972244e-05, "epoch": 1.2192489842395382, "percentage": 40.64, "elapsed_time": "16:10:22", "remaining_time": "23:37:20", "throughput": 28390.88, "total_tokens": 1652984576} +{"current_steps": 5900, "total_steps": 14493, "loss": 0.8847, "lr": 3.965382628118358e-05, "epoch": 1.2213193240340572, "percentage": 40.71, "elapsed_time": "16:11:58", "remaining_time": "23:35:37", "throughput": 28392.19, "total_tokens": 1655785920} +{"current_steps": 5910, "total_steps": 14493, "loss": 0.889, "lr": 3.964136161979959e-05, "epoch": 1.223389663828576, "percentage": 40.78, "elapsed_time": "16:13:38", "remaining_time": "23:34:00", "throughput": 28391.81, "total_tokens": 1658609408} +{"current_steps": 5920, "total_steps": 14493, "loss": 0.9027, "lr": 3.9628908705340406e-05, "epoch": 1.2254600036230947, "percentage": 40.85, "elapsed_time": "16:15:16", "remaining_time": "23:32:19", "throughput": 28392.02, "total_tokens": 1661389952} +{"current_steps": 5930, "total_steps": 14493, "loss": 
0.8847, "lr": 3.961646751936673e-05, "epoch": 1.2275303434176135, "percentage": 40.92, "elapsed_time": "16:16:51", "remaining_time": "23:30:36", "throughput": 28393.12, "total_tokens": 1664168640} +{"current_steps": 5940, "total_steps": 14493, "loss": 0.8859, "lr": 3.960403804347979e-05, "epoch": 1.2296006832121322, "percentage": 40.99, "elapsed_time": "16:18:27", "remaining_time": "23:28:52", "throughput": 28394.75, "total_tokens": 1666983168} +{"current_steps": 5950, "total_steps": 14493, "loss": 0.8785, "lr": 3.959162025932119e-05, "epoch": 1.231671023006651, "percentage": 41.05, "elapsed_time": "16:20:06", "remaining_time": "23:27:14", "throughput": 28394.47, "total_tokens": 1669792448} +{"current_steps": 5960, "total_steps": 14493, "loss": 0.884, "lr": 3.95792141485728e-05, "epoch": 1.2337413628011697, "percentage": 41.12, "elapsed_time": "16:21:41", "remaining_time": "23:25:30", "throughput": 28395.72, "total_tokens": 1672560576} +{"current_steps": 5970, "total_steps": 14493, "loss": 0.8702, "lr": 3.956681969295664e-05, "epoch": 1.2358117025956885, "percentage": 41.19, "elapsed_time": "16:23:20", "remaining_time": "23:23:50", "throughput": 28395.78, "total_tokens": 1675354368} +{"current_steps": 5980, "total_steps": 14493, "loss": 0.8686, "lr": 3.955443687423479e-05, "epoch": 1.2378820423902073, "percentage": 41.26, "elapsed_time": "16:24:54", "remaining_time": "23:22:05", "throughput": 28397.28, "total_tokens": 1678112192} +{"current_steps": 5990, "total_steps": 14493, "loss": 0.8915, "lr": 3.954206567420924e-05, "epoch": 1.239952382184726, "percentage": 41.33, "elapsed_time": "16:26:30", "remaining_time": "23:20:22", "throughput": 28398.17, "total_tokens": 1680897920} +{"current_steps": 6000, "total_steps": 14493, "loss": 0.8877, "lr": 3.952970607472179e-05, "epoch": 1.2420227219792448, "percentage": 41.4, "elapsed_time": "16:28:09", "remaining_time": "23:18:43", "throughput": 28398.92, "total_tokens": 1683745472} +{"current_steps": 6010, "total_steps": 
14493, "loss": 0.8966, "lr": 3.951735805765399e-05, "epoch": 1.2440930617737636, "percentage": 41.47, "elapsed_time": "16:29:45", "remaining_time": "23:17:01", "throughput": 28400.44, "total_tokens": 1686573888} +{"current_steps": 6020, "total_steps": 14493, "loss": 0.8915, "lr": 3.950502160492692e-05, "epoch": 1.2461634015682823, "percentage": 41.54, "elapsed_time": "16:31:22", "remaining_time": "23:15:20", "throughput": 28401.63, "total_tokens": 1689408320} +{"current_steps": 6030, "total_steps": 14493, "loss": 0.8755, "lr": 3.9492696698501205e-05, "epoch": 1.248233741362801, "percentage": 41.61, "elapsed_time": "16:33:01", "remaining_time": "23:13:41", "throughput": 28402.31, "total_tokens": 1692245056} +{"current_steps": 6040, "total_steps": 14493, "loss": 0.8883, "lr": 3.9480383320376784e-05, "epoch": 1.2503040811573198, "percentage": 41.68, "elapsed_time": "16:34:34", "remaining_time": "23:11:54", "throughput": 28404.63, "total_tokens": 1695030848} +{"current_steps": 6050, "total_steps": 14493, "loss": 0.8783, "lr": 3.94680814525929e-05, "epoch": 1.2523744209518388, "percentage": 41.74, "elapsed_time": "16:36:14", "remaining_time": "23:10:17", "throughput": 28404.62, "total_tokens": 1697861312} +{"current_steps": 6060, "total_steps": 14493, "loss": 0.887, "lr": 3.945579107722792e-05, "epoch": 1.2544447607463576, "percentage": 41.81, "elapsed_time": "16:37:49", "remaining_time": "23:08:33", "throughput": 28406.07, "total_tokens": 1700650176} +{"current_steps": 6070, "total_steps": 14493, "loss": 0.8857, "lr": 3.9443512176399276e-05, "epoch": 1.2565151005408763, "percentage": 41.88, "elapsed_time": "16:39:29", "remaining_time": "23:06:55", "throughput": 28406.39, "total_tokens": 1703507968} +{"current_steps": 6080, "total_steps": 14493, "loss": 0.8868, "lr": 3.9431244732263307e-05, "epoch": 1.258585440335395, "percentage": 41.95, "elapsed_time": "16:41:02", "remaining_time": "23:05:08", "throughput": 28409.0, "total_tokens": 1706301248} +{"current_steps": 6090, 
"total_steps": 14493, "loss": 0.8998, "lr": 3.941898872701519e-05, "epoch": 1.2606557801299139, "percentage": 42.02, "elapsed_time": "16:42:38", "remaining_time": "23:03:26", "throughput": 28409.94, "total_tokens": 1709093312} +{"current_steps": 6100, "total_steps": 14493, "loss": 0.8842, "lr": 3.940674414288882e-05, "epoch": 1.2627261199244326, "percentage": 42.09, "elapsed_time": "16:44:14", "remaining_time": "23:01:44", "throughput": 28411.08, "total_tokens": 1711899584} +{"current_steps": 6110, "total_steps": 14493, "loss": 0.8858, "lr": 3.939451096215668e-05, "epoch": 1.2647964597189514, "percentage": 42.16, "elapsed_time": "16:45:51", "remaining_time": "23:00:03", "throughput": 28411.68, "total_tokens": 1714691712} +{"current_steps": 6120, "total_steps": 14493, "loss": 0.8769, "lr": 3.938228916712978e-05, "epoch": 1.2668667995134701, "percentage": 42.23, "elapsed_time": "16:47:31", "remaining_time": "22:58:25", "throughput": 28412.03, "total_tokens": 1717550144} +{"current_steps": 6130, "total_steps": 14493, "loss": 0.8945, "lr": 3.937007874015748e-05, "epoch": 1.268937139307989, "percentage": 42.3, "elapsed_time": "16:49:08", "remaining_time": "22:56:45", "throughput": 28412.89, "total_tokens": 1720366272} +{"current_steps": 6140, "total_steps": 14493, "loss": 0.8886, "lr": 3.935787966362748e-05, "epoch": 1.2710074791025077, "percentage": 42.37, "elapsed_time": "16:50:47", "remaining_time": "22:55:06", "throughput": 28413.01, "total_tokens": 1723186624} +{"current_steps": 6150, "total_steps": 14493, "loss": 0.8853, "lr": 3.9345691919965595e-05, "epoch": 1.2730778188970264, "percentage": 42.43, "elapsed_time": "16:52:24", "remaining_time": "22:53:25", "throughput": 28414.01, "total_tokens": 1725994304} +{"current_steps": 6160, "total_steps": 14493, "loss": 0.8828, "lr": 3.9333515491635764e-05, "epoch": 1.2751481586915452, "percentage": 42.5, "elapsed_time": "16:54:00", "remaining_time": "22:51:43", "throughput": 28415.13, "total_tokens": 1728803904} 
+{"current_steps": 6170, "total_steps": 14493, "loss": 0.8648, "lr": 3.932135036113987e-05, "epoch": 1.277218498486064, "percentage": 42.57, "elapsed_time": "16:55:35", "remaining_time": "22:49:59", "throughput": 28416.55, "total_tokens": 1731582720} +{"current_steps": 6180, "total_steps": 14493, "loss": 0.8825, "lr": 3.930919651101764e-05, "epoch": 1.279288838280583, "percentage": 42.64, "elapsed_time": "16:57:09", "remaining_time": "22:48:13", "throughput": 28418.23, "total_tokens": 1734357952} +{"current_steps": 6190, "total_steps": 14493, "loss": 0.8948, "lr": 3.9297053923846576e-05, "epoch": 1.2813591780751015, "percentage": 42.71, "elapsed_time": "16:58:46", "remaining_time": "22:46:32", "throughput": 28419.25, "total_tokens": 1737169408} +{"current_steps": 6200, "total_steps": 14493, "loss": 0.8885, "lr": 3.928492258224183e-05, "epoch": 1.2834295178696205, "percentage": 42.78, "elapsed_time": "17:00:21", "remaining_time": "22:44:48", "throughput": 28420.69, "total_tokens": 1739946176} +{"current_steps": 6210, "total_steps": 14493, "loss": 0.885, "lr": 3.927280246885609e-05, "epoch": 1.285499857664139, "percentage": 42.85, "elapsed_time": "17:01:53", "remaining_time": "22:43:01", "throughput": 28422.89, "total_tokens": 1742712128} +{"current_steps": 6220, "total_steps": 14493, "loss": 0.8876, "lr": 3.9260693566379486e-05, "epoch": 1.287570197458658, "percentage": 42.92, "elapsed_time": "17:03:28", "remaining_time": "22:41:17", "throughput": 28425.14, "total_tokens": 1745549184} +{"current_steps": 6230, "total_steps": 14493, "loss": 0.892, "lr": 3.924859585753948e-05, "epoch": 1.2896405372531767, "percentage": 42.99, "elapsed_time": "17:05:05", "remaining_time": "22:39:36", "throughput": 28426.02, "total_tokens": 1748364288} +{"current_steps": 6240, "total_steps": 14493, "loss": 0.891, "lr": 3.923650932510079e-05, "epoch": 1.2917108770476955, "percentage": 43.06, "elapsed_time": "17:06:40", "remaining_time": "22:37:52", "throughput": 28427.76, "total_tokens": 
1751152192} +{"current_steps": 6250, "total_steps": 14493, "loss": 0.8828, "lr": 3.9224433951865215e-05, "epoch": 1.2937812168422143, "percentage": 43.12, "elapsed_time": "17:08:13", "remaining_time": "22:36:05", "throughput": 28429.81, "total_tokens": 1753921152} +{"current_steps": 6260, "total_steps": 14493, "loss": 0.8778, "lr": 3.921236972067165e-05, "epoch": 1.295851556636733, "percentage": 43.19, "elapsed_time": "17:09:48", "remaining_time": "22:34:23", "throughput": 28430.27, "total_tokens": 1756670400} +{"current_steps": 6270, "total_steps": 14493, "loss": 0.8704, "lr": 3.920031661439585e-05, "epoch": 1.2979218964312518, "percentage": 43.26, "elapsed_time": "17:11:23", "remaining_time": "22:32:39", "throughput": 28431.15, "total_tokens": 1759431872} +{"current_steps": 6280, "total_steps": 14493, "loss": 0.8806, "lr": 3.918827461595045e-05, "epoch": 1.2999922362257705, "percentage": 43.33, "elapsed_time": "17:13:03", "remaining_time": "22:31:01", "throughput": 28431.15, "total_tokens": 1762251712} +{"current_steps": 6290, "total_steps": 14493, "loss": 0.8712, "lr": 3.9176243708284746e-05, "epoch": 1.3020625760202893, "percentage": 43.4, "elapsed_time": "17:14:37", "remaining_time": "22:29:16", "throughput": 28432.81, "total_tokens": 1765026368} +{"current_steps": 6300, "total_steps": 14493, "loss": 0.8814, "lr": 3.9164223874384715e-05, "epoch": 1.304132915814808, "percentage": 43.47, "elapsed_time": "17:16:12", "remaining_time": "22:27:33", "throughput": 28434.35, "total_tokens": 1767824960} +{"current_steps": 6310, "total_steps": 14493, "loss": 0.8571, "lr": 3.91522150972728e-05, "epoch": 1.3062032556093268, "percentage": 43.54, "elapsed_time": "17:17:45", "remaining_time": "22:25:47", "throughput": 28436.24, "total_tokens": 1770599424} +{"current_steps": 6320, "total_steps": 14493, "loss": 0.8753, "lr": 3.9140217360007896e-05, "epoch": 1.3082735954038456, "percentage": 43.61, "elapsed_time": "17:19:19", "remaining_time": "22:24:02", "throughput": 28438.79, 
"total_tokens": 1773422144} +{"current_steps": 6330, "total_steps": 14493, "loss": 0.882, "lr": 3.912823064568521e-05, "epoch": 1.3103439351983646, "percentage": 43.68, "elapsed_time": "17:20:53", "remaining_time": "22:22:18", "throughput": 28440.38, "total_tokens": 1776208704} +{"current_steps": 6340, "total_steps": 14493, "loss": 0.8853, "lr": 3.9116254937436155e-05, "epoch": 1.312414274992883, "percentage": 43.75, "elapsed_time": "17:22:28", "remaining_time": "22:20:34", "throughput": 28442.14, "total_tokens": 1779002496} +{"current_steps": 6350, "total_steps": 14493, "loss": 0.9006, "lr": 3.910429021842825e-05, "epoch": 1.314484614787402, "percentage": 43.81, "elapsed_time": "17:24:03", "remaining_time": "22:18:51", "throughput": 28443.46, "total_tokens": 1781801920} +{"current_steps": 6360, "total_steps": 14493, "loss": 0.8815, "lr": 3.9092336471865084e-05, "epoch": 1.3165549545819206, "percentage": 43.88, "elapsed_time": "17:25:40", "remaining_time": "22:17:10", "throughput": 28444.27, "total_tokens": 1784600960} +{"current_steps": 6370, "total_steps": 14493, "loss": 0.8815, "lr": 3.908039368098611e-05, "epoch": 1.3186252943764396, "percentage": 43.95, "elapsed_time": "17:27:16", "remaining_time": "22:15:29", "throughput": 28445.13, "total_tokens": 1787400384} +{"current_steps": 6380, "total_steps": 14493, "loss": 0.875, "lr": 3.9068461829066633e-05, "epoch": 1.3206956341709584, "percentage": 44.02, "elapsed_time": "17:28:53", "remaining_time": "22:13:48", "throughput": 28445.93, "total_tokens": 1790202432} +{"current_steps": 6390, "total_steps": 14493, "loss": 0.878, "lr": 3.9056540899417656e-05, "epoch": 1.3227659739654771, "percentage": 44.09, "elapsed_time": "17:30:31", "remaining_time": "22:12:08", "throughput": 28446.85, "total_tokens": 1793036032} +{"current_steps": 6400, "total_steps": 14493, "loss": 0.8688, "lr": 3.904463087538585e-05, "epoch": 1.3248363137599959, "percentage": 44.16, "elapsed_time": "17:32:06", "remaining_time": "22:10:24", 
"throughput": 28447.66, "total_tokens": 1795787648} +{"current_steps": 6410, "total_steps": 14493, "loss": 0.8758, "lr": 3.903273174035336e-05, "epoch": 1.3269066535545146, "percentage": 44.23, "elapsed_time": "17:33:39", "remaining_time": "22:08:39", "throughput": 28449.01, "total_tokens": 1798529792} +{"current_steps": 6420, "total_steps": 14493, "loss": 0.8808, "lr": 3.902084347773779e-05, "epoch": 1.3289769933490334, "percentage": 44.3, "elapsed_time": "17:35:13", "remaining_time": "22:06:55", "throughput": 28450.12, "total_tokens": 1801281600} +{"current_steps": 6430, "total_steps": 14493, "loss": 0.8857, "lr": 3.900896607099207e-05, "epoch": 1.3310473331435522, "percentage": 44.37, "elapsed_time": "17:36:48", "remaining_time": "22:05:11", "throughput": 28451.78, "total_tokens": 1804080512} +{"current_steps": 6440, "total_steps": 14493, "loss": 0.8796, "lr": 3.899709950360437e-05, "epoch": 1.333117672938071, "percentage": 44.44, "elapsed_time": "17:38:21", "remaining_time": "22:03:26", "throughput": 28453.54, "total_tokens": 1806853312} +{"current_steps": 6450, "total_steps": 14493, "loss": 0.891, "lr": 3.8985243759097997e-05, "epoch": 1.3351880127325897, "percentage": 44.5, "elapsed_time": "17:40:01", "remaining_time": "22:01:49", "throughput": 28453.88, "total_tokens": 1809700416} +{"current_steps": 6460, "total_steps": 14493, "loss": 0.8816, "lr": 3.897339882103129e-05, "epoch": 1.3372583525271085, "percentage": 44.57, "elapsed_time": "17:41:42", "remaining_time": "22:00:14", "throughput": 28453.08, "total_tokens": 1812540992} +{"current_steps": 6470, "total_steps": 14493, "loss": 0.8795, "lr": 3.8961564672997544e-05, "epoch": 1.3393286923216272, "percentage": 44.64, "elapsed_time": "17:43:17", "remaining_time": "21:58:30", "throughput": 28454.76, "total_tokens": 1815329792} +{"current_steps": 6480, "total_steps": 14493, "loss": 0.9073, "lr": 3.8949741298624924e-05, "epoch": 1.3413990321161462, "percentage": 44.71, "elapsed_time": "17:44:54", 
"remaining_time": "21:56:50", "throughput": 28455.55, "total_tokens": 1818150080} +{"current_steps": 6490, "total_steps": 14493, "loss": 0.8807, "lr": 3.8937928681576305e-05, "epoch": 1.3434693719106647, "percentage": 44.78, "elapsed_time": "17:46:26", "remaining_time": "21:55:04", "throughput": 28457.74, "total_tokens": 1820924416} +{"current_steps": 6500, "total_steps": 14493, "loss": 0.9027, "lr": 3.8926126805549276e-05, "epoch": 1.3455397117051837, "percentage": 44.85, "elapsed_time": "17:48:06", "remaining_time": "21:53:27", "throughput": 28457.5, "total_tokens": 1823751040} +{"current_steps": 6510, "total_steps": 14493, "loss": 0.8752, "lr": 3.891433565427596e-05, "epoch": 1.3476100514997023, "percentage": 44.92, "elapsed_time": "17:49:44", "remaining_time": "21:51:47", "throughput": 28457.85, "total_tokens": 1826558400} +{"current_steps": 6520, "total_steps": 14493, "loss": 0.895, "lr": 3.8902555211522964e-05, "epoch": 1.3496803912942212, "percentage": 44.99, "elapsed_time": "17:51:20", "remaining_time": "21:50:05", "throughput": 28458.72, "total_tokens": 1829336640} +{"current_steps": 6530, "total_steps": 14493, "loss": 0.8743, "lr": 3.889078546109127e-05, "epoch": 1.35175073108874, "percentage": 45.06, "elapsed_time": "17:52:58", "remaining_time": "21:48:26", "throughput": 28459.54, "total_tokens": 1832187968} +{"current_steps": 6540, "total_steps": 14493, "loss": 0.8645, "lr": 3.887902638681616e-05, "epoch": 1.3538210708832588, "percentage": 45.13, "elapsed_time": "17:54:39", "remaining_time": "21:46:50", "throughput": 28459.08, "total_tokens": 1835033600} +{"current_steps": 6550, "total_steps": 14493, "loss": 0.8723, "lr": 3.886727797256707e-05, "epoch": 1.3558914106777775, "percentage": 45.19, "elapsed_time": "17:56:13", "remaining_time": "21:45:06", "throughput": 28461.28, "total_tokens": 1837854464} +{"current_steps": 6560, "total_steps": 14493, "loss": 0.8836, "lr": 3.88555402022476e-05, "epoch": 1.3579617504722963, "percentage": 45.26, 
"elapsed_time": "17:57:48", "remaining_time": "21:43:23", "throughput": 28462.68, "total_tokens": 1840638016} +{"current_steps": 6570, "total_steps": 14493, "loss": 0.8766, "lr": 3.884381305979528e-05, "epoch": 1.360032090266815, "percentage": 45.33, "elapsed_time": "17:59:23", "remaining_time": "21:41:41", "throughput": 28463.61, "total_tokens": 1843416768} +{"current_steps": 6580, "total_steps": 14493, "loss": 0.892, "lr": 3.883209652918163e-05, "epoch": 1.3621024300613338, "percentage": 45.4, "elapsed_time": "18:01:01", "remaining_time": "21:40:01", "throughput": 28464.15, "total_tokens": 1846232384} +{"current_steps": 6590, "total_steps": 14493, "loss": 0.8787, "lr": 3.8820390594411935e-05, "epoch": 1.3641727698558526, "percentage": 45.47, "elapsed_time": "18:02:39", "remaining_time": "21:38:21", "throughput": 28464.94, "total_tokens": 1849063040} +{"current_steps": 6600, "total_steps": 14493, "loss": 0.8879, "lr": 3.880869523952524e-05, "epoch": 1.3662431096503713, "percentage": 45.54, "elapsed_time": "18:04:18", "remaining_time": "21:36:44", "throughput": 28464.99, "total_tokens": 1851893824} +{"current_steps": 6610, "total_steps": 14493, "loss": 0.8877, "lr": 3.879701044859422e-05, "epoch": 1.36831344944489, "percentage": 45.61, "elapsed_time": "18:05:56", "remaining_time": "21:35:04", "throughput": 28465.62, "total_tokens": 1854713152} +{"current_steps": 6620, "total_steps": 14493, "loss": 0.8706, "lr": 3.87853362057251e-05, "epoch": 1.3703837892394088, "percentage": 45.68, "elapsed_time": "18:07:38", "remaining_time": "21:33:29", "throughput": 28464.81, "total_tokens": 1857559424} +{"current_steps": 6630, "total_steps": 14493, "loss": 0.8814, "lr": 3.8773672495057576e-05, "epoch": 1.3724541290339278, "percentage": 45.75, "elapsed_time": "18:09:10", "remaining_time": "21:31:44", "throughput": 28466.97, "total_tokens": 1860338048} +{"current_steps": 6640, "total_steps": 14493, "loss": 0.8773, "lr": 3.8762019300764674e-05, "epoch": 1.3745244688284464, 
"percentage": 45.82, "elapsed_time": "18:10:47", "remaining_time": "21:30:03", "throughput": 28467.62, "total_tokens": 1863145856} +{"current_steps": 6650, "total_steps": 14493, "loss": 0.8776, "lr": 3.875037660705273e-05, "epoch": 1.3765948086229653, "percentage": 45.88, "elapsed_time": "18:12:26", "remaining_time": "21:28:25", "throughput": 28467.86, "total_tokens": 1865977856} +{"current_steps": 6660, "total_steps": 14493, "loss": 0.8744, "lr": 3.873874439816127e-05, "epoch": 1.3786651484174839, "percentage": 45.95, "elapsed_time": "18:14:01", "remaining_time": "21:26:42", "throughput": 28468.9, "total_tokens": 1868744256} +{"current_steps": 6670, "total_steps": 14493, "loss": 0.8764, "lr": 3.872712265836289e-05, "epoch": 1.3807354882120029, "percentage": 46.02, "elapsed_time": "18:15:36", "remaining_time": "21:25:00", "throughput": 28470.07, "total_tokens": 1871534016} +{"current_steps": 6680, "total_steps": 14493, "loss": 0.8773, "lr": 3.8715511371963225e-05, "epoch": 1.3828058280065216, "percentage": 46.09, "elapsed_time": "18:17:13", "remaining_time": "21:23:19", "throughput": 28471.0, "total_tokens": 1874337856} +{"current_steps": 6690, "total_steps": 14493, "loss": 0.8757, "lr": 3.87039105233008e-05, "epoch": 1.3848761678010404, "percentage": 46.16, "elapsed_time": "18:18:48", "remaining_time": "21:21:37", "throughput": 28471.61, "total_tokens": 1877096512} +{"current_steps": 6700, "total_steps": 14493, "loss": 0.8751, "lr": 3.8692320096746975e-05, "epoch": 1.3869465075955592, "percentage": 46.23, "elapsed_time": "18:20:26", "remaining_time": "21:19:57", "throughput": 28472.54, "total_tokens": 1879937920} +{"current_steps": 6710, "total_steps": 14493, "loss": 0.8932, "lr": 3.868074007670589e-05, "epoch": 1.389016847390078, "percentage": 46.3, "elapsed_time": "18:22:03", "remaining_time": "21:18:17", "throughput": 28473.65, "total_tokens": 1882775616} +{"current_steps": 6720, "total_steps": 14493, "loss": 0.8624, "lr": 3.866917044761428e-05, "epoch": 
1.3910871871845967, "percentage": 46.37, "elapsed_time": "18:23:43", "remaining_time": "21:16:39", "throughput": 28473.47, "total_tokens": 1885600896} +{"current_steps": 6730, "total_steps": 14493, "loss": 0.8761, "lr": 3.8657611193941486e-05, "epoch": 1.3931575269791154, "percentage": 46.44, "elapsed_time": "18:25:20", "remaining_time": "21:15:00", "throughput": 28473.52, "total_tokens": 1888387904} +{"current_steps": 6740, "total_steps": 14493, "loss": 0.871, "lr": 3.8646062300189315e-05, "epoch": 1.3952278667736342, "percentage": 46.51, "elapsed_time": "18:26:58", "remaining_time": "21:13:21", "throughput": 28473.75, "total_tokens": 1891192000} +{"current_steps": 6750, "total_steps": 14493, "loss": 0.8661, "lr": 3.8634523750891984e-05, "epoch": 1.397298206568153, "percentage": 46.57, "elapsed_time": "18:28:38", "remaining_time": "21:11:43", "throughput": 28474.06, "total_tokens": 1894046336} +{"current_steps": 6760, "total_steps": 14493, "loss": 0.8774, "lr": 3.862299553061597e-05, "epoch": 1.3993685463626717, "percentage": 46.64, "elapsed_time": "18:30:14", "remaining_time": "21:10:02", "throughput": 28475.39, "total_tokens": 1896860544} +{"current_steps": 6770, "total_steps": 14493, "loss": 0.8804, "lr": 3.861147762396e-05, "epoch": 1.4014388861571905, "percentage": 46.71, "elapsed_time": "18:31:51", "remaining_time": "21:08:22", "throughput": 28476.25, "total_tokens": 1899695104} +{"current_steps": 6780, "total_steps": 14493, "loss": 0.8732, "lr": 3.859997001555494e-05, "epoch": 1.4035092259517095, "percentage": 46.78, "elapsed_time": "18:33:29", "remaining_time": "21:06:43", "throughput": 28476.41, "total_tokens": 1902498560} +{"current_steps": 6790, "total_steps": 14493, "loss": 0.8768, "lr": 3.8588472690063676e-05, "epoch": 1.405579565746228, "percentage": 46.85, "elapsed_time": "18:35:03", "remaining_time": "21:04:59", "throughput": 28478.03, "total_tokens": 1905277824} +{"current_steps": 6800, "total_steps": 14493, "loss": 0.9108, "lr": 
3.857698563218106e-05, "epoch": 1.407649905540747, "percentage": 46.92, "elapsed_time": "18:36:39", "remaining_time": "21:03:18", "throughput": 28479.08, "total_tokens": 1908084864} +{"current_steps": 6810, "total_steps": 14493, "loss": 0.8922, "lr": 3.8565508826633836e-05, "epoch": 1.4097202453352655, "percentage": 46.99, "elapsed_time": "18:38:20", "remaining_time": "21:01:42", "throughput": 28478.71, "total_tokens": 1910940288} +{"current_steps": 6820, "total_steps": 14493, "loss": 0.8836, "lr": 3.855404225818049e-05, "epoch": 1.4117905851297845, "percentage": 47.06, "elapsed_time": "18:39:58", "remaining_time": "21:00:03", "throughput": 28478.94, "total_tokens": 1913753280} +{"current_steps": 6830, "total_steps": 14493, "loss": 0.8771, "lr": 3.8542585911611286e-05, "epoch": 1.4138609249243033, "percentage": 47.13, "elapsed_time": "18:41:32", "remaining_time": "20:58:20", "throughput": 28480.21, "total_tokens": 1916516672} +{"current_steps": 6840, "total_steps": 14493, "loss": 0.8733, "lr": 3.853113977174803e-05, "epoch": 1.415931264718822, "percentage": 47.2, "elapsed_time": "18:43:09", "remaining_time": "20:56:39", "throughput": 28481.04, "total_tokens": 1919334400} +{"current_steps": 6850, "total_steps": 14493, "loss": 0.8796, "lr": 3.851970382344411e-05, "epoch": 1.4180016045133408, "percentage": 47.26, "elapsed_time": "18:44:47", "remaining_time": "20:55:00", "throughput": 28481.41, "total_tokens": 1922150720} +{"current_steps": 6860, "total_steps": 14493, "loss": 0.8783, "lr": 3.850827805158433e-05, "epoch": 1.4200719443078595, "percentage": 47.33, "elapsed_time": "18:46:23", "remaining_time": "20:53:19", "throughput": 28482.14, "total_tokens": 1924924672} +{"current_steps": 6870, "total_steps": 14493, "loss": 0.8774, "lr": 3.8496862441084896e-05, "epoch": 1.4221422841023783, "percentage": 47.4, "elapsed_time": "18:47:59", "remaining_time": "20:51:38", "throughput": 28482.73, "total_tokens": 1927708352} +{"current_steps": 6880, "total_steps": 14493, 
"loss": 0.8676, "lr": 3.848545697689328e-05, "epoch": 1.424212623896897, "percentage": 47.47, "elapsed_time": "18:49:34", "remaining_time": "20:49:55", "throughput": 28484.6, "total_tokens": 1930525952} +{"current_steps": 6890, "total_steps": 14493, "loss": 0.8801, "lr": 3.8474061643988136e-05, "epoch": 1.4262829636914158, "percentage": 47.54, "elapsed_time": "18:51:08", "remaining_time": "20:48:11", "throughput": 28486.5, "total_tokens": 1933321856} +{"current_steps": 6900, "total_steps": 14493, "loss": 0.8883, "lr": 3.846267642737925e-05, "epoch": 1.4283533034859346, "percentage": 47.61, "elapsed_time": "18:52:46", "remaining_time": "20:46:32", "throughput": 28486.85, "total_tokens": 1936154240} +{"current_steps": 6910, "total_steps": 14493, "loss": 0.8743, "lr": 3.8451301312107455e-05, "epoch": 1.4304236432804533, "percentage": 47.68, "elapsed_time": "18:54:21", "remaining_time": "20:44:50", "throughput": 28487.59, "total_tokens": 1938912256} +{"current_steps": 6920, "total_steps": 14493, "loss": 0.8671, "lr": 3.843993628324451e-05, "epoch": 1.432493983074972, "percentage": 47.75, "elapsed_time": "18:55:58", "remaining_time": "20:43:09", "throughput": 28488.22, "total_tokens": 1941704832} +{"current_steps": 6930, "total_steps": 14493, "loss": 0.872, "lr": 3.8428581325893034e-05, "epoch": 1.434564322869491, "percentage": 47.82, "elapsed_time": "18:57:39", "remaining_time": "20:41:34", "throughput": 28487.71, "total_tokens": 1944554816} +{"current_steps": 6940, "total_steps": 14493, "loss": 0.8772, "lr": 3.8417236425186484e-05, "epoch": 1.4366346626640096, "percentage": 47.89, "elapsed_time": "18:59:13", "remaining_time": "20:39:51", "throughput": 28489.32, "total_tokens": 1947348864} +{"current_steps": 6950, "total_steps": 14493, "loss": 0.8877, "lr": 3.840590156628895e-05, "epoch": 1.4387050024585286, "percentage": 47.95, "elapsed_time": "19:00:52", "remaining_time": "20:38:13", "throughput": 28489.55, "total_tokens": 1950191680} +{"current_steps": 6960, 
"total_steps": 14493, "loss": 0.8733, "lr": 3.8394576734395205e-05, "epoch": 1.4407753422530472, "percentage": 48.02, "elapsed_time": "19:02:25", "remaining_time": "20:36:28", "throughput": 28491.4, "total_tokens": 1952960384} +{"current_steps": 6970, "total_steps": 14493, "loss": 0.8986, "lr": 3.838326191473054e-05, "epoch": 1.4428456820475661, "percentage": 48.09, "elapsed_time": "19:04:02", "remaining_time": "20:34:48", "throughput": 28491.54, "total_tokens": 1955729536} +{"current_steps": 6980, "total_steps": 14493, "loss": 0.8791, "lr": 3.837195709255069e-05, "epoch": 1.444916021842085, "percentage": 48.16, "elapsed_time": "19:05:35", "remaining_time": "20:33:03", "throughput": 28493.17, "total_tokens": 1958483584} +{"current_steps": 6990, "total_steps": 14493, "loss": 0.8751, "lr": 3.8360662253141796e-05, "epoch": 1.4469863616366037, "percentage": 48.23, "elapsed_time": "19:07:04", "remaining_time": "20:31:16", "throughput": 28495.58, "total_tokens": 1961207040} +{"current_steps": 7000, "total_steps": 14493, "loss": 0.8663, "lr": 3.834937738182029e-05, "epoch": 1.4490567014311224, "percentage": 48.3, "elapsed_time": "19:08:40", "remaining_time": "20:29:34", "throughput": 28496.63, "total_tokens": 1963989696} +{"current_steps": 7010, "total_steps": 14493, "loss": 0.8748, "lr": 3.833810246393281e-05, "epoch": 1.4511270412256412, "percentage": 48.37, "elapsed_time": "19:10:21", "remaining_time": "20:27:58", "throughput": 28496.06, "total_tokens": 1966833472} +{"current_steps": 7020, "total_steps": 14493, "loss": 0.8668, "lr": 3.832683748485616e-05, "epoch": 1.45319738102016, "percentage": 48.44, "elapsed_time": "19:11:58", "remaining_time": "20:26:18", "throughput": 28496.22, "total_tokens": 1969617088} +{"current_steps": 7030, "total_steps": 14493, "loss": 0.8779, "lr": 3.8315582429997184e-05, "epoch": 1.4552677208146787, "percentage": 48.51, "elapsed_time": "19:13:30", "remaining_time": "20:24:33", "throughput": 28498.51, "total_tokens": 1972393152} 
+{"current_steps": 7040, "total_steps": 14493, "loss": 0.8771, "lr": 3.830433728479272e-05, "epoch": 1.4573380606091975, "percentage": 48.58, "elapsed_time": "19:15:10", "remaining_time": "20:22:56", "throughput": 28497.46, "total_tokens": 1975173504} +{"current_steps": 7050, "total_steps": 14493, "loss": 0.8802, "lr": 3.829310203470948e-05, "epoch": 1.4594084004037162, "percentage": 48.64, "elapsed_time": "19:16:46", "remaining_time": "20:21:16", "throughput": 28498.22, "total_tokens": 1977975680} +{"current_steps": 7060, "total_steps": 14493, "loss": 0.8693, "lr": 3.828187666524403e-05, "epoch": 1.461478740198235, "percentage": 48.71, "elapsed_time": "19:18:20", "remaining_time": "20:19:32", "throughput": 28500.19, "total_tokens": 1980769216} +{"current_steps": 7070, "total_steps": 14493, "loss": 0.8857, "lr": 3.827066116192266e-05, "epoch": 1.4635490799927537, "percentage": 48.78, "elapsed_time": "19:19:57", "remaining_time": "20:17:52", "throughput": 28500.6, "total_tokens": 1983568768} +{"current_steps": 7080, "total_steps": 14493, "loss": 0.8795, "lr": 3.825945551030135e-05, "epoch": 1.4656194197872725, "percentage": 48.85, "elapsed_time": "19:21:32", "remaining_time": "20:16:10", "throughput": 28502.04, "total_tokens": 1986368448} +{"current_steps": 7090, "total_steps": 14493, "loss": 0.8633, "lr": 3.824825969596561e-05, "epoch": 1.4676897595817913, "percentage": 48.92, "elapsed_time": "19:23:08", "remaining_time": "20:14:29", "throughput": 28502.89, "total_tokens": 1989174272} +{"current_steps": 7100, "total_steps": 14493, "loss": 0.8702, "lr": 3.823707370453054e-05, "epoch": 1.4697600993763102, "percentage": 48.99, "elapsed_time": "19:24:43", "remaining_time": "20:12:47", "throughput": 28504.1, "total_tokens": 1991973120} +{"current_steps": 7110, "total_steps": 14493, "loss": 0.8792, "lr": 3.8225897521640614e-05, "epoch": 1.4718304391708288, "percentage": 49.06, "elapsed_time": "19:26:23", "remaining_time": "20:11:10", "throughput": 28504.34, 
"total_tokens": 1994826944} +{"current_steps": 7120, "total_steps": 14493, "loss": 0.887, "lr": 3.8214731132969675e-05, "epoch": 1.4739007789653478, "percentage": 49.13, "elapsed_time": "19:27:57", "remaining_time": "20:09:27", "throughput": 28505.89, "total_tokens": 1997611776} +{"current_steps": 7130, "total_steps": 14493, "loss": 0.8701, "lr": 3.820357452422084e-05, "epoch": 1.4759711187598665, "percentage": 49.2, "elapsed_time": "19:29:34", "remaining_time": "20:07:47", "throughput": 28506.87, "total_tokens": 2000443712} +{"current_steps": 7140, "total_steps": 14493, "loss": 0.8709, "lr": 3.8192427681126445e-05, "epoch": 1.4780414585543853, "percentage": 49.27, "elapsed_time": "19:31:10", "remaining_time": "20:06:07", "throughput": 28507.66, "total_tokens": 2003257024} +{"current_steps": 7150, "total_steps": 14493, "loss": 0.8724, "lr": 3.818129058944793e-05, "epoch": 1.480111798348904, "percentage": 49.33, "elapsed_time": "19:32:47", "remaining_time": "20:04:26", "throughput": 28508.14, "total_tokens": 2006042816} +{"current_steps": 7160, "total_steps": 14493, "loss": 0.882, "lr": 3.817016323497578e-05, "epoch": 1.4821821381434228, "percentage": 49.4, "elapsed_time": "19:34:25", "remaining_time": "20:02:48", "throughput": 28508.66, "total_tokens": 2008874944} +{"current_steps": 7170, "total_steps": 14493, "loss": 0.8674, "lr": 3.8159045603529455e-05, "epoch": 1.4842524779379416, "percentage": 49.47, "elapsed_time": "19:35:59", "remaining_time": "20:01:04", "throughput": 28510.01, "total_tokens": 2011640192} +{"current_steps": 7180, "total_steps": 14493, "loss": 0.8765, "lr": 3.8147937680957334e-05, "epoch": 1.4863228177324603, "percentage": 49.54, "elapsed_time": "19:37:33", "remaining_time": "19:59:22", "throughput": 28511.05, "total_tokens": 2014404416} +{"current_steps": 7190, "total_steps": 14493, "loss": 0.867, "lr": 3.813683945313658e-05, "epoch": 1.488393157526979, "percentage": 49.61, "elapsed_time": "19:39:13", "remaining_time": "19:57:45", 
"throughput": 28510.88, "total_tokens": 2017239872} +{"current_steps": 7200, "total_steps": 14493, "loss": 0.867, "lr": 3.812575090597313e-05, "epoch": 1.4904634973214979, "percentage": 49.68, "elapsed_time": "19:40:48", "remaining_time": "19:56:03", "throughput": 28512.35, "total_tokens": 2020055680} +{"current_steps": 7210, "total_steps": 14493, "loss": 0.8824, "lr": 3.811467202540156e-05, "epoch": 1.4925338371160166, "percentage": 49.75, "elapsed_time": "19:42:25", "remaining_time": "19:54:23", "throughput": 28513.02, "total_tokens": 2022862656} +{"current_steps": 7220, "total_steps": 14493, "loss": 0.8715, "lr": 3.810360279738507e-05, "epoch": 1.4946041769105354, "percentage": 49.82, "elapsed_time": "19:44:03", "remaining_time": "19:52:45", "throughput": 28513.15, "total_tokens": 2025678976} +{"current_steps": 7230, "total_steps": 14493, "loss": 0.8688, "lr": 3.809254320791535e-05, "epoch": 1.4966745167050541, "percentage": 49.89, "elapsed_time": "19:45:41", "remaining_time": "19:51:05", "throughput": 28513.86, "total_tokens": 2028509632} +{"current_steps": 7240, "total_steps": 14493, "loss": 0.8621, "lr": 3.808149324301256e-05, "epoch": 1.498744856499573, "percentage": 49.96, "elapsed_time": "19:47:18", "remaining_time": "19:49:26", "throughput": 28514.5, "total_tokens": 2031328768} +{"current_steps": 7250, "total_steps": 14493, "loss": 0.8776, "lr": 3.807045288872522e-05, "epoch": 1.5008151962940919, "percentage": 50.02, "elapsed_time": "19:48:55", "remaining_time": "19:47:46", "throughput": 28515.42, "total_tokens": 2034166528} +{"current_steps": 7260, "total_steps": 14493, "loss": 0.8753, "lr": 3.805942213113015e-05, "epoch": 1.5028855360886104, "percentage": 50.09, "elapsed_time": "19:50:32", "remaining_time": "19:46:06", "throughput": 28516.17, "total_tokens": 2036969216} +{"current_steps": 7270, "total_steps": 14493, "loss": 0.8897, "lr": 3.8048400956332385e-05, "epoch": 1.5049558758831294, "percentage": 50.16, "elapsed_time": "19:52:06", 
"remaining_time": "19:44:24", "throughput": 28517.54, "total_tokens": 2039757952} +{"current_steps": 7280, "total_steps": 14493, "loss": 0.8795, "lr": 3.803738935046512e-05, "epoch": 1.507026215677648, "percentage": 50.23, "elapsed_time": "19:53:49", "remaining_time": "19:42:50", "throughput": 28516.86, "total_tokens": 2042643584} +{"current_steps": 7290, "total_steps": 14493, "loss": 0.8753, "lr": 3.802638729968962e-05, "epoch": 1.509096555472167, "percentage": 50.3, "elapsed_time": "19:55:25", "remaining_time": "19:41:09", "throughput": 28517.58, "total_tokens": 2045446400} +{"current_steps": 7300, "total_steps": 14493, "loss": 0.8772, "lr": 3.8015394790195145e-05, "epoch": 1.5111668952666857, "percentage": 50.37, "elapsed_time": "19:57:03", "remaining_time": "19:39:30", "throughput": 28517.68, "total_tokens": 2048239552} +{"current_steps": 7310, "total_steps": 14493, "loss": 0.8754, "lr": 3.800441180819891e-05, "epoch": 1.5132372350612044, "percentage": 50.44, "elapsed_time": "19:58:41", "remaining_time": "19:37:51", "throughput": 28518.05, "total_tokens": 2051053824} +{"current_steps": 7320, "total_steps": 14493, "loss": 0.8704, "lr": 3.7993438339945965e-05, "epoch": 1.5153075748557232, "percentage": 50.51, "elapsed_time": "20:00:19", "remaining_time": "19:36:13", "throughput": 28518.25, "total_tokens": 2053864576} +{"current_steps": 7330, "total_steps": 14493, "loss": 0.8834, "lr": 3.798247437170914e-05, "epoch": 1.517377914650242, "percentage": 50.58, "elapsed_time": "20:01:54", "remaining_time": "19:34:31", "throughput": 28518.81, "total_tokens": 2056630208} +{"current_steps": 7340, "total_steps": 14493, "loss": 0.8786, "lr": 3.797151988978901e-05, "epoch": 1.5194482544447607, "percentage": 50.65, "elapsed_time": "20:03:35", "remaining_time": "19:32:55", "throughput": 28518.2, "total_tokens": 2059448576} +{"current_steps": 7350, "total_steps": 14493, "loss": 0.8717, "lr": 3.796057488051377e-05, "epoch": 1.5215185942392795, "percentage": 50.71, 
"elapsed_time": "20:05:14", "remaining_time": "19:31:17", "throughput": 28518.57, "total_tokens": 2062292736} +{"current_steps": 7360, "total_steps": 14493, "loss": 0.8789, "lr": 3.794963933023918e-05, "epoch": 1.5235889340337982, "percentage": 50.78, "elapsed_time": "20:06:50", "remaining_time": "19:29:37", "throughput": 28519.56, "total_tokens": 2065118144} +{"current_steps": 7370, "total_steps": 14493, "loss": 0.8745, "lr": 3.79387132253485e-05, "epoch": 1.525659273828317, "percentage": 50.85, "elapsed_time": "20:08:22", "remaining_time": "19:27:52", "throughput": 28521.36, "total_tokens": 2067868160} +{"current_steps": 7380, "total_steps": 14493, "loss": 0.8732, "lr": 3.792779655225243e-05, "epoch": 1.527729613622836, "percentage": 50.92, "elapsed_time": "20:09:55", "remaining_time": "19:26:09", "throughput": 28522.46, "total_tokens": 2070615104} +{"current_steps": 7390, "total_steps": 14493, "loss": 0.8748, "lr": 3.791688929738902e-05, "epoch": 1.5297999534173545, "percentage": 50.99, "elapsed_time": "20:11:31", "remaining_time": "19:24:28", "throughput": 28523.37, "total_tokens": 2073407808} +{"current_steps": 7400, "total_steps": 14493, "loss": 0.874, "lr": 3.79059914472236e-05, "epoch": 1.5318702932118735, "percentage": 51.06, "elapsed_time": "20:13:08", "remaining_time": "19:22:48", "throughput": 28523.74, "total_tokens": 2076185792} +{"current_steps": 7410, "total_steps": 14493, "loss": 0.8779, "lr": 3.7895102988248716e-05, "epoch": 1.533940633006392, "percentage": 51.13, "elapsed_time": "20:14:44", "remaining_time": "19:21:08", "throughput": 28524.25, "total_tokens": 2078977856} +{"current_steps": 7420, "total_steps": 14493, "loss": 0.8882, "lr": 3.7884223906984064e-05, "epoch": 1.536010972800911, "percentage": 51.2, "elapsed_time": "20:16:19", "remaining_time": "19:19:26", "throughput": 28524.69, "total_tokens": 2081716608} +{"current_steps": 7430, "total_steps": 14493, "loss": 0.8554, "lr": 3.787335418997641e-05, "epoch": 1.5380813125954296, 
"percentage": 51.27, "elapsed_time": "20:17:56", "remaining_time": "19:17:46", "throughput": 28525.86, "total_tokens": 2084561728} +{"current_steps": 7440, "total_steps": 14493, "loss": 0.8764, "lr": 3.786249382379952e-05, "epoch": 1.5401516523899486, "percentage": 51.34, "elapsed_time": "20:19:29", "remaining_time": "19:16:03", "throughput": 28527.18, "total_tokens": 2087328704} +{"current_steps": 7450, "total_steps": 14493, "loss": 0.8673, "lr": 3.785164279505411e-05, "epoch": 1.5422219921844673, "percentage": 51.4, "elapsed_time": "20:21:03", "remaining_time": "19:14:21", "throughput": 28528.18, "total_tokens": 2090075072} +{"current_steps": 7460, "total_steps": 14493, "loss": 0.8735, "lr": 3.7840801090367744e-05, "epoch": 1.544292331978986, "percentage": 51.47, "elapsed_time": "20:22:38", "remaining_time": "19:12:39", "throughput": 28529.27, "total_tokens": 2092858432} +{"current_steps": 7470, "total_steps": 14493, "loss": 0.8898, "lr": 3.782996869639479e-05, "epoch": 1.5463626717735048, "percentage": 51.54, "elapsed_time": "20:24:15", "remaining_time": "19:10:59", "throughput": 28529.55, "total_tokens": 2095640512} +{"current_steps": 7480, "total_steps": 14493, "loss": 0.8634, "lr": 3.7819145599816354e-05, "epoch": 1.5484330115680236, "percentage": 51.61, "elapsed_time": "20:25:51", "remaining_time": "19:09:19", "throughput": 28530.22, "total_tokens": 2098438208} +{"current_steps": 7490, "total_steps": 14493, "loss": 0.8695, "lr": 3.780833178734018e-05, "epoch": 1.5505033513625424, "percentage": 51.68, "elapsed_time": "20:27:29", "remaining_time": "19:07:40", "throughput": 28530.47, "total_tokens": 2101256384} +{"current_steps": 7500, "total_steps": 14493, "loss": 0.8706, "lr": 3.77975272457006e-05, "epoch": 1.5525736911570611, "percentage": 51.75, "elapsed_time": "20:29:05", "remaining_time": "19:06:00", "throughput": 28531.65, "total_tokens": 2104087232} +{"current_steps": 7510, "total_steps": 14493, "loss": 0.8838, "lr": 3.778673196165851e-05, "epoch": 
1.5546440309515799, "percentage": 51.82, "elapsed_time": "20:30:43", "remaining_time": "19:04:21", "throughput": 28531.95, "total_tokens": 2106899520} +{"current_steps": 7520, "total_steps": 14493, "loss": 0.8805, "lr": 3.7775945922001186e-05, "epoch": 1.5567143707460986, "percentage": 51.89, "elapsed_time": "20:32:22", "remaining_time": "19:02:44", "throughput": 28532.06, "total_tokens": 2109737664} +{"current_steps": 7530, "total_steps": 14493, "loss": 0.8844, "lr": 3.776516911354236e-05, "epoch": 1.5587847105406176, "percentage": 51.96, "elapsed_time": "20:34:04", "remaining_time": "19:01:08", "throughput": 28532.06, "total_tokens": 2112633216} +{"current_steps": 7540, "total_steps": 14493, "loss": 0.883, "lr": 3.775440152312205e-05, "epoch": 1.5608550503351362, "percentage": 52.03, "elapsed_time": "20:35:43", "remaining_time": "18:59:31", "throughput": 28531.74, "total_tokens": 2115449984} +{"current_steps": 7550, "total_steps": 14493, "loss": 0.8841, "lr": 3.774364313760652e-05, "epoch": 1.5629253901296551, "percentage": 52.09, "elapsed_time": "20:37:19", "remaining_time": "18:57:51", "throughput": 28533.05, "total_tokens": 2118286912} +{"current_steps": 7560, "total_steps": 14493, "loss": 0.865, "lr": 3.7732893943888224e-05, "epoch": 1.5649957299241737, "percentage": 52.16, "elapsed_time": "20:38:58", "remaining_time": "18:56:12", "throughput": 28533.34, "total_tokens": 2121119680} +{"current_steps": 7570, "total_steps": 14493, "loss": 0.8673, "lr": 3.772215392888574e-05, "epoch": 1.5670660697186927, "percentage": 52.23, "elapsed_time": "20:40:37", "remaining_time": "18:54:35", "throughput": 28533.49, "total_tokens": 2123964928} +{"current_steps": 7580, "total_steps": 14493, "loss": 0.873, "lr": 3.771142307954368e-05, "epoch": 1.5691364095132112, "percentage": 52.3, "elapsed_time": "20:42:18", "remaining_time": "18:52:59", "throughput": 28533.25, "total_tokens": 2126817344} +{"current_steps": 7590, "total_steps": 14493, "loss": 0.8746, "lr": 
3.770070138283264e-05, "epoch": 1.5712067493077302, "percentage": 52.37, "elapsed_time": "20:43:57", "remaining_time": "18:51:22", "throughput": 28533.68, "total_tokens": 2129693568} +{"current_steps": 7600, "total_steps": 14493, "loss": 0.8844, "lr": 3.768998882574915e-05, "epoch": 1.573277089102249, "percentage": 52.44, "elapsed_time": "20:45:39", "remaining_time": "18:49:47", "throughput": 28533.09, "total_tokens": 2132556736} +{"current_steps": 7610, "total_steps": 14493, "loss": 0.8661, "lr": 3.767928539531557e-05, "epoch": 1.5753474288967677, "percentage": 52.51, "elapsed_time": "20:47:22", "remaining_time": "18:48:12", "throughput": 28531.64, "total_tokens": 2135385920} +{"current_steps": 7620, "total_steps": 14493, "loss": 0.8864, "lr": 3.7668591078580055e-05, "epoch": 1.5774177686912865, "percentage": 52.58, "elapsed_time": "20:48:56", "remaining_time": "18:46:30", "throughput": 28533.19, "total_tokens": 2138179200} +{"current_steps": 7630, "total_steps": 14493, "loss": 0.8835, "lr": 3.765790586261647e-05, "epoch": 1.5794881084858052, "percentage": 52.65, "elapsed_time": "20:50:33", "remaining_time": "18:44:51", "throughput": 28533.89, "total_tokens": 2141007296} +{"current_steps": 7640, "total_steps": 14493, "loss": 0.8523, "lr": 3.7647229734524326e-05, "epoch": 1.581558448280324, "percentage": 52.72, "elapsed_time": "20:52:07", "remaining_time": "18:43:08", "throughput": 28534.98, "total_tokens": 2143770304} +{"current_steps": 7650, "total_steps": 14493, "loss": 0.8583, "lr": 3.7636562681428744e-05, "epoch": 1.5836287880748428, "percentage": 52.78, "elapsed_time": "20:53:44", "remaining_time": "18:41:28", "throughput": 28535.23, "total_tokens": 2146543680} +{"current_steps": 7660, "total_steps": 14493, "loss": 0.8839, "lr": 3.7625904690480346e-05, "epoch": 1.5856991278693615, "percentage": 52.85, "elapsed_time": "20:55:27", "remaining_time": "18:39:54", "throughput": 28535.06, "total_tokens": 2149469568} +{"current_steps": 7670, "total_steps": 14493, 
"loss": 0.865, "lr": 3.7615255748855224e-05, "epoch": 1.5877694676638803, "percentage": 52.92, "elapsed_time": "20:57:02", "remaining_time": "18:38:13", "throughput": 28535.78, "total_tokens": 2152244096} +{"current_steps": 7680, "total_steps": 14493, "loss": 0.8742, "lr": 3.7604615843754845e-05, "epoch": 1.5898398074583993, "percentage": 52.99, "elapsed_time": "20:58:36", "remaining_time": "18:36:31", "throughput": 28536.9, "total_tokens": 2155017024} +{"current_steps": 7690, "total_steps": 14493, "loss": 0.8785, "lr": 3.759398496240601e-05, "epoch": 1.5919101472529178, "percentage": 53.06, "elapsed_time": "21:00:10", "remaining_time": "18:34:49", "throughput": 28538.35, "total_tokens": 2157809792} +{"current_steps": 7700, "total_steps": 14493, "loss": 0.8735, "lr": 3.7583363092060815e-05, "epoch": 1.5939804870474368, "percentage": 53.13, "elapsed_time": "21:01:49", "remaining_time": "18:33:11", "throughput": 28538.5, "total_tokens": 2160624192} +{"current_steps": 7710, "total_steps": 14493, "loss": 0.8829, "lr": 3.757275021999649e-05, "epoch": 1.5960508268419553, "percentage": 53.2, "elapsed_time": "21:03:22", "remaining_time": "18:31:28", "throughput": 28539.37, "total_tokens": 2163369472} +{"current_steps": 7720, "total_steps": 14493, "loss": 0.8755, "lr": 3.7562146333515445e-05, "epoch": 1.5981211666364743, "percentage": 53.27, "elapsed_time": "21:04:57", "remaining_time": "18:29:46", "throughput": 28540.72, "total_tokens": 2166157376} +{"current_steps": 7730, "total_steps": 14493, "loss": 0.8782, "lr": 3.7551551419945167e-05, "epoch": 1.6001915064309928, "percentage": 53.34, "elapsed_time": "21:06:30", "remaining_time": "18:28:04", "throughput": 28542.01, "total_tokens": 2168935808} +{"current_steps": 7740, "total_steps": 14493, "loss": 0.8771, "lr": 3.7540965466638104e-05, "epoch": 1.6022618462255118, "percentage": 53.41, "elapsed_time": "21:08:03", "remaining_time": "18:26:21", "throughput": 28543.68, "total_tokens": 2171717376} +{"current_steps": 7750, 
"total_steps": 14493, "loss": 0.8712, "lr": 3.753038846097172e-05, "epoch": 1.6043321860200306, "percentage": 53.47, "elapsed_time": "21:09:42", "remaining_time": "18:24:43", "throughput": 28543.94, "total_tokens": 2174551936} +{"current_steps": 7760, "total_steps": 14493, "loss": 0.8723, "lr": 3.751982039034827e-05, "epoch": 1.6064025258145493, "percentage": 53.54, "elapsed_time": "21:11:14", "remaining_time": "18:22:59", "throughput": 28545.27, "total_tokens": 2177273152} +{"current_steps": 7770, "total_steps": 14493, "loss": 0.8812, "lr": 3.75092612421949e-05, "epoch": 1.608472865609068, "percentage": 53.61, "elapsed_time": "21:12:45", "remaining_time": "18:21:15", "throughput": 28546.98, "total_tokens": 2180005952} +{"current_steps": 7780, "total_steps": 14493, "loss": 0.8651, "lr": 3.7498711003963475e-05, "epoch": 1.6105432054035869, "percentage": 53.68, "elapsed_time": "21:14:22", "remaining_time": "18:19:35", "throughput": 28547.11, "total_tokens": 2182782400} +{"current_steps": 7790, "total_steps": 14493, "loss": 0.8764, "lr": 3.748816966313058e-05, "epoch": 1.6126135451981056, "percentage": 53.75, "elapsed_time": "21:15:57", "remaining_time": "18:17:55", "throughput": 28548.39, "total_tokens": 2185606272} +{"current_steps": 7800, "total_steps": 14493, "loss": 0.8621, "lr": 3.7477637207197374e-05, "epoch": 1.6146838849926244, "percentage": 53.82, "elapsed_time": "21:17:37", "remaining_time": "18:16:18", "throughput": 28548.05, "total_tokens": 2188433600} +{"current_steps": 7810, "total_steps": 14493, "loss": 0.8822, "lr": 3.7467113623689666e-05, "epoch": 1.6167542247871431, "percentage": 53.89, "elapsed_time": "21:19:13", "remaining_time": "18:14:37", "throughput": 28549.33, "total_tokens": 2191248960} +{"current_steps": 7820, "total_steps": 14493, "loss": 0.8728, "lr": 3.745659890015768e-05, "epoch": 1.618824564581662, "percentage": 53.96, "elapsed_time": "21:20:48", "remaining_time": "18:12:56", "throughput": 28550.29, "total_tokens": 2194055360} 
+{"current_steps": 7830, "total_steps": 14493, "loss": 0.8836, "lr": 3.744609302417615e-05, "epoch": 1.620894904376181, "percentage": 54.03, "elapsed_time": "21:22:31", "remaining_time": "18:11:22", "throughput": 28548.93, "total_tokens": 2196894976} +{"current_steps": 7840, "total_steps": 14493, "loss": 0.8843, "lr": 3.7435595983344175e-05, "epoch": 1.6229652441706994, "percentage": 54.1, "elapsed_time": "21:24:03", "remaining_time": "18:09:38", "throughput": 28551.21, "total_tokens": 2199683520} +{"current_steps": 7850, "total_steps": 14493, "loss": 0.8792, "lr": 3.7425107765285155e-05, "epoch": 1.6250355839652184, "percentage": 54.16, "elapsed_time": "21:25:40", "remaining_time": "18:07:59", "throughput": 28551.37, "total_tokens": 2202471168} +{"current_steps": 7860, "total_steps": 14493, "loss": 0.8784, "lr": 3.741462835764676e-05, "epoch": 1.627105923759737, "percentage": 54.23, "elapsed_time": "21:27:19", "remaining_time": "18:06:21", "throughput": 28551.62, "total_tokens": 2205308800} +{"current_steps": 7870, "total_steps": 14493, "loss": 0.8615, "lr": 3.740415774810088e-05, "epoch": 1.629176263554256, "percentage": 54.3, "elapsed_time": "21:28:54", "remaining_time": "18:04:41", "throughput": 28552.29, "total_tokens": 2208085056} +{"current_steps": 7880, "total_steps": 14493, "loss": 0.866, "lr": 3.739369592434351e-05, "epoch": 1.6312466033487745, "percentage": 54.37, "elapsed_time": "21:30:33", "remaining_time": "18:03:03", "throughput": 28552.23, "total_tokens": 2210903488} +{"current_steps": 7890, "total_steps": 14493, "loss": 0.8643, "lr": 3.738324287409473e-05, "epoch": 1.6333169431432935, "percentage": 54.44, "elapsed_time": "21:32:07", "remaining_time": "18:01:21", "throughput": 28553.07, "total_tokens": 2213641792} +{"current_steps": 7900, "total_steps": 14493, "loss": 0.8759, "lr": 3.7372798585098644e-05, "epoch": 1.6353872829378122, "percentage": 54.51, "elapsed_time": "21:33:48", "remaining_time": "17:59:45", "throughput": 28552.43, 
"total_tokens": 2216480064} +{"current_steps": 7910, "total_steps": 14493, "loss": 0.863, "lr": 3.736236304512331e-05, "epoch": 1.637457622732331, "percentage": 54.58, "elapsed_time": "21:35:24", "remaining_time": "17:58:05", "throughput": 28553.54, "total_tokens": 2219303232} +{"current_steps": 7920, "total_steps": 14493, "loss": 0.8724, "lr": 3.735193624196067e-05, "epoch": 1.6395279625268497, "percentage": 54.65, "elapsed_time": "21:36:56", "remaining_time": "17:56:21", "throughput": 28555.31, "total_tokens": 2222061376} +{"current_steps": 7930, "total_steps": 14493, "loss": 0.8638, "lr": 3.7341518163426514e-05, "epoch": 1.6415983023213685, "percentage": 54.72, "elapsed_time": "21:38:32", "remaining_time": "17:54:41", "throughput": 28556.16, "total_tokens": 2224871232} +{"current_steps": 7940, "total_steps": 14493, "loss": 0.8688, "lr": 3.73311087973604e-05, "epoch": 1.6436686421158873, "percentage": 54.79, "elapsed_time": "21:40:03", "remaining_time": "17:52:57", "throughput": 28558.4, "total_tokens": 2227663872} +{"current_steps": 7950, "total_steps": 14493, "loss": 0.8786, "lr": 3.732070813162561e-05, "epoch": 1.645738981910406, "percentage": 54.85, "elapsed_time": "21:41:37", "remaining_time": "17:51:15", "throughput": 28559.81, "total_tokens": 2230453760} +{"current_steps": 7960, "total_steps": 14493, "loss": 0.8746, "lr": 3.731031615410908e-05, "epoch": 1.6478093217049248, "percentage": 54.92, "elapsed_time": "21:43:13", "remaining_time": "17:49:35", "throughput": 28560.85, "total_tokens": 2233270144} +{"current_steps": 7970, "total_steps": 14493, "loss": 0.8699, "lr": 3.729993285272132e-05, "epoch": 1.6498796614994435, "percentage": 54.99, "elapsed_time": "21:44:47", "remaining_time": "17:47:54", "throughput": 28561.87, "total_tokens": 2236041280} +{"current_steps": 7980, "total_steps": 14493, "loss": 0.8558, "lr": 3.7289558215396414e-05, "epoch": 1.6519500012939625, "percentage": 55.06, "elapsed_time": "21:46:24", "remaining_time": "17:46:14", 
"throughput": 28562.18, "total_tokens": 2238838592} +{"current_steps": 7990, "total_steps": 14493, "loss": 0.8758, "lr": 3.727919223009191e-05, "epoch": 1.654020341088481, "percentage": 55.13, "elapsed_time": "21:48:00", "remaining_time": "17:44:34", "throughput": 28562.63, "total_tokens": 2241609216} +{"current_steps": 8000, "total_steps": 14493, "loss": 0.8666, "lr": 3.726883488478877e-05, "epoch": 1.656090680883, "percentage": 55.2, "elapsed_time": "21:49:34", "remaining_time": "17:42:53", "throughput": 28563.71, "total_tokens": 2244390464} +{"current_steps": 8010, "total_steps": 14493, "loss": 0.8799, "lr": 3.7258486167491323e-05, "epoch": 1.6581610206775186, "percentage": 55.27, "elapsed_time": "21:51:17", "remaining_time": "17:41:18", "throughput": 28562.36, "total_tokens": 2247220288} +{"current_steps": 8020, "total_steps": 14493, "loss": 0.8776, "lr": 3.724814606622721e-05, "epoch": 1.6602313604720376, "percentage": 55.34, "elapsed_time": "21:52:51", "remaining_time": "17:39:37", "throughput": 28563.68, "total_tokens": 2250006144} +{"current_steps": 8030, "total_steps": 14493, "loss": 0.868, "lr": 3.7237814569047294e-05, "epoch": 1.662301700266556, "percentage": 55.41, "elapsed_time": "21:54:26", "remaining_time": "17:37:56", "throughput": 28564.23, "total_tokens": 2252764800} +{"current_steps": 8040, "total_steps": 14493, "loss": 0.8792, "lr": 3.7227491664025656e-05, "epoch": 1.664372040061075, "percentage": 55.48, "elapsed_time": "21:56:13", "remaining_time": "17:36:24", "throughput": 28561.5, "total_tokens": 2255598016} +{"current_steps": 8050, "total_steps": 14493, "loss": 0.8874, "lr": 3.721717733925948e-05, "epoch": 1.6664423798555938, "percentage": 55.54, "elapsed_time": "21:58:01", "remaining_time": "17:34:54", "throughput": 28557.47, "total_tokens": 2258378624} +{"current_steps": 8060, "total_steps": 14493, "loss": 0.8532, "lr": 3.720687158286904e-05, "epoch": 1.6685127196501126, "percentage": 55.61, "elapsed_time": "21:59:47", "remaining_time": 
"17:33:22", "throughput": 28554.45, "total_tokens": 2261147072} +{"current_steps": 8070, "total_steps": 14493, "loss": 0.8811, "lr": 3.719657438299762e-05, "epoch": 1.6705830594446314, "percentage": 55.68, "elapsed_time": "22:01:30", "remaining_time": "17:31:47", "throughput": 28552.41, "total_tokens": 2263923072} +{"current_steps": 8080, "total_steps": 14493, "loss": 0.8702, "lr": 3.7186285727811446e-05, "epoch": 1.6726533992391501, "percentage": 55.75, "elapsed_time": "22:03:18", "remaining_time": "17:30:17", "throughput": 28549.15, "total_tokens": 2266769536} +{"current_steps": 8090, "total_steps": 14493, "loss": 0.8597, "lr": 3.717600560549967e-05, "epoch": 1.674723739033669, "percentage": 55.82, "elapsed_time": "22:05:04", "remaining_time": "17:28:45", "throughput": 28546.57, "total_tokens": 2269589184} +{"current_steps": 8100, "total_steps": 14493, "loss": 0.8655, "lr": 3.716573400427426e-05, "epoch": 1.6767940788281877, "percentage": 55.89, "elapsed_time": "22:06:53", "remaining_time": "17:27:15", "throughput": 28542.86, "total_tokens": 2272400704} +{"current_steps": 8110, "total_steps": 14493, "loss": 0.8626, "lr": 3.7155470912370004e-05, "epoch": 1.6788644186227064, "percentage": 55.96, "elapsed_time": "22:08:39", "remaining_time": "17:25:43", "throughput": 28540.32, "total_tokens": 2275225856} +{"current_steps": 8120, "total_steps": 14493, "loss": 0.8736, "lr": 3.714521631804439e-05, "epoch": 1.6809347584172252, "percentage": 56.03, "elapsed_time": "22:10:27", "remaining_time": "17:24:12", "throughput": 28536.99, "total_tokens": 2278042304} +{"current_steps": 8130, "total_steps": 14493, "loss": 0.856, "lr": 3.713497020957759e-05, "epoch": 1.6830050982117442, "percentage": 56.1, "elapsed_time": "22:12:12", "remaining_time": "17:22:39", "throughput": 28534.4, "total_tokens": 2280825664} +{"current_steps": 8140, "total_steps": 14493, "loss": 0.8725, "lr": 3.712473257527238e-05, "epoch": 1.6850754380062627, "percentage": 56.17, "elapsed_time": "22:13:57", 
"remaining_time": "17:21:06", "throughput": 28531.93, "total_tokens": 2283620224} +{"current_steps": 8150, "total_steps": 14493, "loss": 0.8588, "lr": 3.711450340345412e-05, "epoch": 1.6871457778007817, "percentage": 56.23, "elapsed_time": "22:15:45", "remaining_time": "17:19:35", "throughput": 28528.49, "total_tokens": 2286426560} +{"current_steps": 8160, "total_steps": 14493, "loss": 0.872, "lr": 3.710428268247067e-05, "epoch": 1.6892161175953002, "percentage": 56.3, "elapsed_time": "22:17:31", "remaining_time": "17:18:03", "throughput": 28525.68, "total_tokens": 2289224576} +{"current_steps": 8170, "total_steps": 14493, "loss": 0.8596, "lr": 3.709407040069233e-05, "epoch": 1.6912864573898192, "percentage": 56.37, "elapsed_time": "22:19:22", "remaining_time": "17:16:34", "throughput": 28521.42, "total_tokens": 2292050048} +{"current_steps": 8180, "total_steps": 14493, "loss": 0.878, "lr": 3.708386654651179e-05, "epoch": 1.6933567971843377, "percentage": 56.44, "elapsed_time": "22:21:12", "remaining_time": "17:15:05", "throughput": 28517.22, "total_tokens": 2294853568} +{"current_steps": 8190, "total_steps": 14493, "loss": 0.8669, "lr": 3.707367110834409e-05, "epoch": 1.6954271369788567, "percentage": 56.51, "elapsed_time": "22:22:52", "remaining_time": "17:13:28", "throughput": 28516.26, "total_tokens": 2297627648} +{"current_steps": 8200, "total_steps": 14493, "loss": 0.8637, "lr": 3.7063484074626555e-05, "epoch": 1.6974974767733755, "percentage": 56.58, "elapsed_time": "22:24:36", "remaining_time": "17:11:54", "throughput": 28514.4, "total_tokens": 2300451520} +{"current_steps": 8210, "total_steps": 14493, "loss": 0.8542, "lr": 3.7053305433818725e-05, "epoch": 1.6995678165678942, "percentage": 56.65, "elapsed_time": "22:26:34", "remaining_time": "17:10:30", "throughput": 28507.94, "total_tokens": 2303287168} +{"current_steps": 8220, "total_steps": 14493, "loss": 0.8648, "lr": 3.704313517440232e-05, "epoch": 1.701638156362413, "percentage": 56.72, 
"elapsed_time": "22:28:33", "remaining_time": "17:09:07", "throughput": 28501.42, "total_tokens": 2306137600} +{"current_steps": 8230, "total_steps": 14493, "loss": 0.8727, "lr": 3.703297328488118e-05, "epoch": 1.7037084961569318, "percentage": 56.79, "elapsed_time": "22:30:22", "remaining_time": "17:07:37", "throughput": 28497.18, "total_tokens": 2308899136} +{"current_steps": 8240, "total_steps": 14493, "loss": 0.8709, "lr": 3.70228197537812e-05, "epoch": 1.7057788359514505, "percentage": 56.86, "elapsed_time": "22:32:06", "remaining_time": "17:06:03", "throughput": 28495.0, "total_tokens": 2311701952} +{"current_steps": 8250, "total_steps": 14493, "loss": 0.8691, "lr": 3.7012674569650305e-05, "epoch": 1.7078491757459693, "percentage": 56.92, "elapsed_time": "22:33:48", "remaining_time": "17:04:27", "throughput": 28493.97, "total_tokens": 2314509696} +{"current_steps": 8260, "total_steps": 14493, "loss": 0.8694, "lr": 3.700253772105835e-05, "epoch": 1.709919515540488, "percentage": 56.99, "elapsed_time": "22:35:31", "remaining_time": "17:02:52", "throughput": 28492.21, "total_tokens": 2317309888} +{"current_steps": 8270, "total_steps": 14493, "loss": 0.8689, "lr": 3.699240919659711e-05, "epoch": 1.7119898553350068, "percentage": 57.06, "elapsed_time": "22:37:17", "remaining_time": "17:01:19", "throughput": 28489.68, "total_tokens": 2320128704} +{"current_steps": 8280, "total_steps": 14493, "loss": 0.8756, "lr": 3.698228898488019e-05, "epoch": 1.7140601951295258, "percentage": 57.13, "elapsed_time": "22:39:08", "remaining_time": "16:59:50", "throughput": 28484.89, "total_tokens": 2322895168} +{"current_steps": 8290, "total_steps": 14493, "loss": 0.8618, "lr": 3.6972177074543e-05, "epoch": 1.7161305349240443, "percentage": 57.2, "elapsed_time": "22:41:00", "remaining_time": "16:58:22", "throughput": 28480.0, "total_tokens": 2325692160} +{"current_steps": 8300, "total_steps": 14493, "loss": 0.863, "lr": 3.69620734542427e-05, "epoch": 1.7182008747185633, 
"percentage": 57.27, "elapsed_time": "22:42:59", "remaining_time": "16:56:59", "throughput": 28473.24, "total_tokens": 2328516864} +{"current_steps": 8310, "total_steps": 14493, "loss": 0.8726, "lr": 3.695197811265811e-05, "epoch": 1.7202712145130818, "percentage": 57.34, "elapsed_time": "22:44:49", "remaining_time": "16:55:29", "throughput": 28469.09, "total_tokens": 2331326400} +{"current_steps": 8320, "total_steps": 14493, "loss": 0.8751, "lr": 3.6941891038489694e-05, "epoch": 1.7223415543076008, "percentage": 57.41, "elapsed_time": "22:46:30", "remaining_time": "16:53:52", "throughput": 28468.2, "total_tokens": 2334118208} +{"current_steps": 8330, "total_steps": 14493, "loss": 0.8783, "lr": 3.693181222045952e-05, "epoch": 1.7244118941021194, "percentage": 57.48, "elapsed_time": "22:48:15", "remaining_time": "16:52:18", "throughput": 28466.39, "total_tokens": 2336952512} +{"current_steps": 8340, "total_steps": 14493, "loss": 0.8832, "lr": 3.692174164731113e-05, "epoch": 1.7264822338966384, "percentage": 57.55, "elapsed_time": "22:49:55", "remaining_time": "16:50:41", "throughput": 28466.0, "total_tokens": 2339788096} +{"current_steps": 8350, "total_steps": 14493, "loss": 0.863, "lr": 3.6911679307809595e-05, "epoch": 1.7285525736911571, "percentage": 57.61, "elapsed_time": "22:51:39", "remaining_time": "16:49:06", "throughput": 28464.46, "total_tokens": 2342597696} +{"current_steps": 8360, "total_steps": 14493, "loss": 0.8556, "lr": 3.690162519074137e-05, "epoch": 1.7306229134856759, "percentage": 57.68, "elapsed_time": "22:53:36", "remaining_time": "16:47:42", "throughput": 28457.64, "total_tokens": 2345391552} +{"current_steps": 8370, "total_steps": 14493, "loss": 0.8731, "lr": 3.689157928491431e-05, "epoch": 1.7326932532801946, "percentage": 57.75, "elapsed_time": "22:55:20", "remaining_time": "16:46:07", "throughput": 28455.69, "total_tokens": 2348184896} +{"current_steps": 8380, "total_steps": 14493, "loss": 0.8763, "lr": 3.6881541579157566e-05, "epoch": 
1.7347635930747134, "percentage": 57.82, "elapsed_time": "22:57:18", "remaining_time": "16:44:42", "throughput": 28449.31, "total_tokens": 2351011328} +{"current_steps": 8390, "total_steps": 14493, "loss": 0.8593, "lr": 3.687151206232154e-05, "epoch": 1.7368339328692322, "percentage": 57.89, "elapsed_time": "22:59:08", "remaining_time": "16:43:12", "throughput": 28445.92, "total_tokens": 2353856512} +{"current_steps": 8400, "total_steps": 14493, "loss": 0.855, "lr": 3.686149072327788e-05, "epoch": 1.738904272663751, "percentage": 57.96, "elapsed_time": "23:00:53", "remaining_time": "16:41:38", "throughput": 28444.14, "total_tokens": 2356699584} +{"current_steps": 8410, "total_steps": 14493, "loss": 0.8673, "lr": 3.685147755091937e-05, "epoch": 1.7409746124582697, "percentage": 58.03, "elapsed_time": "23:02:29", "remaining_time": "16:39:57", "throughput": 28444.34, "total_tokens": 2359437504} +{"current_steps": 8420, "total_steps": 14493, "loss": 0.8665, "lr": 3.684147253415992e-05, "epoch": 1.7430449522527884, "percentage": 58.1, "elapsed_time": "23:04:13", "remaining_time": "16:38:22", "throughput": 28442.55, "total_tokens": 2362244160} +{"current_steps": 8430, "total_steps": 14493, "loss": 0.8666, "lr": 3.683147566193448e-05, "epoch": 1.7451152920473074, "percentage": 58.17, "elapsed_time": "23:05:56", "remaining_time": "16:36:47", "throughput": 28440.97, "total_tokens": 2365049280} +{"current_steps": 8440, "total_steps": 14493, "loss": 0.8696, "lr": 3.6821486923199025e-05, "epoch": 1.747185631841826, "percentage": 58.24, "elapsed_time": "23:07:39", "remaining_time": "16:35:12", "throughput": 28439.84, "total_tokens": 2367894464} +{"current_steps": 8450, "total_steps": 14493, "loss": 0.8645, "lr": 3.681150630693046e-05, "epoch": 1.749255971636345, "percentage": 58.3, "elapsed_time": "23:09:24", "remaining_time": "16:33:37", "throughput": 28437.73, "total_tokens": 2370691200} +{"current_steps": 8460, "total_steps": 14493, "loss": 0.863, "lr": 
3.6801533802126615e-05, "epoch": 1.7513263114308635, "percentage": 58.37, "elapsed_time": "23:11:24", "remaining_time": "16:32:14", "throughput": 28431.01, "total_tokens": 2373545472} +{"current_steps": 8470, "total_steps": 14493, "loss": 0.8685, "lr": 3.679156939780617e-05, "epoch": 1.7533966512253825, "percentage": 58.44, "elapsed_time": "23:13:23", "remaining_time": "16:30:49", "throughput": 28424.35, "total_tokens": 2376361664} +{"current_steps": 8480, "total_steps": 14493, "loss": 0.8639, "lr": 3.6781613083008594e-05, "epoch": 1.755466991019901, "percentage": 58.51, "elapsed_time": "23:15:01", "remaining_time": "16:29:11", "throughput": 28424.1, "total_tokens": 2379139392} +{"current_steps": 8490, "total_steps": 14493, "loss": 0.8672, "lr": 3.677166484679412e-05, "epoch": 1.75753733081442, "percentage": 58.58, "elapsed_time": "23:16:44", "remaining_time": "16:27:35", "throughput": 28423.12, "total_tokens": 2381971648} +{"current_steps": 8500, "total_steps": 14493, "loss": 0.8624, "lr": 3.676172467824368e-05, "epoch": 1.7596076706089387, "percentage": 58.65, "elapsed_time": "23:18:25", "remaining_time": "16:25:58", "throughput": 28422.03, "total_tokens": 2384758784} +{"current_steps": 8510, "total_steps": 14493, "loss": 0.8688, "lr": 3.675179256645885e-05, "epoch": 1.7616780104034575, "percentage": 58.72, "elapsed_time": "23:20:05", "remaining_time": "16:24:20", "throughput": 28421.55, "total_tokens": 2387565440} +{"current_steps": 8520, "total_steps": 14493, "loss": 0.8738, "lr": 3.674186850056181e-05, "epoch": 1.7637483501979763, "percentage": 58.79, "elapsed_time": "23:21:47", "remaining_time": "16:22:44", "throughput": 28419.92, "total_tokens": 2390341056} +{"current_steps": 8530, "total_steps": 14493, "loss": 0.8674, "lr": 3.67319524696953e-05, "epoch": 1.765818689992495, "percentage": 58.86, "elapsed_time": "23:23:27", "remaining_time": "16:21:06", "throughput": 28420.0, "total_tokens": 2393168320} +{"current_steps": 8540, "total_steps": 14493, "loss": 
0.864, "lr": 3.6722044463022536e-05, "epoch": 1.7678890297870138, "percentage": 58.92, "elapsed_time": "23:25:04", "remaining_time": "16:19:26", "throughput": 28419.7, "total_tokens": 2395903360} +{"current_steps": 8550, "total_steps": 14493, "loss": 0.851, "lr": 3.6712144469727214e-05, "epoch": 1.7699593695815325, "percentage": 58.99, "elapsed_time": "23:26:43", "remaining_time": "16:17:47", "throughput": 28419.01, "total_tokens": 2398651072} +{"current_steps": 8560, "total_steps": 14493, "loss": 0.8641, "lr": 3.67022524790134e-05, "epoch": 1.7720297093760513, "percentage": 59.06, "elapsed_time": "23:28:23", "remaining_time": "16:16:10", "throughput": 28418.12, "total_tokens": 2401435840} +{"current_steps": 8570, "total_steps": 14493, "loss": 0.8618, "lr": 3.6692368480105546e-05, "epoch": 1.77410004917057, "percentage": 59.13, "elapsed_time": "23:30:16", "remaining_time": "16:14:41", "throughput": 28413.75, "total_tokens": 2404266816} +{"current_steps": 8580, "total_steps": 14493, "loss": 0.884, "lr": 3.6682492462248374e-05, "epoch": 1.776170388965089, "percentage": 59.2, "elapsed_time": "23:32:15", "remaining_time": "16:13:16", "throughput": 28407.23, "total_tokens": 2407100864} +{"current_steps": 8590, "total_steps": 14493, "loss": 0.8586, "lr": 3.667262441470689e-05, "epoch": 1.7782407287596076, "percentage": 59.27, "elapsed_time": "23:34:04", "remaining_time": "16:11:44", "throughput": 28403.52, "total_tokens": 2409876480} +{"current_steps": 8600, "total_steps": 14493, "loss": 0.8492, "lr": 3.6662764326766255e-05, "epoch": 1.7803110685541266, "percentage": 59.34, "elapsed_time": "23:35:43", "remaining_time": "16:10:06", "throughput": 28403.44, "total_tokens": 2412690240} +{"current_steps": 8610, "total_steps": 14493, "loss": 0.8744, "lr": 3.665291218773185e-05, "epoch": 1.7823814083486451, "percentage": 59.41, "elapsed_time": "23:37:22", "remaining_time": "16:08:27", "throughput": 28403.67, "total_tokens": 2415522944} +{"current_steps": 8620, "total_steps": 
14493, "loss": 0.8642, "lr": 3.664306798692912e-05, "epoch": 1.784451748143164, "percentage": 59.48, "elapsed_time": "23:39:00", "remaining_time": "16:06:47", "throughput": 28403.61, "total_tokens": 2418290624} +{"current_steps": 8630, "total_steps": 14493, "loss": 0.858, "lr": 3.6633231713703576e-05, "epoch": 1.7865220879376826, "percentage": 59.55, "elapsed_time": "23:40:42", "remaining_time": "16:05:11", "throughput": 28402.49, "total_tokens": 2421086336} +{"current_steps": 8640, "total_steps": 14493, "loss": 0.8589, "lr": 3.6623403357420745e-05, "epoch": 1.7885924277322016, "percentage": 59.61, "elapsed_time": "23:42:25", "remaining_time": "16:03:35", "throughput": 28401.49, "total_tokens": 2423925888} +{"current_steps": 8650, "total_steps": 14493, "loss": 0.8712, "lr": 3.661358290746611e-05, "epoch": 1.7906627675267204, "percentage": 59.68, "elapsed_time": "23:44:27", "remaining_time": "16:02:12", "throughput": 28393.59, "total_tokens": 2426723136} +{"current_steps": 8660, "total_steps": 14493, "loss": 0.8603, "lr": 3.6603770353245056e-05, "epoch": 1.7927331073212391, "percentage": 59.75, "elapsed_time": "23:46:17", "remaining_time": "16:00:41", "throughput": 28389.86, "total_tokens": 2429532800} +{"current_steps": 8670, "total_steps": 14493, "loss": 0.8628, "lr": 3.659396568418286e-05, "epoch": 1.794803447115758, "percentage": 59.82, "elapsed_time": "23:48:08", "remaining_time": "15:59:10", "throughput": 28385.65, "total_tokens": 2432317696} +{"current_steps": 8680, "total_steps": 14493, "loss": 0.8625, "lr": 3.658416888972459e-05, "epoch": 1.7968737869102767, "percentage": 59.89, "elapsed_time": "23:49:59", "remaining_time": "15:57:39", "throughput": 28382.1, "total_tokens": 2435166656} +{"current_steps": 8690, "total_steps": 14493, "loss": 0.8633, "lr": 3.6574379959335106e-05, "epoch": 1.7989441267047954, "percentage": 59.96, "elapsed_time": "23:51:55", "remaining_time": "15:56:12", "throughput": 28376.76, "total_tokens": 2437992256} +{"current_steps": 
8700, "total_steps": 14493, "loss": 0.8521, "lr": 3.6564598882498976e-05, "epoch": 1.8010144664993142, "percentage": 60.03, "elapsed_time": "23:53:36", "remaining_time": "15:54:35", "throughput": 28376.2, "total_tokens": 2440830144} +{"current_steps": 8710, "total_steps": 14493, "loss": 0.8647, "lr": 3.655482564872043e-05, "epoch": 1.803084806293833, "percentage": 60.1, "elapsed_time": "23:55:19", "remaining_time": "15:52:58", "throughput": 28375.18, "total_tokens": 2443652800} +{"current_steps": 8720, "total_steps": 14493, "loss": 0.8498, "lr": 3.654506024752336e-05, "epoch": 1.8051551460883517, "percentage": 60.17, "elapsed_time": "23:57:06", "remaining_time": "15:51:25", "throughput": 28372.78, "total_tokens": 2446493120} +{"current_steps": 8730, "total_steps": 14493, "loss": 0.8654, "lr": 3.653530266845121e-05, "epoch": 1.8072254858828707, "percentage": 60.24, "elapsed_time": "23:58:47", "remaining_time": "15:49:47", "throughput": 28372.52, "total_tokens": 2449321472} +{"current_steps": 8740, "total_steps": 14493, "loss": 0.8548, "lr": 3.652555290106696e-05, "epoch": 1.8092958256773892, "percentage": 60.3, "elapsed_time": "1 day, 0:00:28", "remaining_time": "15:48:10", "throughput": 28371.3, "total_tokens": 2452100096} +{"current_steps": 8750, "total_steps": 14493, "loss": 0.8651, "lr": 3.6515810934953084e-05, "epoch": 1.8113661654719082, "percentage": 60.37, "elapsed_time": "1 day, 0:02:19", "remaining_time": "15:46:39", "throughput": 28367.76, "total_tokens": 2454921920} +{"current_steps": 8760, "total_steps": 14493, "loss": 0.8766, "lr": 3.650607675971151e-05, "epoch": 1.8134365052664267, "percentage": 60.44, "elapsed_time": "1 day, 0:04:01", "remaining_time": "15:45:02", "throughput": 28366.97, "total_tokens": 2457764160} +{"current_steps": 8770, "total_steps": 14493, "loss": 0.8548, "lr": 3.649635036496351e-05, "epoch": 1.8155068450609457, "percentage": 60.51, "elapsed_time": "1 day, 0:05:44", "remaining_time": "15:43:26", "throughput": 28365.26, 
"total_tokens": 2460530560} +{"current_steps": 8780, "total_steps": 14493, "loss": 0.8641, "lr": 3.6486631740349746e-05, "epoch": 1.8175771848554643, "percentage": 60.58, "elapsed_time": "1 day, 0:07:31", "remaining_time": "15:41:52", "throughput": 28362.52, "total_tokens": 2463331712} +{"current_steps": 8790, "total_steps": 14493, "loss": 0.8676, "lr": 3.647692087553018e-05, "epoch": 1.8196475246499833, "percentage": 60.65, "elapsed_time": "1 day, 0:09:16", "remaining_time": "15:40:17", "throughput": 28360.74, "total_tokens": 2466145216} +{"current_steps": 8800, "total_steps": 14493, "loss": 0.852, "lr": 3.6467217760184005e-05, "epoch": 1.8217178644445018, "percentage": 60.72, "elapsed_time": "1 day, 0:10:57", "remaining_time": "15:38:40", "throughput": 28359.54, "total_tokens": 2468922048} +{"current_steps": 8810, "total_steps": 14493, "loss": 0.8721, "lr": 3.6457522384009625e-05, "epoch": 1.8237882042390208, "percentage": 60.79, "elapsed_time": "1 day, 0:12:43", "remaining_time": "15:37:06", "throughput": 28357.33, "total_tokens": 2471730880} +{"current_steps": 8820, "total_steps": 14493, "loss": 0.8741, "lr": 3.644783473672462e-05, "epoch": 1.8258585440335395, "percentage": 60.86, "elapsed_time": "1 day, 0:14:25", "remaining_time": "15:35:29", "throughput": 28356.29, "total_tokens": 2474538816} +{"current_steps": 8830, "total_steps": 14493, "loss": 0.8672, "lr": 3.643815480806568e-05, "epoch": 1.8279288838280583, "percentage": 60.93, "elapsed_time": "1 day, 0:16:08", "remaining_time": "15:33:52", "throughput": 28355.0, "total_tokens": 2477331200} +{"current_steps": 8840, "total_steps": 14493, "loss": 0.8596, "lr": 3.6428482587788555e-05, "epoch": 1.829999223622577, "percentage": 60.99, "elapsed_time": "1 day, 0:18:04", "remaining_time": "15:32:24", "throughput": 28349.37, "total_tokens": 2480144128} +{"current_steps": 8850, "total_steps": 14493, "loss": 0.8611, "lr": 3.641881806566803e-05, "epoch": 1.8320695634170958, "percentage": 61.06, "elapsed_time": "1 
day, 0:20:05", "remaining_time": "15:30:59", "throughput": 28341.8, "total_tokens": 2482892800} +{"current_steps": 8860, "total_steps": 14493, "loss": 0.8603, "lr": 3.640916123149788e-05, "epoch": 1.8341399032116146, "percentage": 61.13, "elapsed_time": "1 day, 0:21:54", "remaining_time": "15:29:27", "throughput": 28338.07, "total_tokens": 2485663744} +{"current_steps": 8870, "total_steps": 14493, "loss": 0.859, "lr": 3.639951207509079e-05, "epoch": 1.8362102430061333, "percentage": 61.2, "elapsed_time": "1 day, 0:23:43", "remaining_time": "15:27:54", "throughput": 28334.53, "total_tokens": 2488447936} +{"current_steps": 8880, "total_steps": 14493, "loss": 0.8809, "lr": 3.6389870586278333e-05, "epoch": 1.8382805828006523, "percentage": 61.27, "elapsed_time": "1 day, 0:25:24", "remaining_time": "15:26:16", "throughput": 28333.76, "total_tokens": 2491242880} +{"current_steps": 8890, "total_steps": 14493, "loss": 0.8565, "lr": 3.6380236754910965e-05, "epoch": 1.8403509225951709, "percentage": 61.34, "elapsed_time": "1 day, 0:27:10", "remaining_time": "15:24:41", "throughput": 28332.06, "total_tokens": 2494073728} +{"current_steps": 8900, "total_steps": 14493, "loss": 0.8625, "lr": 3.6370610570857897e-05, "epoch": 1.8424212623896898, "percentage": 61.41, "elapsed_time": "1 day, 0:28:50", "remaining_time": "15:23:03", "throughput": 28331.07, "total_tokens": 2496823232} +{"current_steps": 8910, "total_steps": 14493, "loss": 0.863, "lr": 3.6360992024007114e-05, "epoch": 1.8444916021842084, "percentage": 61.48, "elapsed_time": "1 day, 0:30:30", "remaining_time": "15:21:25", "throughput": 28330.28, "total_tokens": 2499594304} +{"current_steps": 8920, "total_steps": 14493, "loss": 0.8631, "lr": 3.6351381104265304e-05, "epoch": 1.8465619419787274, "percentage": 61.55, "elapsed_time": "1 day, 0:32:09", "remaining_time": "15:19:46", "throughput": 28329.97, "total_tokens": 2502384768} +{"current_steps": 8930, "total_steps": 14493, "loss": 0.8636, "lr": 3.634177780155783e-05, 
"epoch": 1.848632281773246, "percentage": 61.62, "elapsed_time": "1 day, 0:33:49", "remaining_time": "15:18:07", "throughput": 28329.6, "total_tokens": 2505168640} +{"current_steps": 8940, "total_steps": 14493, "loss": 0.8675, "lr": 3.633218210582867e-05, "epoch": 1.8507026215677649, "percentage": 61.68, "elapsed_time": "1 day, 0:35:29", "remaining_time": "15:16:29", "throughput": 28329.0, "total_tokens": 2507951616} +{"current_steps": 8950, "total_steps": 14493, "loss": 0.8518, "lr": 3.6322594007040376e-05, "epoch": 1.8527729613622834, "percentage": 61.75, "elapsed_time": "1 day, 0:37:12", "remaining_time": "15:14:52", "throughput": 28327.56, "total_tokens": 2510742784} +{"current_steps": 8960, "total_steps": 14493, "loss": 0.8604, "lr": 3.631301349517403e-05, "epoch": 1.8548433011568024, "percentage": 61.82, "elapsed_time": "1 day, 0:38:53", "remaining_time": "15:13:14", "throughput": 28327.26, "total_tokens": 2513564480} +{"current_steps": 8970, "total_steps": 14493, "loss": 0.8554, "lr": 3.6303440560229216e-05, "epoch": 1.8569136409513212, "percentage": 61.89, "elapsed_time": "1 day, 0:40:30", "remaining_time": "15:11:34", "throughput": 28327.29, "total_tokens": 2516337344} +{"current_steps": 8980, "total_steps": 14493, "loss": 0.8591, "lr": 3.629387519222395e-05, "epoch": 1.85898398074584, "percentage": 61.96, "elapsed_time": "1 day, 0:42:11", "remaining_time": "15:09:56", "throughput": 28326.61, "total_tokens": 2519134208} +{"current_steps": 8990, "total_steps": 14493, "loss": 0.8558, "lr": 3.628431738119464e-05, "epoch": 1.8610543205403587, "percentage": 62.03, "elapsed_time": "1 day, 0:43:56", "remaining_time": "15:08:21", "throughput": 28325.7, "total_tokens": 2522007360} +{"current_steps": 9000, "total_steps": 14493, "loss": 0.8673, "lr": 3.62747671171961e-05, "epoch": 1.8631246603348774, "percentage": 62.1, "elapsed_time": "1 day, 0:45:44", "remaining_time": "15:06:47", "throughput": 28323.15, "total_tokens": 2524852544} +{"current_steps": 9010, 
"total_steps": 14493, "loss": 0.8806, "lr": 3.626522439030138e-05, "epoch": 1.8651950001293962, "percentage": 62.17, "elapsed_time": "1 day, 0:47:24", "remaining_time": "15:05:09", "throughput": 28322.75, "total_tokens": 2527644672} +{"current_steps": 9020, "total_steps": 14493, "loss": 0.8701, "lr": 3.6255689190601863e-05, "epoch": 1.867265339923915, "percentage": 62.24, "elapsed_time": "1 day, 0:49:04", "remaining_time": "15:03:30", "throughput": 28322.5, "total_tokens": 2530447744} +{"current_steps": 9030, "total_steps": 14493, "loss": 0.8646, "lr": 3.624616150820714e-05, "epoch": 1.869335679718434, "percentage": 62.31, "elapsed_time": "1 day, 0:50:46", "remaining_time": "15:01:53", "throughput": 28321.57, "total_tokens": 2533256896} +{"current_steps": 9040, "total_steps": 14493, "loss": 0.8672, "lr": 3.623664133324499e-05, "epoch": 1.8714060195129525, "percentage": 62.37, "elapsed_time": "1 day, 0:52:29", "remaining_time": "15:00:16", "throughput": 28320.64, "total_tokens": 2536099392} +{"current_steps": 9050, "total_steps": 14493, "loss": 0.8645, "lr": 3.622712865586131e-05, "epoch": 1.8734763593074715, "percentage": 62.44, "elapsed_time": "1 day, 0:54:16", "remaining_time": "14:58:42", "throughput": 28318.58, "total_tokens": 2538951616} +{"current_steps": 9060, "total_steps": 14493, "loss": 0.866, "lr": 3.621762346622014e-05, "epoch": 1.87554669910199, "percentage": 62.51, "elapsed_time": "1 day, 0:56:02", "remaining_time": "14:57:07", "throughput": 28317.01, "total_tokens": 2541814400} +{"current_steps": 9070, "total_steps": 14493, "loss": 0.8575, "lr": 3.620812575450352e-05, "epoch": 1.877617038896509, "percentage": 62.58, "elapsed_time": "1 day, 0:57:44", "remaining_time": "14:55:30", "throughput": 28316.53, "total_tokens": 2544658560} +{"current_steps": 9080, "total_steps": 14493, "loss": 0.8542, "lr": 3.6198635510911556e-05, "epoch": 1.8796873786910275, "percentage": 62.65, "elapsed_time": "1 day, 0:59:29", "remaining_time": "14:53:55", "throughput": 
28314.86, "total_tokens": 2547487104} +{"current_steps": 9090, "total_steps": 14493, "loss": 0.8644, "lr": 3.618915272566228e-05, "epoch": 1.8817577184855465, "percentage": 62.72, "elapsed_time": "1 day, 1:01:08", "remaining_time": "14:52:15", "throughput": 28314.95, "total_tokens": 2550286464} +{"current_steps": 9100, "total_steps": 14493, "loss": 0.8719, "lr": 3.6179677388991694e-05, "epoch": 1.883828058280065, "percentage": 62.79, "elapsed_time": "1 day, 1:02:47", "remaining_time": "14:50:36", "throughput": 28314.47, "total_tokens": 2553044352} +{"current_steps": 9110, "total_steps": 14493, "loss": 0.8778, "lr": 3.617020949115366e-05, "epoch": 1.885898398074584, "percentage": 62.86, "elapsed_time": "1 day, 1:04:27", "remaining_time": "14:48:58", "throughput": 28314.09, "total_tokens": 2555855552} +{"current_steps": 9120, "total_steps": 14493, "loss": 0.8552, "lr": 3.6160749022419886e-05, "epoch": 1.8879687378691028, "percentage": 62.93, "elapsed_time": "1 day, 1:06:09", "remaining_time": "14:47:20", "throughput": 28313.03, "total_tokens": 2558631808} +{"current_steps": 9130, "total_steps": 14493, "loss": 0.8585, "lr": 3.6151295973079887e-05, "epoch": 1.8900390776636216, "percentage": 63.0, "elapsed_time": "1 day, 1:07:54", "remaining_time": "14:45:44", "throughput": 28311.01, "total_tokens": 2561417856} +{"current_steps": 9140, "total_steps": 14493, "loss": 0.8655, "lr": 3.6141850333440934e-05, "epoch": 1.8921094174581403, "percentage": 63.06, "elapsed_time": "1 day, 1:09:33", "remaining_time": "14:44:05", "throughput": 28310.54, "total_tokens": 2564181248} +{"current_steps": 9150, "total_steps": 14493, "loss": 0.8564, "lr": 3.613241209382803e-05, "epoch": 1.894179757252659, "percentage": 63.13, "elapsed_time": "1 day, 1:11:13", "remaining_time": "14:42:27", "throughput": 28310.0, "total_tokens": 2566978688} +{"current_steps": 9160, "total_steps": 14493, "loss": 0.8549, "lr": 3.6122981244583834e-05, "epoch": 1.8962500970471778, "percentage": 63.2, 
"elapsed_time": "1 day, 1:12:55", "remaining_time": "14:40:49", "throughput": 28309.18, "total_tokens": 2569774336} +{"current_steps": 9170, "total_steps": 14493, "loss": 0.8671, "lr": 3.6113557776068644e-05, "epoch": 1.8983204368416966, "percentage": 63.27, "elapsed_time": "1 day, 1:14:38", "remaining_time": "14:39:13", "throughput": 28308.31, "total_tokens": 2572616256} +{"current_steps": 9180, "total_steps": 14493, "loss": 0.8652, "lr": 3.6104141678660386e-05, "epoch": 1.9003907766362156, "percentage": 63.34, "elapsed_time": "1 day, 1:16:18", "remaining_time": "14:37:34", "throughput": 28307.47, "total_tokens": 2575378496} +{"current_steps": 9190, "total_steps": 14493, "loss": 0.8564, "lr": 3.6094732942754487e-05, "epoch": 1.9024611164307341, "percentage": 63.41, "elapsed_time": "1 day, 1:18:03", "remaining_time": "14:35:58", "throughput": 28305.91, "total_tokens": 2578203968} +{"current_steps": 9200, "total_steps": 14493, "loss": 0.8734, "lr": 3.60853315587639e-05, "epoch": 1.904531456225253, "percentage": 63.48, "elapsed_time": "1 day, 1:19:43", "remaining_time": "14:34:20", "throughput": 28305.74, "total_tokens": 2581024000} +{"current_steps": 9210, "total_steps": 14493, "loss": 0.8603, "lr": 3.607593751711909e-05, "epoch": 1.9066017960197716, "percentage": 63.55, "elapsed_time": "1 day, 1:21:25", "remaining_time": "14:32:42", "throughput": 28304.81, "total_tokens": 2583822336} +{"current_steps": 9220, "total_steps": 14493, "loss": 0.859, "lr": 3.60665508082679e-05, "epoch": 1.9086721358142906, "percentage": 63.62, "elapsed_time": "1 day, 1:22:58", "remaining_time": "14:31:00", "throughput": 28305.77, "total_tokens": 2586531904} +{"current_steps": 9230, "total_steps": 14493, "loss": 0.8646, "lr": 3.6057171422675585e-05, "epoch": 1.9107424756088092, "percentage": 63.69, "elapsed_time": "1 day, 1:24:38", "remaining_time": "14:29:21", "throughput": 28305.46, "total_tokens": 2589332096} +{"current_steps": 9240, "total_steps": 14493, "loss": 0.8689, "lr": 
3.604779935082474e-05, "epoch": 1.9128128154033281, "percentage": 63.75, "elapsed_time": "1 day, 1:26:19", "remaining_time": "14:27:43", "throughput": 28304.76, "total_tokens": 2592149248} +{"current_steps": 9250, "total_steps": 14493, "loss": 0.8682, "lr": 3.603843458321526e-05, "epoch": 1.9148831551978467, "percentage": 63.82, "elapsed_time": "1 day, 1:28:01", "remaining_time": "14:26:06", "throughput": 28303.85, "total_tokens": 2594952896} +{"current_steps": 9260, "total_steps": 14493, "loss": 0.862, "lr": 3.6029077110364355e-05, "epoch": 1.9169534949923657, "percentage": 63.89, "elapsed_time": "1 day, 1:29:42", "remaining_time": "14:24:27", "throughput": 28303.58, "total_tokens": 2597762624} +{"current_steps": 9270, "total_steps": 14493, "loss": 0.8559, "lr": 3.60197269228064e-05, "epoch": 1.9190238347868844, "percentage": 63.96, "elapsed_time": "1 day, 1:31:21", "remaining_time": "14:22:49", "throughput": 28303.09, "total_tokens": 2600542976} +{"current_steps": 9280, "total_steps": 14493, "loss": 0.8702, "lr": 3.601038401109299e-05, "epoch": 1.9210941745814032, "percentage": 64.03, "elapsed_time": "1 day, 1:32:57", "remaining_time": "14:21:07", "throughput": 28303.7, "total_tokens": 2603302144} +{"current_steps": 9290, "total_steps": 14493, "loss": 0.8556, "lr": 3.6001048365792846e-05, "epoch": 1.923164514375922, "percentage": 64.1, "elapsed_time": "1 day, 1:34:41", "remaining_time": "14:19:31", "throughput": 28302.29, "total_tokens": 2606114048} +{"current_steps": 9300, "total_steps": 14493, "loss": 0.8702, "lr": 3.599171997749182e-05, "epoch": 1.9252348541704407, "percentage": 64.17, "elapsed_time": "1 day, 1:36:25", "remaining_time": "14:17:54", "throughput": 28301.6, "total_tokens": 2608984512} +{"current_steps": 9310, "total_steps": 14493, "loss": 0.8633, "lr": 3.598239883679281e-05, "epoch": 1.9273051939649595, "percentage": 64.24, "elapsed_time": "1 day, 1:38:05", "remaining_time": "14:16:16", "throughput": 28301.78, "total_tokens": 2611845312} 
+{"current_steps": 9320, "total_steps": 14493, "loss": 0.8515, "lr": 3.597308493431576e-05, "epoch": 1.9293755337594782, "percentage": 64.31, "elapsed_time": "1 day, 1:39:46", "remaining_time": "14:14:38", "throughput": 28301.06, "total_tokens": 2614628160} +{"current_steps": 9330, "total_steps": 14493, "loss": 0.853, "lr": 3.596377826069758e-05, "epoch": 1.931445873553997, "percentage": 64.38, "elapsed_time": "1 day, 1:41:28", "remaining_time": "14:13:01", "throughput": 28299.96, "total_tokens": 2617425984} +{"current_steps": 9340, "total_steps": 14493, "loss": 0.8659, "lr": 3.5954478806592155e-05, "epoch": 1.9335162133485158, "percentage": 64.44, "elapsed_time": "1 day, 1:43:09", "remaining_time": "14:11:22", "throughput": 28299.42, "total_tokens": 2620218816} +{"current_steps": 9350, "total_steps": 14493, "loss": 0.868, "lr": 3.594518656267024e-05, "epoch": 1.9355865531430347, "percentage": 64.51, "elapsed_time": "1 day, 1:44:48", "remaining_time": "14:09:43", "throughput": 28299.23, "total_tokens": 2623024064} +{"current_steps": 9360, "total_steps": 14493, "loss": 0.8759, "lr": 3.5935901519619496e-05, "epoch": 1.9376568929375533, "percentage": 64.58, "elapsed_time": "1 day, 1:46:30", "remaining_time": "14:08:05", "throughput": 28299.21, "total_tokens": 2625890304} +{"current_steps": 9370, "total_steps": 14493, "loss": 0.8549, "lr": 3.5926623668144385e-05, "epoch": 1.9397272327320723, "percentage": 64.65, "elapsed_time": "1 day, 1:48:07", "remaining_time": "14:06:26", "throughput": 28299.22, "total_tokens": 2628657088} +{"current_steps": 9380, "total_steps": 14493, "loss": 0.8668, "lr": 3.5917352998966194e-05, "epoch": 1.9417975725265908, "percentage": 64.72, "elapsed_time": "1 day, 1:49:51", "remaining_time": "14:04:49", "throughput": 28298.2, "total_tokens": 2631488832} +{"current_steps": 9390, "total_steps": 14493, "loss": 0.8668, "lr": 3.5908089502822914e-05, "epoch": 1.9438679123211098, "percentage": 64.79, "elapsed_time": "1 day, 1:51:27", 
"remaining_time": "14:03:08", "throughput": 28298.66, "total_tokens": 2634237312} +{"current_steps": 9400, "total_steps": 14493, "loss": 0.8626, "lr": 3.589883317046929e-05, "epoch": 1.9459382521156283, "percentage": 64.86, "elapsed_time": "1 day, 1:53:06", "remaining_time": "14:01:28", "throughput": 28298.34, "total_tokens": 2637010624} +{"current_steps": 9410, "total_steps": 14493, "loss": 0.8513, "lr": 3.5889583992676715e-05, "epoch": 1.9480085919101473, "percentage": 64.93, "elapsed_time": "1 day, 1:54:43", "remaining_time": "13:59:49", "throughput": 28298.32, "total_tokens": 2639775104} +{"current_steps": 9420, "total_steps": 14493, "loss": 0.8621, "lr": 3.5880341960233244e-05, "epoch": 1.950078931704666, "percentage": 65.0, "elapsed_time": "1 day, 1:56:25", "remaining_time": "13:58:11", "throughput": 28297.87, "total_tokens": 2642609984} +{"current_steps": 9430, "total_steps": 14493, "loss": 0.8653, "lr": 3.58711070639435e-05, "epoch": 1.9521492714991848, "percentage": 65.07, "elapsed_time": "1 day, 1:58:08", "remaining_time": "13:56:34", "throughput": 28296.88, "total_tokens": 2645435200} +{"current_steps": 9440, "total_steps": 14493, "loss": 0.8495, "lr": 3.586187929462869e-05, "epoch": 1.9542196112937036, "percentage": 65.13, "elapsed_time": "1 day, 1:59:46", "remaining_time": "13:54:54", "throughput": 28297.04, "total_tokens": 2648222272} +{"current_steps": 9450, "total_steps": 14493, "loss": 0.8464, "lr": 3.585265864312651e-05, "epoch": 1.9562899510882223, "percentage": 65.2, "elapsed_time": "1 day, 2:01:29", "remaining_time": "13:53:17", "throughput": 28296.44, "total_tokens": 2651069568} +{"current_steps": 9460, "total_steps": 14493, "loss": 0.867, "lr": 3.584344510029118e-05, "epoch": 1.958360290882741, "percentage": 65.27, "elapsed_time": "1 day, 2:03:04", "remaining_time": "13:51:36", "throughput": 28297.12, "total_tokens": 2653839104} +{"current_steps": 9470, "total_steps": 14493, "loss": 0.8679, "lr": 3.583423865699333e-05, "epoch": 
1.9604306306772599, "percentage": 65.34, "elapsed_time": "1 day, 2:04:43", "remaining_time": "13:49:57", "throughput": 28296.92, "total_tokens": 2656625472} +{"current_steps": 9480, "total_steps": 14493, "loss": 0.8674, "lr": 3.5825039304119994e-05, "epoch": 1.9625009704717786, "percentage": 65.41, "elapsed_time": "1 day, 2:06:26", "remaining_time": "13:48:20", "throughput": 28296.46, "total_tokens": 2659495744} +{"current_steps": 9490, "total_steps": 14493, "loss": 0.8586, "lr": 3.581584703257461e-05, "epoch": 1.9645713102662974, "percentage": 65.48, "elapsed_time": "1 day, 2:08:09", "remaining_time": "13:46:42", "throughput": 28295.46, "total_tokens": 2662294208} +{"current_steps": 9500, "total_steps": 14493, "loss": 0.8575, "lr": 3.580666183327689e-05, "epoch": 1.9666416500608164, "percentage": 65.55, "elapsed_time": "1 day, 2:09:51", "remaining_time": "13:45:04", "throughput": 28294.66, "total_tokens": 2665103296} +{"current_steps": 9510, "total_steps": 14493, "loss": 0.8312, "lr": 3.5797483697162906e-05, "epoch": 1.968711989855335, "percentage": 65.62, "elapsed_time": "1 day, 2:11:33", "remaining_time": "13:43:27", "throughput": 28293.67, "total_tokens": 2667895488} +{"current_steps": 9520, "total_steps": 14493, "loss": 0.8712, "lr": 3.5788312615184936e-05, "epoch": 1.970782329649854, "percentage": 65.69, "elapsed_time": "1 day, 2:13:12", "remaining_time": "13:41:48", "throughput": 28293.36, "total_tokens": 2670671872} +{"current_steps": 9530, "total_steps": 14493, "loss": 0.8624, "lr": 3.5779148578311476e-05, "epoch": 1.9728526694443724, "percentage": 65.76, "elapsed_time": "1 day, 2:14:46", "remaining_time": "13:40:06", "throughput": 28294.27, "total_tokens": 2673412672} +{"current_steps": 9540, "total_steps": 14493, "loss": 0.8681, "lr": 3.5769991577527236e-05, "epoch": 1.9749230092388914, "percentage": 65.82, "elapsed_time": "1 day, 2:16:29", "remaining_time": "13:38:29", "throughput": 28293.42, "total_tokens": 2676256832} +{"current_steps": 9550, 
"total_steps": 14493, "loss": 0.8595, "lr": 3.5760841603833034e-05, "epoch": 1.97699334903341, "percentage": 65.89, "elapsed_time": "1 day, 2:18:09", "remaining_time": "13:36:50", "throughput": 28293.58, "total_tokens": 2679092416} +{"current_steps": 9560, "total_steps": 14493, "loss": 0.8589, "lr": 3.5751698648245814e-05, "epoch": 1.979063688827929, "percentage": 65.96, "elapsed_time": "1 day, 2:19:53", "remaining_time": "13:35:13", "throughput": 28292.63, "total_tokens": 2681955008} +{"current_steps": 9570, "total_steps": 14493, "loss": 0.8578, "lr": 3.574256270179857e-05, "epoch": 1.9811340286224477, "percentage": 66.03, "elapsed_time": "1 day, 2:21:34", "remaining_time": "13:33:35", "throughput": 28292.36, "total_tokens": 2684774912} +{"current_steps": 9580, "total_steps": 14493, "loss": 0.8707, "lr": 3.573343375554037e-05, "epoch": 1.9832043684169665, "percentage": 66.1, "elapsed_time": "1 day, 2:23:25", "remaining_time": "13:32:02", "throughput": 28288.79, "total_tokens": 2687588480} +{"current_steps": 9590, "total_steps": 14493, "loss": 0.8512, "lr": 3.572431180053621e-05, "epoch": 1.9852747082114852, "percentage": 66.17, "elapsed_time": "1 day, 2:25:11", "remaining_time": "13:30:26", "throughput": 28287.56, "total_tokens": 2690463232} +{"current_steps": 9600, "total_steps": 14493, "loss": 0.871, "lr": 3.571519682786711e-05, "epoch": 1.987345048006004, "percentage": 66.24, "elapsed_time": "1 day, 2:26:49", "remaining_time": "13:28:47", "throughput": 28287.17, "total_tokens": 2693211520} +{"current_steps": 9610, "total_steps": 14493, "loss": 0.8672, "lr": 3.570608882862996e-05, "epoch": 1.9894153878005227, "percentage": 66.31, "elapsed_time": "1 day, 2:28:32", "remaining_time": "13:27:09", "throughput": 28285.98, "total_tokens": 2696007616} +{"current_steps": 9620, "total_steps": 14493, "loss": 0.8589, "lr": 3.569698779393757e-05, "epoch": 1.9914857275950415, "percentage": 66.38, "elapsed_time": "1 day, 2:30:08", "remaining_time": "13:25:29", "throughput": 
28286.67, "total_tokens": 2698799168} +{"current_steps": 9630, "total_steps": 14493, "loss": 0.8603, "lr": 3.568789371491859e-05, "epoch": 1.9935560673895603, "percentage": 66.45, "elapsed_time": "1 day, 2:31:52", "remaining_time": "13:23:52", "throughput": 28285.94, "total_tokens": 2701652480} +{"current_steps": 9640, "total_steps": 14493, "loss": 0.8631, "lr": 3.567880658271748e-05, "epoch": 1.995626407184079, "percentage": 66.51, "elapsed_time": "1 day, 2:33:32", "remaining_time": "13:22:13", "throughput": 28286.07, "total_tokens": 2704509120} +{"current_steps": 9650, "total_steps": 14493, "loss": 0.8422, "lr": 3.566972638849445e-05, "epoch": 1.997696746978598, "percentage": 66.58, "elapsed_time": "1 day, 2:35:14", "remaining_time": "13:20:35", "throughput": 28285.44, "total_tokens": 2707338368} +{"current_steps": 9660, "total_steps": 14493, "loss": 0.8431, "lr": 3.566065312342551e-05, "epoch": 1.9997670867731165, "percentage": 66.65, "elapsed_time": "1 day, 2:36:55", "remaining_time": "13:18:57", "throughput": 28285.2, "total_tokens": 2710168512} +{"current_steps": 9670, "total_steps": 14493, "loss": 0.7894, "lr": 3.565158677870231e-05, "epoch": 2.001656271835615, "percentage": 66.72, "elapsed_time": "1 day, 2:38:25", "remaining_time": "13:17:13", "throughput": 28285.25, "total_tokens": 2712699008} +{"current_steps": 9680, "total_steps": 14493, "loss": 0.7965, "lr": 3.564252734553221e-05, "epoch": 2.003726611630134, "percentage": 66.79, "elapsed_time": "1 day, 2:40:07", "remaining_time": "13:15:35", "throughput": 28284.49, "total_tokens": 2715510592} +{"current_steps": 9690, "total_steps": 14493, "loss": 0.8017, "lr": 3.563347481513818e-05, "epoch": 2.0057969514246525, "percentage": 66.86, "elapsed_time": "1 day, 2:41:48", "remaining_time": "13:13:57", "throughput": 28284.08, "total_tokens": 2718342592} +{"current_steps": 9700, "total_steps": 14493, "loss": 0.7906, "lr": 3.56244291787588e-05, "epoch": 2.0078672912191715, "percentage": 66.93, "elapsed_time": "1 
day, 2:43:32", "remaining_time": "13:12:20", "throughput": 28283.27, "total_tokens": 2721205056} +{"current_steps": 9710, "total_steps": 14493, "loss": 0.7939, "lr": 3.5615390427648216e-05, "epoch": 2.00993763101369, "percentage": 67.0, "elapsed_time": "1 day, 2:45:10", "remaining_time": "13:10:41", "throughput": 28283.62, "total_tokens": 2724004928} +{"current_steps": 9720, "total_steps": 14493, "loss": 0.773, "lr": 3.5606358553076075e-05, "epoch": 2.012007970808209, "percentage": 67.07, "elapsed_time": "1 day, 2:46:49", "remaining_time": "13:09:01", "throughput": 28283.88, "total_tokens": 2726829888} +{"current_steps": 9730, "total_steps": 14493, "loss": 0.7928, "lr": 3.5597333546327526e-05, "epoch": 2.0140783106027276, "percentage": 67.14, "elapsed_time": "1 day, 2:48:26", "remaining_time": "13:07:21", "throughput": 28284.47, "total_tokens": 2729642752} +{"current_steps": 9740, "total_steps": 14493, "loss": 0.773, "lr": 3.5588315398703186e-05, "epoch": 2.0161486503972466, "percentage": 67.2, "elapsed_time": "1 day, 2:50:03", "remaining_time": "13:05:41", "throughput": 28284.69, "total_tokens": 2732389504} +{"current_steps": 9750, "total_steps": 14493, "loss": 0.7861, "lr": 3.557930410151907e-05, "epoch": 2.018218990191765, "percentage": 67.27, "elapsed_time": "1 day, 2:51:41", "remaining_time": "13:04:01", "throughput": 28284.89, "total_tokens": 2735179584} +{"current_steps": 9760, "total_steps": 14493, "loss": 0.7752, "lr": 3.5570299646106606e-05, "epoch": 2.020289329986284, "percentage": 67.34, "elapsed_time": "1 day, 2:53:20", "remaining_time": "13:02:22", "throughput": 28284.84, "total_tokens": 2737983872} +{"current_steps": 9770, "total_steps": 14493, "loss": 0.7961, "lr": 3.556130202381253e-05, "epoch": 2.0223596697808026, "percentage": 67.41, "elapsed_time": "1 day, 2:55:04", "remaining_time": "13:00:45", "throughput": 28284.1, "total_tokens": 2740853888} +{"current_steps": 9780, "total_steps": 14493, "loss": 0.7952, "lr": 3.555231122599892e-05, "epoch": 
2.0244300095753216, "percentage": 67.48, "elapsed_time": "1 day, 2:56:43", "remaining_time": "12:59:06", "throughput": 28284.51, "total_tokens": 2743693184} +{"current_steps": 9790, "total_steps": 14493, "loss": 0.7901, "lr": 3.554332724404313e-05, "epoch": 2.02650034936984, "percentage": 67.55, "elapsed_time": "1 day, 2:58:23", "remaining_time": "12:57:27", "throughput": 28284.42, "total_tokens": 2746518080} +{"current_steps": 9800, "total_steps": 14493, "loss": 0.7945, "lr": 3.553435006933777e-05, "epoch": 2.028570689164359, "percentage": 67.62, "elapsed_time": "1 day, 3:00:03", "remaining_time": "12:55:48", "throughput": 28284.37, "total_tokens": 2749334144} +{"current_steps": 9810, "total_steps": 14493, "loss": 0.7906, "lr": 3.5525379693290626e-05, "epoch": 2.0306410289588777, "percentage": 67.69, "elapsed_time": "1 day, 3:01:41", "remaining_time": "12:54:08", "throughput": 28284.35, "total_tokens": 2752119168} +{"current_steps": 9820, "total_steps": 14493, "loss": 0.7845, "lr": 3.551641610732469e-05, "epoch": 2.0327113687533966, "percentage": 67.76, "elapsed_time": "1 day, 3:03:19", "remaining_time": "12:52:29", "throughput": 28284.37, "total_tokens": 2754895616} +{"current_steps": 9830, "total_steps": 14493, "loss": 0.7852, "lr": 3.55074593028781e-05, "epoch": 2.0347817085479156, "percentage": 67.83, "elapsed_time": "1 day, 3:05:02", "remaining_time": "12:50:51", "throughput": 28283.98, "total_tokens": 2757745472} +{"current_steps": 9840, "total_steps": 14493, "loss": 0.8081, "lr": 3.5498509271404065e-05, "epoch": 2.036852048342434, "percentage": 67.89, "elapsed_time": "1 day, 3:06:44", "remaining_time": "12:49:13", "throughput": 28283.5, "total_tokens": 2760585152} +{"current_steps": 9850, "total_steps": 14493, "loss": 0.7981, "lr": 3.5489566004370893e-05, "epoch": 2.038922388136953, "percentage": 67.96, "elapsed_time": "1 day, 3:08:19", "remaining_time": "12:47:32", "throughput": 28284.47, "total_tokens": 2763374336} +{"current_steps": 9860, "total_steps": 
14493, "loss": 0.7964, "lr": 3.548062949326194e-05, "epoch": 2.0409927279314717, "percentage": 68.03, "elapsed_time": "1 day, 3:09:55", "remaining_time": "12:45:52", "throughput": 28285.15, "total_tokens": 2766170880} +{"current_steps": 9870, "total_steps": 14493, "loss": 0.7888, "lr": 3.547169972957554e-05, "epoch": 2.0430630677259907, "percentage": 68.1, "elapsed_time": "1 day, 3:11:34", "remaining_time": "12:44:12", "throughput": 28285.1, "total_tokens": 2768958400} +{"current_steps": 9880, "total_steps": 14493, "loss": 0.7751, "lr": 3.5462776704825e-05, "epoch": 2.045133407520509, "percentage": 68.17, "elapsed_time": "1 day, 3:13:15", "remaining_time": "12:42:34", "throughput": 28284.84, "total_tokens": 2771773184} +{"current_steps": 9890, "total_steps": 14493, "loss": 0.7938, "lr": 3.5453860410538594e-05, "epoch": 2.047203747315028, "percentage": 68.24, "elapsed_time": "1 day, 3:14:52", "remaining_time": "12:40:53", "throughput": 28285.51, "total_tokens": 2774585408} +{"current_steps": 9900, "total_steps": 14493, "loss": 0.797, "lr": 3.5444950838259455e-05, "epoch": 2.0492740871095467, "percentage": 68.31, "elapsed_time": "1 day, 3:16:34", "remaining_time": "12:39:16", "throughput": 28285.22, "total_tokens": 2777442368} +{"current_steps": 9910, "total_steps": 14493, "loss": 0.8018, "lr": 3.543604797954563e-05, "epoch": 2.0513444269040657, "percentage": 68.38, "elapsed_time": "1 day, 3:18:13", "remaining_time": "12:37:36", "throughput": 28285.19, "total_tokens": 2780241216} +{"current_steps": 9920, "total_steps": 14493, "loss": 0.7801, "lr": 3.542715182596996e-05, "epoch": 2.0534147666985842, "percentage": 68.45, "elapsed_time": "1 day, 3:19:53", "remaining_time": "12:35:58", "throughput": 28285.15, "total_tokens": 2783083392} +{"current_steps": 9930, "total_steps": 14493, "loss": 0.7734, "lr": 3.5418262369120115e-05, "epoch": 2.0554851064931032, "percentage": 68.52, "elapsed_time": "1 day, 3:21:31", "remaining_time": "12:34:18", "throughput": 28285.18, 
"total_tokens": 2785847232} +{"current_steps": 9940, "total_steps": 14493, "loss": 0.7952, "lr": 3.5409379600598526e-05, "epoch": 2.0575554462876218, "percentage": 68.58, "elapsed_time": "1 day, 3:23:15", "remaining_time": "12:32:41", "throughput": 28284.72, "total_tokens": 2788738112} +{"current_steps": 9950, "total_steps": 14493, "loss": 0.775, "lr": 3.540050351202235e-05, "epoch": 2.0596257860821408, "percentage": 68.65, "elapsed_time": "1 day, 3:24:51", "remaining_time": "12:31:00", "throughput": 28285.21, "total_tokens": 2791515328} +{"current_steps": 9960, "total_steps": 14493, "loss": 0.8037, "lr": 3.539163409502347e-05, "epoch": 2.0616961258766593, "percentage": 68.72, "elapsed_time": "1 day, 3:26:29", "remaining_time": "12:29:21", "throughput": 28285.48, "total_tokens": 2794319616} +{"current_steps": 9970, "total_steps": 14493, "loss": 0.7983, "lr": 3.5382771341248416e-05, "epoch": 2.0637664656711783, "percentage": 68.79, "elapsed_time": "1 day, 3:28:08", "remaining_time": "12:27:42", "throughput": 28285.61, "total_tokens": 2797132160} +{"current_steps": 9980, "total_steps": 14493, "loss": 0.7902, "lr": 3.537391524235835e-05, "epoch": 2.0658368054656973, "percentage": 68.86, "elapsed_time": "1 day, 3:29:46", "remaining_time": "12:26:02", "throughput": 28285.55, "total_tokens": 2799899520} +{"current_steps": 9990, "total_steps": 14493, "loss": 0.7812, "lr": 3.5365065790029055e-05, "epoch": 2.067907145260216, "percentage": 68.93, "elapsed_time": "1 day, 3:31:22", "remaining_time": "12:24:21", "throughput": 28286.49, "total_tokens": 2802698304} +{"current_steps": 10000, "total_steps": 14493, "loss": 0.7933, "lr": 3.535622297595087e-05, "epoch": 2.069977485054735, "percentage": 69.0, "elapsed_time": "1 day, 3:32:56", "remaining_time": "12:22:40", "throughput": 28287.33, "total_tokens": 2805451328} +{"current_steps": 10000, "total_steps": 14493, "eval_loss": 1.045206069946289, "epoch": 2.069977485054735, "percentage": 69.0, "elapsed_time": "1 day, 3:32:59", 
"remaining_time": "12:22:41", "throughput": 28286.6, "total_tokens": 2805451328} +{"current_steps": 10010, "total_steps": 14493, "loss": 0.7856, "lr": 3.534738679182869e-05, "epoch": 2.0720478248492533, "percentage": 69.07, "elapsed_time": "1 day, 3:35:10", "remaining_time": "12:21:16", "throughput": 28277.45, "total_tokens": 2808233728} +{"current_steps": 10020, "total_steps": 14493, "loss": 0.8001, "lr": 3.533855722938188e-05, "epoch": 2.0741181646437723, "percentage": 69.14, "elapsed_time": "1 day, 3:36:49", "remaining_time": "12:19:37", "throughput": 28277.06, "total_tokens": 2811002880} +{"current_steps": 10030, "total_steps": 14493, "loss": 0.7943, "lr": 3.5329734280344325e-05, "epoch": 2.076188504438291, "percentage": 69.21, "elapsed_time": "1 day, 3:38:26", "remaining_time": "12:17:57", "throughput": 28277.36, "total_tokens": 2813794432} +{"current_steps": 10040, "total_steps": 14493, "loss": 0.7803, "lr": 3.5320917936464294e-05, "epoch": 2.07825884423281, "percentage": 69.27, "elapsed_time": "1 day, 3:40:05", "remaining_time": "12:16:17", "throughput": 28277.55, "total_tokens": 2816610880} +{"current_steps": 10050, "total_steps": 14493, "loss": 0.7767, "lr": 3.5312108189504505e-05, "epoch": 2.0803291840273284, "percentage": 69.34, "elapsed_time": "1 day, 3:41:45", "remaining_time": "12:14:38", "throughput": 28277.53, "total_tokens": 2819415104} +{"current_steps": 10060, "total_steps": 14493, "loss": 0.7957, "lr": 3.530330503124204e-05, "epoch": 2.0823995238218473, "percentage": 69.41, "elapsed_time": "1 day, 3:43:26", "remaining_time": "12:13:00", "throughput": 28277.49, "total_tokens": 2822277696} +{"current_steps": 10070, "total_steps": 14493, "loss": 0.7918, "lr": 3.5294508453468325e-05, "epoch": 2.084469863616366, "percentage": 69.48, "elapsed_time": "1 day, 3:45:03", "remaining_time": "12:11:20", "throughput": 28277.9, "total_tokens": 2825072960} +{"current_steps": 10080, "total_steps": 14493, "loss": 0.7791, "lr": 3.528571844798908e-05, "epoch": 
2.086540203410885, "percentage": 69.55, "elapsed_time": "1 day, 3:46:39", "remaining_time": "12:09:39", "throughput": 28278.12, "total_tokens": 2827807040} +{"current_steps": 10090, "total_steps": 14493, "loss": 0.7831, "lr": 3.527693500662431e-05, "epoch": 2.0886105432054034, "percentage": 69.62, "elapsed_time": "1 day, 3:48:19", "remaining_time": "12:08:00", "throughput": 28278.23, "total_tokens": 2830650240} +{"current_steps": 10100, "total_steps": 14493, "loss": 0.771, "lr": 3.5268158121208294e-05, "epoch": 2.0906808829999224, "percentage": 69.69, "elapsed_time": "1 day, 3:49:56", "remaining_time": "12:06:20", "throughput": 28279.07, "total_tokens": 2833477504} +{"current_steps": 10110, "total_steps": 14493, "loss": 0.7792, "lr": 3.525938778358949e-05, "epoch": 2.092751222794441, "percentage": 69.76, "elapsed_time": "1 day, 3:51:32", "remaining_time": "12:04:39", "throughput": 28280.11, "total_tokens": 2836279936} +{"current_steps": 10120, "total_steps": 14493, "loss": 0.797, "lr": 3.5250623985630537e-05, "epoch": 2.09482156258896, "percentage": 69.83, "elapsed_time": "1 day, 3:53:11", "remaining_time": "12:03:00", "throughput": 28280.11, "total_tokens": 2839091904} +{"current_steps": 10130, "total_steps": 14493, "loss": 0.7954, "lr": 3.524186671920826e-05, "epoch": 2.096891902383479, "percentage": 69.9, "elapsed_time": "1 day, 3:54:50", "remaining_time": "12:01:21", "throughput": 28280.14, "total_tokens": 2841891968} +{"current_steps": 10140, "total_steps": 14493, "loss": 0.7959, "lr": 3.523311597621358e-05, "epoch": 2.0989622421779974, "percentage": 69.96, "elapsed_time": "1 day, 3:56:28", "remaining_time": "11:59:41", "throughput": 28280.76, "total_tokens": 2844714560} +{"current_steps": 10150, "total_steps": 14493, "loss": 0.7965, "lr": 3.5224371748551505e-05, "epoch": 2.1010325819725164, "percentage": 70.03, "elapsed_time": "1 day, 3:58:07", "remaining_time": "11:58:02", "throughput": 28280.5, "total_tokens": 2847498624} +{"current_steps": 10160, 
"total_steps": 14493, "loss": 0.7806, "lr": 3.521563402814109e-05, "epoch": 2.103102921767035, "percentage": 70.1, "elapsed_time": "1 day, 3:59:48", "remaining_time": "11:56:23", "throughput": 28280.56, "total_tokens": 2850348736} +{"current_steps": 10170, "total_steps": 14493, "loss": 0.7807, "lr": 3.5206902806915436e-05, "epoch": 2.105173261561554, "percentage": 70.17, "elapsed_time": "1 day, 4:01:25", "remaining_time": "11:54:43", "throughput": 28280.72, "total_tokens": 2853124928} +{"current_steps": 10180, "total_steps": 14493, "loss": 0.7976, "lr": 3.5198178076821644e-05, "epoch": 2.1072436013560725, "percentage": 70.24, "elapsed_time": "1 day, 4:03:06", "remaining_time": "11:53:05", "throughput": 28280.54, "total_tokens": 2855939840} +{"current_steps": 10190, "total_steps": 14493, "loss": 0.7909, "lr": 3.5189459829820743e-05, "epoch": 2.1093139411505915, "percentage": 70.31, "elapsed_time": "1 day, 4:04:43", "remaining_time": "11:51:25", "throughput": 28281.1, "total_tokens": 2858757440} +{"current_steps": 10200, "total_steps": 14493, "loss": 0.7909, "lr": 3.5180748057887714e-05, "epoch": 2.11138428094511, "percentage": 70.38, "elapsed_time": "1 day, 4:06:23", "remaining_time": "11:49:46", "throughput": 28281.18, "total_tokens": 2861576320} +{"current_steps": 10210, "total_steps": 14493, "loss": 0.7946, "lr": 3.517204275301144e-05, "epoch": 2.113454620739629, "percentage": 70.45, "elapsed_time": "1 day, 4:08:03", "remaining_time": "11:48:07", "throughput": 28281.19, "total_tokens": 2864404608} +{"current_steps": 10220, "total_steps": 14493, "loss": 0.7924, "lr": 3.5163343907194676e-05, "epoch": 2.1155249605341475, "percentage": 70.52, "elapsed_time": "1 day, 4:09:38", "remaining_time": "11:46:26", "throughput": 28281.68, "total_tokens": 2867159296} +{"current_steps": 10230, "total_steps": 14493, "loss": 0.7884, "lr": 3.5154651512453995e-05, "epoch": 2.1175953003286665, "percentage": 70.59, "elapsed_time": "1 day, 4:11:15", "remaining_time": "11:44:46", 
"throughput": 28282.33, "total_tokens": 2869966784} +{"current_steps": 10240, "total_steps": 14493, "loss": 0.778, "lr": 3.514596556081981e-05, "epoch": 2.119665640123185, "percentage": 70.65, "elapsed_time": "1 day, 4:12:52", "remaining_time": "11:43:06", "throughput": 28283.07, "total_tokens": 2872773824} +{"current_steps": 10250, "total_steps": 14493, "loss": 0.7892, "lr": 3.513728604433628e-05, "epoch": 2.121735979917704, "percentage": 70.72, "elapsed_time": "1 day, 4:14:29", "remaining_time": "11:41:26", "throughput": 28283.4, "total_tokens": 2875549952} +{"current_steps": 10260, "total_steps": 14493, "loss": 0.785, "lr": 3.5128612955061334e-05, "epoch": 2.1238063197122226, "percentage": 70.79, "elapsed_time": "1 day, 4:16:07", "remaining_time": "11:39:46", "throughput": 28283.5, "total_tokens": 2878350784} +{"current_steps": 10270, "total_steps": 14493, "loss": 0.7913, "lr": 3.5119946285066595e-05, "epoch": 2.1258766595067415, "percentage": 70.86, "elapsed_time": "1 day, 4:17:47", "remaining_time": "11:38:07", "throughput": 28283.62, "total_tokens": 2881186304} +{"current_steps": 10280, "total_steps": 14493, "loss": 0.7791, "lr": 3.511128602643739e-05, "epoch": 2.1279469993012605, "percentage": 70.93, "elapsed_time": "1 day, 4:19:24", "remaining_time": "11:36:27", "throughput": 28283.79, "total_tokens": 2883932800} +{"current_steps": 10290, "total_steps": 14493, "loss": 0.7924, "lr": 3.510263217127269e-05, "epoch": 2.130017339095779, "percentage": 71.0, "elapsed_time": "1 day, 4:21:00", "remaining_time": "11:34:46", "throughput": 28284.33, "total_tokens": 2886701824} +{"current_steps": 10300, "total_steps": 14493, "loss": 0.775, "lr": 3.50939847116851e-05, "epoch": 2.132087678890298, "percentage": 71.07, "elapsed_time": "1 day, 4:22:37", "remaining_time": "11:33:07", "throughput": 28284.49, "total_tokens": 2889471872} +{"current_steps": 10310, "total_steps": 14493, "loss": 0.7972, "lr": 3.508534363980081e-05, "epoch": 2.1341580186848166, "percentage": 71.14, 
"elapsed_time": "1 day, 4:24:16", "remaining_time": "11:31:27", "throughput": 28284.89, "total_tokens": 2892307328} +{"current_steps": 10320, "total_steps": 14493, "loss": 0.8089, "lr": 3.507670894775958e-05, "epoch": 2.1362283584793356, "percentage": 71.21, "elapsed_time": "1 day, 4:25:56", "remaining_time": "11:29:49", "throughput": 28284.98, "total_tokens": 2895161088} +{"current_steps": 10330, "total_steps": 14493, "loss": 0.7925, "lr": 3.506808062771471e-05, "epoch": 2.138298698273854, "percentage": 71.28, "elapsed_time": "1 day, 4:27:37", "remaining_time": "11:28:10", "throughput": 28284.79, "total_tokens": 2898000576} +{"current_steps": 10340, "total_steps": 14493, "loss": 0.7901, "lr": 3.505945867183298e-05, "epoch": 2.140369038068373, "percentage": 71.34, "elapsed_time": "1 day, 4:29:15", "remaining_time": "11:26:30", "throughput": 28285.33, "total_tokens": 2900810048} +{"current_steps": 10350, "total_steps": 14493, "loss": 0.7877, "lr": 3.505084307229468e-05, "epoch": 2.1424393778628916, "percentage": 71.41, "elapsed_time": "1 day, 4:30:49", "remaining_time": "11:24:49", "throughput": 28286.23, "total_tokens": 2903572096} +{"current_steps": 10360, "total_steps": 14493, "loss": 0.7806, "lr": 3.5042233821293525e-05, "epoch": 2.1445097176574106, "percentage": 71.48, "elapsed_time": "1 day, 4:32:25", "remaining_time": "11:23:09", "throughput": 28287.31, "total_tokens": 2906387776} +{"current_steps": 10370, "total_steps": 14493, "loss": 0.7972, "lr": 3.503363091103664e-05, "epoch": 2.146580057451929, "percentage": 71.55, "elapsed_time": "1 day, 4:34:04", "remaining_time": "11:21:29", "throughput": 28287.5, "total_tokens": 2909210240} +{"current_steps": 10380, "total_steps": 14493, "loss": 0.7891, "lr": 3.5025034333744545e-05, "epoch": 2.148650397246448, "percentage": 71.62, "elapsed_time": "1 day, 4:35:43", "remaining_time": "11:19:50", "throughput": 28287.86, "total_tokens": 2912059392} +{"current_steps": 10390, "total_steps": 14493, "loss": 0.7769, "lr": 
3.501644408165112e-05, "epoch": 2.1507207370409667, "percentage": 71.69, "elapsed_time": "1 day, 4:37:22", "remaining_time": "11:18:11", "throughput": 28288.06, "total_tokens": 2914881536} +{"current_steps": 10400, "total_steps": 14493, "loss": 0.7884, "lr": 3.500786014700357e-05, "epoch": 2.1527910768354857, "percentage": 71.76, "elapsed_time": "1 day, 4:39:04", "remaining_time": "11:16:33", "throughput": 28287.85, "total_tokens": 2917723328} +{"current_steps": 10410, "total_steps": 14493, "loss": 0.7812, "lr": 3.499928252206237e-05, "epoch": 2.1548614166300046, "percentage": 71.83, "elapsed_time": "1 day, 4:40:42", "remaining_time": "11:14:53", "throughput": 28288.44, "total_tokens": 2920557440} +{"current_steps": 10420, "total_steps": 14493, "loss": 0.7884, "lr": 3.499071119910131e-05, "epoch": 2.156931756424523, "percentage": 71.9, "elapsed_time": "1 day, 4:42:22", "remaining_time": "11:13:14", "throughput": 28288.13, "total_tokens": 2923353728} +{"current_steps": 10430, "total_steps": 14493, "loss": 0.7781, "lr": 3.498214617040739e-05, "epoch": 2.159002096219042, "percentage": 71.97, "elapsed_time": "1 day, 4:43:59", "remaining_time": "11:11:34", "throughput": 28288.57, "total_tokens": 2926147264} +{"current_steps": 10440, "total_steps": 14493, "loss": 0.7885, "lr": 3.49735874282808e-05, "epoch": 2.1610724360135607, "percentage": 72.03, "elapsed_time": "1 day, 4:45:38", "remaining_time": "11:09:55", "throughput": 28288.89, "total_tokens": 2929002368} +{"current_steps": 10450, "total_steps": 14493, "loss": 0.7816, "lr": 3.4965034965034965e-05, "epoch": 2.1631427758080797, "percentage": 72.1, "elapsed_time": "1 day, 4:47:24", "remaining_time": "11:08:19", "throughput": 28287.56, "total_tokens": 2931850944} +{"current_steps": 10460, "total_steps": 14493, "loss": 0.7811, "lr": 3.495648877299642e-05, "epoch": 2.165213115602598, "percentage": 72.17, "elapsed_time": "1 day, 4:49:00", "remaining_time": "11:06:38", "throughput": 28288.22, "total_tokens": 2934634112} 
+{"current_steps": 10470, "total_steps": 14493, "loss": 0.787, "lr": 3.494794884450483e-05, "epoch": 2.167283455397117, "percentage": 72.24, "elapsed_time": "1 day, 4:50:38", "remaining_time": "11:04:59", "throughput": 28288.43, "total_tokens": 2937441024} +{"current_steps": 10480, "total_steps": 14493, "loss": 0.7904, "lr": 3.4939415171912954e-05, "epoch": 2.1693537951916357, "percentage": 72.31, "elapsed_time": "1 day, 4:52:12", "remaining_time": "11:03:17", "throughput": 28289.29, "total_tokens": 2940178112} +{"current_steps": 10490, "total_steps": 14493, "loss": 0.7814, "lr": 3.4930887747586616e-05, "epoch": 2.1714241349861547, "percentage": 72.38, "elapsed_time": "1 day, 4:53:50", "remaining_time": "11:01:38", "throughput": 28290.12, "total_tokens": 2943025984} +{"current_steps": 10500, "total_steps": 14493, "loss": 0.7946, "lr": 3.492236656390469e-05, "epoch": 2.1734944747806733, "percentage": 72.45, "elapsed_time": "1 day, 4:55:28", "remaining_time": "10:59:58", "throughput": 28290.14, "total_tokens": 2945816064} +{"current_steps": 10510, "total_steps": 14493, "loss": 0.7907, "lr": 3.4913851613259034e-05, "epoch": 2.1755648145751922, "percentage": 72.52, "elapsed_time": "1 day, 4:57:05", "remaining_time": "10:58:18", "throughput": 28290.56, "total_tokens": 2948607424} +{"current_steps": 10520, "total_steps": 14493, "loss": 0.7817, "lr": 3.490534288805452e-05, "epoch": 2.177635154369711, "percentage": 72.59, "elapsed_time": "1 day, 4:58:44", "remaining_time": "10:56:39", "throughput": 28290.74, "total_tokens": 2951412672} +{"current_steps": 10530, "total_steps": 14493, "loss": 0.7986, "lr": 3.489684038070891e-05, "epoch": 2.1797054941642298, "percentage": 72.66, "elapsed_time": "1 day, 5:00:23", "remaining_time": "10:55:00", "throughput": 28290.81, "total_tokens": 2954235328} +{"current_steps": 10540, "total_steps": 14493, "loss": 0.7905, "lr": 3.488834408365296e-05, "epoch": 2.1817758339587483, "percentage": 72.72, "elapsed_time": "1 day, 5:02:02", 
"remaining_time": "10:53:20", "throughput": 28290.9, "total_tokens": 2957038272} +{"current_steps": 10550, "total_steps": 14493, "loss": 0.7828, "lr": 3.487985398933027e-05, "epoch": 2.1838461737532673, "percentage": 72.79, "elapsed_time": "1 day, 5:03:38", "remaining_time": "10:51:40", "throughput": 28291.26, "total_tokens": 2959798976} +{"current_steps": 10560, "total_steps": 14493, "loss": 0.7853, "lr": 3.4871370090197324e-05, "epoch": 2.1859165135477863, "percentage": 72.86, "elapsed_time": "1 day, 5:05:18", "remaining_time": "10:50:01", "throughput": 28291.13, "total_tokens": 2962596480} +{"current_steps": 10570, "total_steps": 14493, "loss": 0.788, "lr": 3.486289237872343e-05, "epoch": 2.187986853342305, "percentage": 72.93, "elapsed_time": "1 day, 5:06:58", "remaining_time": "10:48:23", "throughput": 28291.08, "total_tokens": 2965440832} +{"current_steps": 10580, "total_steps": 14493, "loss": 0.7916, "lr": 3.485442084739075e-05, "epoch": 2.190057193136824, "percentage": 73.0, "elapsed_time": "1 day, 5:08:35", "remaining_time": "10:46:42", "throughput": 28291.37, "total_tokens": 2968199808} +{"current_steps": 10590, "total_steps": 14493, "loss": 0.7905, "lr": 3.484595548869416e-05, "epoch": 2.1921275329313423, "percentage": 73.07, "elapsed_time": "1 day, 5:10:11", "remaining_time": "10:45:02", "throughput": 28292.15, "total_tokens": 2971012992} +{"current_steps": 10600, "total_steps": 14493, "loss": 0.7819, "lr": 3.4837496295141335e-05, "epoch": 2.1941978727258613, "percentage": 73.14, "elapsed_time": "1 day, 5:11:54", "remaining_time": "10:43:24", "throughput": 28291.56, "total_tokens": 2973865472} +{"current_steps": 10610, "total_steps": 14493, "loss": 0.7873, "lr": 3.482904325925266e-05, "epoch": 2.19626821252038, "percentage": 73.21, "elapsed_time": "1 day, 5:13:38", "remaining_time": "10:41:47", "throughput": 28290.44, "total_tokens": 2976683136} +{"current_steps": 10620, "total_steps": 14493, "loss": 0.7983, "lr": 3.482059637356124e-05, "epoch": 
2.198338552314899, "percentage": 73.28, "elapsed_time": "1 day, 5:15:17", "remaining_time": "10:40:08", "throughput": 28290.67, "total_tokens": 2979496576} +{"current_steps": 10630, "total_steps": 14493, "loss": 0.7796, "lr": 3.481215563061281e-05, "epoch": 2.2004088921094174, "percentage": 73.35, "elapsed_time": "1 day, 5:16:51", "remaining_time": "10:38:27", "throughput": 28291.41, "total_tokens": 2982241600} +{"current_steps": 10640, "total_steps": 14493, "loss": 0.789, "lr": 3.4803721022965785e-05, "epoch": 2.2024792319039364, "percentage": 73.41, "elapsed_time": "1 day, 5:18:28", "remaining_time": "10:36:47", "throughput": 28292.12, "total_tokens": 2985048576} +{"current_steps": 10650, "total_steps": 14493, "loss": 0.7947, "lr": 3.479529254319117e-05, "epoch": 2.204549571698455, "percentage": 73.48, "elapsed_time": "1 day, 5:20:05", "remaining_time": "10:35:07", "throughput": 28292.71, "total_tokens": 2987859840} +{"current_steps": 10660, "total_steps": 14493, "loss": 0.7823, "lr": 3.478687018387257e-05, "epoch": 2.206619911492974, "percentage": 73.55, "elapsed_time": "1 day, 5:21:43", "remaining_time": "10:33:27", "throughput": 28293.09, "total_tokens": 2990678016} +{"current_steps": 10670, "total_steps": 14493, "loss": 0.8089, "lr": 3.477845393760616e-05, "epoch": 2.2086902512874924, "percentage": 73.62, "elapsed_time": "1 day, 5:23:20", "remaining_time": "10:31:47", "throughput": 28293.31, "total_tokens": 2993451328} +{"current_steps": 10680, "total_steps": 14493, "loss": 0.7828, "lr": 3.4770043797000614e-05, "epoch": 2.2107605910820114, "percentage": 73.69, "elapsed_time": "1 day, 5:24:57", "remaining_time": "10:30:07", "throughput": 28293.78, "total_tokens": 2996238720} +{"current_steps": 10690, "total_steps": 14493, "loss": 0.7825, "lr": 3.4761639754677146e-05, "epoch": 2.21283093087653, "percentage": 73.76, "elapsed_time": "1 day, 5:26:35", "remaining_time": "10:28:28", "throughput": 28294.14, "total_tokens": 2999057024} +{"current_steps": 10700, 
"total_steps": 14493, "loss": 0.7922, "lr": 3.4753241803269435e-05, "epoch": 2.214901270671049, "percentage": 73.83, "elapsed_time": "1 day, 5:28:13", "remaining_time": "10:26:48", "throughput": 28294.48, "total_tokens": 3001855744} +{"current_steps": 10710, "total_steps": 14493, "loss": 0.7874, "lr": 3.474484993542361e-05, "epoch": 2.2169716104655675, "percentage": 73.9, "elapsed_time": "1 day, 5:29:50", "remaining_time": "10:25:08", "throughput": 28295.08, "total_tokens": 3004674560} +{"current_steps": 10720, "total_steps": 14493, "loss": 0.7817, "lr": 3.473646414379822e-05, "epoch": 2.2190419502600864, "percentage": 73.97, "elapsed_time": "1 day, 5:31:30", "remaining_time": "10:23:29", "throughput": 28294.8, "total_tokens": 3007462784} +{"current_steps": 10730, "total_steps": 14493, "loss": 0.7903, "lr": 3.472808442106422e-05, "epoch": 2.221112290054605, "percentage": 74.04, "elapsed_time": "1 day, 5:33:09", "remaining_time": "10:21:50", "throughput": 28294.74, "total_tokens": 3010275456} +{"current_steps": 10740, "total_steps": 14493, "loss": 0.8043, "lr": 3.4719710759904936e-05, "epoch": 2.223182629849124, "percentage": 74.1, "elapsed_time": "1 day, 5:34:47", "remaining_time": "10:20:11", "throughput": 28294.89, "total_tokens": 3013053696} +{"current_steps": 10750, "total_steps": 14493, "loss": 0.8025, "lr": 3.471134315301603e-05, "epoch": 2.225252969643643, "percentage": 74.17, "elapsed_time": "1 day, 5:36:28", "remaining_time": "10:18:32", "throughput": 28294.45, "total_tokens": 3015868800} +{"current_steps": 10760, "total_steps": 14493, "loss": 0.7816, "lr": 3.470298159310549e-05, "epoch": 2.2273233094381615, "percentage": 74.24, "elapsed_time": "1 day, 5:38:00", "remaining_time": "10:16:51", "throughput": 28295.47, "total_tokens": 3018583360} +{"current_steps": 10770, "total_steps": 14493, "loss": 0.7891, "lr": 3.4694626072893585e-05, "epoch": 2.2293936492326805, "percentage": 74.31, "elapsed_time": "1 day, 5:39:41", "remaining_time": "10:15:12", 
"throughput": 28295.25, "total_tokens": 3021423232} +{"current_steps": 10780, "total_steps": 14493, "loss": 0.7832, "lr": 3.468627658511285e-05, "epoch": 2.231463989027199, "percentage": 74.38, "elapsed_time": "1 day, 5:41:17", "remaining_time": "10:13:32", "throughput": 28295.69, "total_tokens": 3024175488} +{"current_steps": 10790, "total_steps": 14493, "loss": 0.8012, "lr": 3.467793312250806e-05, "epoch": 2.233534328821718, "percentage": 74.45, "elapsed_time": "1 day, 5:43:00", "remaining_time": "10:11:54", "throughput": 28295.11, "total_tokens": 3027012352} +{"current_steps": 10800, "total_steps": 14493, "loss": 0.7916, "lr": 3.466959567783619e-05, "epoch": 2.2356046686162365, "percentage": 74.52, "elapsed_time": "1 day, 5:44:39", "remaining_time": "10:10:15", "throughput": 28295.35, "total_tokens": 3029857856} +{"current_steps": 10810, "total_steps": 14493, "loss": 0.7982, "lr": 3.466126424386642e-05, "epoch": 2.2376750084107555, "percentage": 74.59, "elapsed_time": "1 day, 5:46:23", "remaining_time": "10:08:37", "throughput": 28294.77, "total_tokens": 3032742464} +{"current_steps": 10820, "total_steps": 14493, "loss": 0.8026, "lr": 3.4652938813380056e-05, "epoch": 2.239745348205274, "percentage": 74.66, "elapsed_time": "1 day, 5:47:59", "remaining_time": "10:06:57", "throughput": 28295.43, "total_tokens": 3035519232} +{"current_steps": 10830, "total_steps": 14493, "loss": 0.7812, "lr": 3.464461937917057e-05, "epoch": 2.241815687999793, "percentage": 74.73, "elapsed_time": "1 day, 5:49:38", "remaining_time": "10:05:18", "throughput": 28295.64, "total_tokens": 3038354752} +{"current_steps": 10840, "total_steps": 14493, "loss": 0.7924, "lr": 3.4636305934043525e-05, "epoch": 2.2438860277943116, "percentage": 74.79, "elapsed_time": "1 day, 5:51:14", "remaining_time": "10:03:38", "throughput": 28296.31, "total_tokens": 3041127168} +{"current_steps": 10850, "total_steps": 14493, "loss": 0.7926, "lr": 3.4627998470816544e-05, "epoch": 2.2459563675888305, "percentage": 
74.86, "elapsed_time": "1 day, 5:52:51", "remaining_time": "10:01:58", "throughput": 28297.08, "total_tokens": 3043963200} +{"current_steps": 10860, "total_steps": 14493, "loss": 0.7976, "lr": 3.4619696982319334e-05, "epoch": 2.248026707383349, "percentage": 74.93, "elapsed_time": "1 day, 5:54:32", "remaining_time": "10:00:19", "throughput": 28297.07, "total_tokens": 3046810368} +{"current_steps": 10870, "total_steps": 14493, "loss": 0.7922, "lr": 3.461140146139361e-05, "epoch": 2.250097047177868, "percentage": 75.0, "elapsed_time": "1 day, 5:56:06", "remaining_time": "9:58:38", "throughput": 28297.96, "total_tokens": 3049580352} +{"current_steps": 10880, "total_steps": 14493, "loss": 0.7871, "lr": 3.460311190089309e-05, "epoch": 2.2521673869723866, "percentage": 75.07, "elapsed_time": "1 day, 5:57:44", "remaining_time": "9:56:59", "throughput": 28298.49, "total_tokens": 3052403520} +{"current_steps": 10890, "total_steps": 14493, "loss": 0.8006, "lr": 3.459482829368348e-05, "epoch": 2.2542377267669056, "percentage": 75.14, "elapsed_time": "1 day, 5:59:19", "remaining_time": "9:55:18", "throughput": 28299.02, "total_tokens": 3055159552} +{"current_steps": 10900, "total_steps": 14493, "loss": 0.7795, "lr": 3.4586550632642425e-05, "epoch": 2.2563080665614246, "percentage": 75.21, "elapsed_time": "1 day, 6:00:57", "remaining_time": "9:53:39", "throughput": 28299.31, "total_tokens": 3057954880} +{"current_steps": 10910, "total_steps": 14493, "loss": 0.7687, "lr": 3.457827891065949e-05, "epoch": 2.258378406355943, "percentage": 75.28, "elapsed_time": "1 day, 6:02:36", "remaining_time": "9:52:00", "throughput": 28299.4, "total_tokens": 3060775744} +{"current_steps": 10920, "total_steps": 14493, "loss": 0.7854, "lr": 3.457001312063614e-05, "epoch": 2.260448746150462, "percentage": 75.35, "elapsed_time": "1 day, 6:04:14", "remaining_time": "9:50:20", "throughput": 28299.9, "total_tokens": 3063587712} +{"current_steps": 10930, "total_steps": 14493, "loss": 0.797, "lr": 
3.45617532554857e-05, "epoch": 2.2625190859449806, "percentage": 75.42, "elapsed_time": "1 day, 6:05:51", "remaining_time": "9:48:40", "throughput": 28300.13, "total_tokens": 3066363520} +{"current_steps": 10940, "total_steps": 14493, "loss": 0.7876, "lr": 3.455349930813339e-05, "epoch": 2.2645894257394996, "percentage": 75.48, "elapsed_time": "1 day, 6:07:32", "remaining_time": "9:47:02", "throughput": 28299.95, "total_tokens": 3069200000} +{"current_steps": 10950, "total_steps": 14493, "loss": 0.7914, "lr": 3.45452512715162e-05, "epoch": 2.266659765534018, "percentage": 75.55, "elapsed_time": "1 day, 6:09:05", "remaining_time": "9:45:21", "throughput": 28301.34, "total_tokens": 3071987648} +{"current_steps": 10960, "total_steps": 14493, "loss": 0.7944, "lr": 3.4537009138582935e-05, "epoch": 2.268730105328537, "percentage": 75.62, "elapsed_time": "1 day, 6:10:42", "remaining_time": "9:43:41", "throughput": 28301.98, "total_tokens": 3074807936} +{"current_steps": 10970, "total_steps": 14493, "loss": 0.7917, "lr": 3.4528772902294174e-05, "epoch": 2.2708004451230557, "percentage": 75.69, "elapsed_time": "1 day, 6:12:22", "remaining_time": "9:42:02", "throughput": 28302.22, "total_tokens": 3077653568} +{"current_steps": 10980, "total_steps": 14493, "loss": 0.7918, "lr": 3.452054255562222e-05, "epoch": 2.2728707849175747, "percentage": 75.76, "elapsed_time": "1 day, 6:14:04", "remaining_time": "9:40:24", "throughput": 28301.99, "total_tokens": 3080502208} +{"current_steps": 10990, "total_steps": 14493, "loss": 0.8001, "lr": 3.451231809155115e-05, "epoch": 2.274941124712093, "percentage": 75.83, "elapsed_time": "1 day, 6:15:40", "remaining_time": "9:38:44", "throughput": 28302.74, "total_tokens": 3083313088} +{"current_steps": 11000, "total_steps": 14493, "loss": 0.7929, "lr": 3.450409950307666e-05, "epoch": 2.277011464506612, "percentage": 75.9, "elapsed_time": "1 day, 6:17:21", "remaining_time": "9:37:05", "throughput": 28302.48, "total_tokens": 3086144000} 
+{"current_steps": 11010, "total_steps": 14493, "loss": 0.7753, "lr": 3.449588678320619e-05, "epoch": 2.279081804301131, "percentage": 75.97, "elapsed_time": "1 day, 6:19:01", "remaining_time": "9:35:26", "throughput": 28302.29, "total_tokens": 3088959296} +{"current_steps": 11020, "total_steps": 14493, "loss": 0.7719, "lr": 3.4487679924958767e-05, "epoch": 2.2811521440956497, "percentage": 76.04, "elapsed_time": "1 day, 6:20:38", "remaining_time": "9:33:47", "throughput": 28302.75, "total_tokens": 3091758912} +{"current_steps": 11030, "total_steps": 14493, "loss": 0.7646, "lr": 3.4479478921365076e-05, "epoch": 2.2832224838901682, "percentage": 76.11, "elapsed_time": "1 day, 6:22:15", "remaining_time": "9:32:07", "throughput": 28303.64, "total_tokens": 3094580544} +{"current_steps": 11040, "total_steps": 14493, "loss": 0.7756, "lr": 3.447128376546738e-05, "epoch": 2.2852928236846872, "percentage": 76.17, "elapsed_time": "1 day, 6:23:48", "remaining_time": "9:30:26", "throughput": 28304.81, "total_tokens": 3097359104} +{"current_steps": 11050, "total_steps": 14493, "loss": 0.7877, "lr": 3.4463094450319505e-05, "epoch": 2.287363163479206, "percentage": 76.24, "elapsed_time": "1 day, 6:25:23", "remaining_time": "9:28:45", "throughput": 28305.38, "total_tokens": 3100111232} +{"current_steps": 11060, "total_steps": 14493, "loss": 0.7959, "lr": 3.4454910968986855e-05, "epoch": 2.2894335032737247, "percentage": 76.31, "elapsed_time": "1 day, 6:26:57", "remaining_time": "9:27:04", "throughput": 28306.63, "total_tokens": 3102895616} +{"current_steps": 11070, "total_steps": 14493, "loss": 0.7879, "lr": 3.4446733314546336e-05, "epoch": 2.2915038430682437, "percentage": 76.38, "elapsed_time": "1 day, 6:28:37", "remaining_time": "9:25:26", "throughput": 28306.84, "total_tokens": 3105741248} +{"current_steps": 11080, "total_steps": 14493, "loss": 0.7793, "lr": 3.443856148008633e-05, "epoch": 2.2935741828627623, "percentage": 76.45, "elapsed_time": "1 day, 6:30:16", 
"remaining_time": "9:23:47", "throughput": 28306.3, "total_tokens": 3108501632} +{"current_steps": 11090, "total_steps": 14493, "loss": 0.7992, "lr": 3.443039545870672e-05, "epoch": 2.2956445226572813, "percentage": 76.52, "elapsed_time": "1 day, 6:31:49", "remaining_time": "9:22:05", "throughput": 28307.33, "total_tokens": 3111235328} +{"current_steps": 11100, "total_steps": 14493, "loss": 0.7878, "lr": 3.442223524351883e-05, "epoch": 2.2977148624518, "percentage": 76.59, "elapsed_time": "1 day, 6:33:24", "remaining_time": "9:20:25", "throughput": 28307.72, "total_tokens": 3113987392} +{"current_steps": 11110, "total_steps": 14493, "loss": 0.7978, "lr": 3.44140808276454e-05, "epoch": 2.2997852022463188, "percentage": 76.66, "elapsed_time": "1 day, 6:35:01", "remaining_time": "9:18:45", "throughput": 28308.49, "total_tokens": 3116799808} +{"current_steps": 11120, "total_steps": 14493, "loss": 0.787, "lr": 3.4405932204220575e-05, "epoch": 2.3018555420408373, "percentage": 76.73, "elapsed_time": "1 day, 6:36:37", "remaining_time": "9:17:05", "throughput": 28309.2, "total_tokens": 3119592960} +{"current_steps": 11130, "total_steps": 14493, "loss": 0.8014, "lr": 3.4397789366389876e-05, "epoch": 2.3039258818353563, "percentage": 76.8, "elapsed_time": "1 day, 6:38:14", "remaining_time": "9:15:26", "throughput": 28309.88, "total_tokens": 3122432704} +{"current_steps": 11140, "total_steps": 14493, "loss": 0.787, "lr": 3.438965230731016e-05, "epoch": 2.305996221629875, "percentage": 76.86, "elapsed_time": "1 day, 6:39:48", "remaining_time": "9:13:45", "throughput": 28310.61, "total_tokens": 3125156800} +{"current_steps": 11150, "total_steps": 14493, "loss": 0.7816, "lr": 3.438152102014964e-05, "epoch": 2.308066561424394, "percentage": 76.93, "elapsed_time": "1 day, 6:41:23", "remaining_time": "9:12:05", "throughput": 28311.37, "total_tokens": 3127932096} +{"current_steps": 11160, "total_steps": 14493, "loss": 0.7821, "lr": 3.437339549808778e-05, "epoch": 2.310136901218913, 
"percentage": 77.0, "elapsed_time": "1 day, 6:43:00", "remaining_time": "9:10:25", "throughput": 28311.95, "total_tokens": 3130754240} +{"current_steps": 11170, "total_steps": 14493, "loss": 0.7746, "lr": 3.43652757343154e-05, "epoch": 2.3122072410134313, "percentage": 77.07, "elapsed_time": "1 day, 6:44:38", "remaining_time": "9:08:46", "throughput": 28312.46, "total_tokens": 3133576832} +{"current_steps": 11180, "total_steps": 14493, "loss": 0.7857, "lr": 3.435716172203449e-05, "epoch": 2.31427758080795, "percentage": 77.14, "elapsed_time": "1 day, 6:46:15", "remaining_time": "9:07:06", "throughput": 28313.11, "total_tokens": 3136392896} +{"current_steps": 11190, "total_steps": 14493, "loss": 0.7855, "lr": 3.434905345445833e-05, "epoch": 2.316347920602469, "percentage": 77.21, "elapsed_time": "1 day, 6:47:52", "remaining_time": "9:05:26", "throughput": 28313.36, "total_tokens": 3139159680} +{"current_steps": 11200, "total_steps": 14493, "loss": 0.7892, "lr": 3.4340950924811374e-05, "epoch": 2.318418260396988, "percentage": 77.28, "elapsed_time": "1 day, 6:49:27", "remaining_time": "9:03:46", "throughput": 28314.16, "total_tokens": 3141951424} +{"current_steps": 11210, "total_steps": 14493, "loss": 0.7911, "lr": 3.433285412632927e-05, "epoch": 2.3204886001915064, "percentage": 77.35, "elapsed_time": "1 day, 6:51:00", "remaining_time": "9:02:05", "throughput": 28315.47, "total_tokens": 3144736320} +{"current_steps": 11220, "total_steps": 14493, "loss": 0.7821, "lr": 3.4324763052258835e-05, "epoch": 2.3225589399860254, "percentage": 77.42, "elapsed_time": "1 day, 6:52:43", "remaining_time": "9:00:27", "throughput": 28315.22, "total_tokens": 3147607104} +{"current_steps": 11230, "total_steps": 14493, "loss": 0.794, "lr": 3.4316677695858003e-05, "epoch": 2.324629279780544, "percentage": 77.49, "elapsed_time": "1 day, 6:54:19", "remaining_time": "8:58:47", "throughput": 28315.72, "total_tokens": 3150385664} +{"current_steps": 11240, "total_steps": 14493, "loss": 
0.8011, "lr": 3.430859805039583e-05, "epoch": 2.326699619575063, "percentage": 77.55, "elapsed_time": "1 day, 6:55:55", "remaining_time": "8:57:07", "throughput": 28316.75, "total_tokens": 3153220800} +{"current_steps": 11250, "total_steps": 14493, "loss": 0.7958, "lr": 3.430052410915246e-05, "epoch": 2.3287699593695814, "percentage": 77.62, "elapsed_time": "1 day, 6:57:32", "remaining_time": "8:55:28", "throughput": 28317.24, "total_tokens": 3156039424} +{"current_steps": 11260, "total_steps": 14493, "loss": 0.7941, "lr": 3.4292455865419086e-05, "epoch": 2.3308402991641004, "percentage": 77.69, "elapsed_time": "1 day, 6:59:08", "remaining_time": "8:53:48", "throughput": 28317.99, "total_tokens": 3158841728} +{"current_steps": 11270, "total_steps": 14493, "loss": 0.7834, "lr": 3.4284393312497973e-05, "epoch": 2.332910638958619, "percentage": 77.76, "elapsed_time": "1 day, 7:00:45", "remaining_time": "8:52:08", "throughput": 28318.53, "total_tokens": 3161624448} +{"current_steps": 11280, "total_steps": 14493, "loss": 0.7874, "lr": 3.427633644370238e-05, "epoch": 2.334980978753138, "percentage": 77.83, "elapsed_time": "1 day, 7:02:22", "remaining_time": "8:50:28", "throughput": 28318.73, "total_tokens": 3164418176} +{"current_steps": 11290, "total_steps": 14493, "loss": 0.7947, "lr": 3.4268285252356564e-05, "epoch": 2.3370513185476565, "percentage": 77.9, "elapsed_time": "1 day, 7:03:59", "remaining_time": "8:48:48", "throughput": 28319.36, "total_tokens": 3167209792} +{"current_steps": 11300, "total_steps": 14493, "loss": 0.7978, "lr": 3.426023973179575e-05, "epoch": 2.3391216583421754, "percentage": 77.97, "elapsed_time": "1 day, 7:05:31", "remaining_time": "8:47:07", "throughput": 28320.3, "total_tokens": 3169922176} +{"current_steps": 11310, "total_steps": 14493, "loss": 0.788, "lr": 3.425219987536614e-05, "epoch": 2.3411919981366944, "percentage": 78.04, "elapsed_time": "1 day, 7:07:04", "remaining_time": "8:45:27", "throughput": 28321.11, "total_tokens": 
3172665792} +{"current_steps": 11320, "total_steps": 14493, "loss": 0.7936, "lr": 3.4244165676424815e-05, "epoch": 2.343262337931213, "percentage": 78.11, "elapsed_time": "1 day, 7:08:41", "remaining_time": "8:43:47", "throughput": 28321.88, "total_tokens": 3175479872} +{"current_steps": 11330, "total_steps": 14493, "loss": 0.8027, "lr": 3.423613712833979e-05, "epoch": 2.3453326777257315, "percentage": 78.18, "elapsed_time": "1 day, 7:10:17", "remaining_time": "8:42:07", "throughput": 28322.57, "total_tokens": 3178296064} +{"current_steps": 11340, "total_steps": 14493, "loss": 0.781, "lr": 3.422811422448995e-05, "epoch": 2.3474030175202505, "percentage": 78.24, "elapsed_time": "1 day, 7:11:54", "remaining_time": "8:40:28", "throughput": 28322.97, "total_tokens": 3181078528} +{"current_steps": 11350, "total_steps": 14493, "loss": 0.7772, "lr": 3.422009695826503e-05, "epoch": 2.3494733573147695, "percentage": 78.31, "elapsed_time": "1 day, 7:13:28", "remaining_time": "8:38:47", "throughput": 28323.78, "total_tokens": 3183843200} +{"current_steps": 11360, "total_steps": 14493, "loss": 0.785, "lr": 3.4212085323065626e-05, "epoch": 2.351543697109288, "percentage": 78.38, "elapsed_time": "1 day, 7:15:03", "remaining_time": "8:37:07", "throughput": 28324.91, "total_tokens": 3186661760} +{"current_steps": 11370, "total_steps": 14493, "loss": 0.7894, "lr": 3.4204079312303103e-05, "epoch": 2.353614036903807, "percentage": 78.45, "elapsed_time": "1 day, 7:16:44", "remaining_time": "8:35:29", "throughput": 28325.09, "total_tokens": 3189527872} +{"current_steps": 11380, "total_steps": 14493, "loss": 0.8012, "lr": 3.419607891939964e-05, "epoch": 2.3556843766983255, "percentage": 78.52, "elapsed_time": "1 day, 7:18:20", "remaining_time": "8:33:49", "throughput": 28325.94, "total_tokens": 3192333696} +{"current_steps": 11390, "total_steps": 14493, "loss": 0.7981, "lr": 3.4188084137788166e-05, "epoch": 2.3577547164928445, "percentage": 78.59, "elapsed_time": "1 day, 7:19:55", 
"remaining_time": "8:32:09", "throughput": 28326.83, "total_tokens": 3195131328} +{"current_steps": 11400, "total_steps": 14493, "loss": 0.7839, "lr": 3.418009496091238e-05, "epoch": 2.359825056287363, "percentage": 78.66, "elapsed_time": "1 day, 7:21:27", "remaining_time": "8:30:28", "throughput": 28327.8, "total_tokens": 3197851264} +{"current_steps": 11410, "total_steps": 14493, "loss": 0.7856, "lr": 3.417211138222666e-05, "epoch": 2.361895396081882, "percentage": 78.73, "elapsed_time": "1 day, 7:23:05", "remaining_time": "8:28:48", "throughput": 28328.15, "total_tokens": 3200668480} +{"current_steps": 11420, "total_steps": 14493, "loss": 0.7858, "lr": 3.416413339519612e-05, "epoch": 2.3639657358764006, "percentage": 78.8, "elapsed_time": "1 day, 7:24:37", "remaining_time": "8:27:07", "throughput": 28329.68, "total_tokens": 3203443712} +{"current_steps": 11430, "total_steps": 14493, "loss": 0.7905, "lr": 3.4156160993296524e-05, "epoch": 2.3660360756709196, "percentage": 78.87, "elapsed_time": "1 day, 7:26:15", "remaining_time": "8:25:28", "throughput": 28330.15, "total_tokens": 3206279680} +{"current_steps": 11440, "total_steps": 14493, "loss": 0.7987, "lr": 3.4148194170014295e-05, "epoch": 2.368106415465438, "percentage": 78.93, "elapsed_time": "1 day, 7:27:54", "remaining_time": "8:23:49", "throughput": 28330.54, "total_tokens": 3209126912} +{"current_steps": 11450, "total_steps": 14493, "loss": 0.7801, "lr": 3.4140232918846484e-05, "epoch": 2.370176755259957, "percentage": 79.0, "elapsed_time": "1 day, 7:29:28", "remaining_time": "8:22:09", "throughput": 28331.6, "total_tokens": 3211903040} +{"current_steps": 11460, "total_steps": 14493, "loss": 0.7914, "lr": 3.4132277233300753e-05, "epoch": 2.372247095054476, "percentage": 79.07, "elapsed_time": "1 day, 7:31:05", "remaining_time": "8:20:29", "throughput": 28332.07, "total_tokens": 3214718080} +{"current_steps": 11470, "total_steps": 14493, "loss": 0.7896, "lr": 3.4124327106895356e-05, "epoch": 
2.3743174348489946, "percentage": 79.14, "elapsed_time": "1 day, 7:32:36", "remaining_time": "8:18:48", "throughput": 28333.17, "total_tokens": 3217426880} +{"current_steps": 11480, "total_steps": 14493, "loss": 0.7831, "lr": 3.4116382533159097e-05, "epoch": 2.376387774643513, "percentage": 79.21, "elapsed_time": "1 day, 7:34:17", "remaining_time": "8:17:10", "throughput": 28332.89, "total_tokens": 3220245952} +{"current_steps": 11490, "total_steps": 14493, "loss": 0.7876, "lr": 3.4108443505631335e-05, "epoch": 2.378458114438032, "percentage": 79.28, "elapsed_time": "1 day, 7:35:55", "remaining_time": "8:15:30", "throughput": 28333.32, "total_tokens": 3223084544} +{"current_steps": 11500, "total_steps": 14493, "loss": 0.7791, "lr": 3.410051001786192e-05, "epoch": 2.380528454232551, "percentage": 79.35, "elapsed_time": "1 day, 7:37:33", "remaining_time": "8:13:51", "throughput": 28333.89, "total_tokens": 3225921280} +{"current_steps": 11510, "total_steps": 14493, "loss": 0.7734, "lr": 3.409258206341124e-05, "epoch": 2.3825987940270696, "percentage": 79.42, "elapsed_time": "1 day, 7:39:14", "remaining_time": "8:12:13", "throughput": 28333.97, "total_tokens": 3228785280} +{"current_steps": 11520, "total_steps": 14493, "loss": 0.7908, "lr": 3.4084659635850134e-05, "epoch": 2.3846691338215886, "percentage": 79.49, "elapsed_time": "1 day, 7:40:53", "remaining_time": "8:10:34", "throughput": 28334.2, "total_tokens": 3231614016} +{"current_steps": 11530, "total_steps": 14493, "loss": 0.7794, "lr": 3.40767427287599e-05, "epoch": 2.386739473616107, "percentage": 79.56, "elapsed_time": "1 day, 7:42:26", "remaining_time": "8:08:53", "throughput": 28334.87, "total_tokens": 3234335232} +{"current_steps": 11540, "total_steps": 14493, "loss": 0.7882, "lr": 3.406883133573224e-05, "epoch": 2.388809813410626, "percentage": 79.62, "elapsed_time": "1 day, 7:44:08", "remaining_time": "8:07:15", "throughput": 28334.81, "total_tokens": 3237213248} +{"current_steps": 11550, "total_steps": 
14493, "loss": 0.7891, "lr": 3.406092545036932e-05, "epoch": 2.3908801532051447, "percentage": 79.69, "elapsed_time": "1 day, 7:45:48", "remaining_time": "8:05:36", "throughput": 28334.6, "total_tokens": 3240027584} +{"current_steps": 11560, "total_steps": 14493, "loss": 0.7902, "lr": 3.405302506628365e-05, "epoch": 2.3929504929996637, "percentage": 79.76, "elapsed_time": "1 day, 7:47:25", "remaining_time": "8:03:57", "throughput": 28335.07, "total_tokens": 3242835776} +{"current_steps": 11570, "total_steps": 14493, "loss": 0.7816, "lr": 3.404513017709813e-05, "epoch": 2.395020832794182, "percentage": 79.83, "elapsed_time": "1 day, 7:49:02", "remaining_time": "8:02:17", "throughput": 28335.79, "total_tokens": 3245661376} +{"current_steps": 11580, "total_steps": 14493, "loss": 0.7864, "lr": 3.403724077644598e-05, "epoch": 2.397091172588701, "percentage": 79.9, "elapsed_time": "1 day, 7:50:39", "remaining_time": "8:00:38", "throughput": 28336.37, "total_tokens": 3248476352} +{"current_steps": 11590, "total_steps": 14493, "loss": 0.7841, "lr": 3.402935685797077e-05, "epoch": 2.3991615123832197, "percentage": 79.97, "elapsed_time": "1 day, 7:52:14", "remaining_time": "7:58:57", "throughput": 28337.44, "total_tokens": 3251271616} +{"current_steps": 11600, "total_steps": 14493, "loss": 0.7749, "lr": 3.4021478415326355e-05, "epoch": 2.4012318521777387, "percentage": 80.04, "elapsed_time": "1 day, 7:53:49", "remaining_time": "7:57:18", "throughput": 28338.24, "total_tokens": 3254060416} +{"current_steps": 11610, "total_steps": 14493, "loss": 0.7864, "lr": 3.401360544217687e-05, "epoch": 2.4033021919722577, "percentage": 80.11, "elapsed_time": "1 day, 7:55:28", "remaining_time": "7:55:39", "throughput": 28338.28, "total_tokens": 3256875392} +{"current_steps": 11620, "total_steps": 14493, "loss": 0.7909, "lr": 3.400573793219672e-05, "epoch": 2.4053725317667762, "percentage": 80.18, "elapsed_time": "1 day, 7:57:03", "remaining_time": "7:53:59", "throughput": 28339.15, 
"total_tokens": 3259678592} +{"current_steps": 11630, "total_steps": 14493, "loss": 0.7825, "lr": 3.3997875879070546e-05, "epoch": 2.4074428715612948, "percentage": 80.25, "elapsed_time": "1 day, 7:58:37", "remaining_time": "7:52:18", "throughput": 28339.94, "total_tokens": 3262432384} +{"current_steps": 11640, "total_steps": 14493, "loss": 0.7929, "lr": 3.399001927649318e-05, "epoch": 2.4095132113558138, "percentage": 80.31, "elapsed_time": "1 day, 8:00:15", "remaining_time": "7:50:39", "throughput": 28340.14, "total_tokens": 3265221632} +{"current_steps": 11650, "total_steps": 14493, "loss": 0.7805, "lr": 3.398216811816968e-05, "epoch": 2.4115835511503327, "percentage": 80.38, "elapsed_time": "1 day, 8:01:51", "remaining_time": "7:48:59", "throughput": 28340.82, "total_tokens": 3268009344} +{"current_steps": 11660, "total_steps": 14493, "loss": 0.7979, "lr": 3.397432239781527e-05, "epoch": 2.4136538909448513, "percentage": 80.45, "elapsed_time": "1 day, 8:03:24", "remaining_time": "7:47:19", "throughput": 28341.97, "total_tokens": 3270783296} +{"current_steps": 11670, "total_steps": 14493, "loss": 0.787, "lr": 3.396648210915531e-05, "epoch": 2.4157242307393703, "percentage": 80.52, "elapsed_time": "1 day, 8:04:57", "remaining_time": "7:45:39", "throughput": 28343.02, "total_tokens": 3273557504} +{"current_steps": 11680, "total_steps": 14493, "loss": 0.7925, "lr": 3.3958647245925315e-05, "epoch": 2.417794570533889, "percentage": 80.59, "elapsed_time": "1 day, 8:06:41", "remaining_time": "7:44:01", "throughput": 28342.58, "total_tokens": 3276445632} +{"current_steps": 11690, "total_steps": 14493, "loss": 0.8032, "lr": 3.3950817801870885e-05, "epoch": 2.419864910328408, "percentage": 80.66, "elapsed_time": "1 day, 8:08:19", "remaining_time": "7:42:22", "throughput": 28342.7, "total_tokens": 3279244352} +{"current_steps": 11700, "total_steps": 14493, "loss": 0.7997, "lr": 3.3942993770747735e-05, "epoch": 2.4219352501229263, "percentage": 80.73, "elapsed_time": "1 
day, 8:09:59", "remaining_time": "7:40:43", "throughput": 28342.86, "total_tokens": 3282090624} +{"current_steps": 11710, "total_steps": 14493, "loss": 0.7938, "lr": 3.3935175146321626e-05, "epoch": 2.4240055899174453, "percentage": 80.8, "elapsed_time": "1 day, 8:11:37", "remaining_time": "7:39:04", "throughput": 28343.03, "total_tokens": 3284895104} +{"current_steps": 11720, "total_steps": 14493, "loss": 0.7904, "lr": 3.392736192236839e-05, "epoch": 2.426075929711964, "percentage": 80.87, "elapsed_time": "1 day, 8:13:14", "remaining_time": "7:37:24", "throughput": 28343.59, "total_tokens": 3287691072} +{"current_steps": 11730, "total_steps": 14493, "loss": 0.7787, "lr": 3.391955409267387e-05, "epoch": 2.428146269506483, "percentage": 80.94, "elapsed_time": "1 day, 8:14:53", "remaining_time": "7:35:45", "throughput": 28343.61, "total_tokens": 3290500672} +{"current_steps": 11740, "total_steps": 14493, "loss": 0.7948, "lr": 3.3911751651033896e-05, "epoch": 2.4302166093010014, "percentage": 81.0, "elapsed_time": "1 day, 8:16:31", "remaining_time": "7:34:06", "throughput": 28343.93, "total_tokens": 3293327680} +{"current_steps": 11750, "total_steps": 14493, "loss": 0.7806, "lr": 3.3903954591254334e-05, "epoch": 2.4322869490955203, "percentage": 81.07, "elapsed_time": "1 day, 8:18:11", "remaining_time": "7:32:27", "throughput": 28344.01, "total_tokens": 3296169920} +{"current_steps": 11760, "total_steps": 14493, "loss": 0.7912, "lr": 3.389616290715097e-05, "epoch": 2.434357288890039, "percentage": 81.14, "elapsed_time": "1 day, 8:19:47", "remaining_time": "7:30:48", "throughput": 28344.59, "total_tokens": 3298964032} +{"current_steps": 11770, "total_steps": 14493, "loss": 0.7915, "lr": 3.388837659254955e-05, "epoch": 2.436427628684558, "percentage": 81.21, "elapsed_time": "1 day, 8:21:25", "remaining_time": "7:29:08", "throughput": 28344.84, "total_tokens": 3301751616} +{"current_steps": 11780, "total_steps": 14493, "loss": 0.8028, "lr": 3.3880595641285746e-05, 
"epoch": 2.4384979684790764, "percentage": 81.28, "elapsed_time": "1 day, 8:23:01", "remaining_time": "7:27:29", "throughput": 28345.31, "total_tokens": 3304540928} +{"current_steps": 11790, "total_steps": 14493, "loss": 0.7807, "lr": 3.387282004720513e-05, "epoch": 2.4405683082735954, "percentage": 81.35, "elapsed_time": "1 day, 8:24:38", "remaining_time": "7:25:50", "throughput": 28345.58, "total_tokens": 3307328896} +{"current_steps": 11800, "total_steps": 14493, "loss": 0.7918, "lr": 3.386504980416316e-05, "epoch": 2.4426386480681144, "percentage": 81.42, "elapsed_time": "1 day, 8:26:17", "remaining_time": "7:24:10", "throughput": 28345.8, "total_tokens": 3310147712} +{"current_steps": 11810, "total_steps": 14493, "loss": 0.7943, "lr": 3.385728490602515e-05, "epoch": 2.444708987862633, "percentage": 81.49, "elapsed_time": "1 day, 8:27:54", "remaining_time": "7:22:31", "throughput": 28346.13, "total_tokens": 3312952512} +{"current_steps": 11820, "total_steps": 14493, "loss": 0.7866, "lr": 3.384952534666625e-05, "epoch": 2.446779327657152, "percentage": 81.56, "elapsed_time": "1 day, 8:29:35", "remaining_time": "7:20:53", "throughput": 28345.92, "total_tokens": 3315773504} +{"current_steps": 11830, "total_steps": 14493, "loss": 0.7809, "lr": 3.3841771119971455e-05, "epoch": 2.4488496674516704, "percentage": 81.63, "elapsed_time": "1 day, 8:31:13", "remaining_time": "7:19:13", "throughput": 28346.21, "total_tokens": 3318592512} +{"current_steps": 11840, "total_steps": 14493, "loss": 0.7731, "lr": 3.383402221983554e-05, "epoch": 2.4509200072461894, "percentage": 81.69, "elapsed_time": "1 day, 8:32:51", "remaining_time": "7:17:34", "throughput": 28346.35, "total_tokens": 3321393280} +{"current_steps": 11850, "total_steps": 14493, "loss": 0.7902, "lr": 3.3826278640163064e-05, "epoch": 2.452990347040708, "percentage": 81.76, "elapsed_time": "1 day, 8:34:27", "remaining_time": "7:15:55", "throughput": 28346.93, "total_tokens": 3324186688} +{"current_steps": 11860, 
"total_steps": 14493, "loss": 0.7792, "lr": 3.3818540374868354e-05, "epoch": 2.455060686835227, "percentage": 81.83, "elapsed_time": "1 day, 8:36:06", "remaining_time": "7:14:16", "throughput": 28347.29, "total_tokens": 3327027968} +{"current_steps": 11870, "total_steps": 14493, "loss": 0.7838, "lr": 3.381080741787547e-05, "epoch": 2.4571310266297455, "percentage": 81.9, "elapsed_time": "1 day, 8:37:42", "remaining_time": "7:12:36", "throughput": 28348.05, "total_tokens": 3329824000} +{"current_steps": 11880, "total_steps": 14493, "loss": 0.7806, "lr": 3.38030797631182e-05, "epoch": 2.4592013664242645, "percentage": 81.97, "elapsed_time": "1 day, 8:39:18", "remaining_time": "7:10:56", "throughput": 28348.81, "total_tokens": 3332647488} +{"current_steps": 11890, "total_steps": 14493, "loss": 0.7947, "lr": 3.379535740454003e-05, "epoch": 2.461271706218783, "percentage": 82.04, "elapsed_time": "1 day, 8:40:59", "remaining_time": "7:09:18", "throughput": 28348.6, "total_tokens": 3335477248} +{"current_steps": 11900, "total_steps": 14493, "loss": 0.7795, "lr": 3.3787640336094126e-05, "epoch": 2.463342046013302, "percentage": 82.11, "elapsed_time": "1 day, 8:42:37", "remaining_time": "7:07:39", "throughput": 28349.05, "total_tokens": 3338312768} +{"current_steps": 11910, "total_steps": 14493, "loss": 0.794, "lr": 3.3779928551743325e-05, "epoch": 2.4654123858078205, "percentage": 82.18, "elapsed_time": "1 day, 8:44:19", "remaining_time": "7:06:00", "throughput": 28348.78, "total_tokens": 3341170432} +{"current_steps": 11920, "total_steps": 14493, "loss": 0.788, "lr": 3.3772222045460084e-05, "epoch": 2.4674827256023395, "percentage": 82.25, "elapsed_time": "1 day, 8:45:58", "remaining_time": "7:04:22", "throughput": 28349.06, "total_tokens": 3344014912} +{"current_steps": 11930, "total_steps": 14493, "loss": 0.7905, "lr": 3.37645208112265e-05, "epoch": 2.469553065396858, "percentage": 82.32, "elapsed_time": "1 day, 8:47:35", "remaining_time": "7:02:42", "throughput": 
28349.45, "total_tokens": 3346815488} +{"current_steps": 11940, "total_steps": 14493, "loss": 0.7878, "lr": 3.3756824843034255e-05, "epoch": 2.471623405191377, "percentage": 82.38, "elapsed_time": "1 day, 8:49:10", "remaining_time": "7:01:02", "throughput": 28350.33, "total_tokens": 3349615424} +{"current_steps": 11950, "total_steps": 14493, "loss": 0.7831, "lr": 3.374913413488464e-05, "epoch": 2.473693744985896, "percentage": 82.45, "elapsed_time": "1 day, 8:50:49", "remaining_time": "6:59:23", "throughput": 28350.53, "total_tokens": 3352425216} +{"current_steps": 11960, "total_steps": 14493, "loss": 0.7932, "lr": 3.374144868078848e-05, "epoch": 2.4757640847804145, "percentage": 82.52, "elapsed_time": "1 day, 8:52:25", "remaining_time": "6:57:44", "throughput": 28350.79, "total_tokens": 3355176000} +{"current_steps": 11970, "total_steps": 14493, "loss": 0.783, "lr": 3.373376847476615e-05, "epoch": 2.4778344245749335, "percentage": 82.59, "elapsed_time": "1 day, 8:53:59", "remaining_time": "6:56:04", "throughput": 28351.59, "total_tokens": 3357937408} +{"current_steps": 11980, "total_steps": 14493, "loss": 0.7788, "lr": 3.3726093510847566e-05, "epoch": 2.479904764369452, "percentage": 82.66, "elapsed_time": "1 day, 8:55:35", "remaining_time": "6:54:24", "throughput": 28352.47, "total_tokens": 3360767168} +{"current_steps": 11990, "total_steps": 14493, "loss": 0.7951, "lr": 3.371842378307212e-05, "epoch": 2.481975104163971, "percentage": 82.73, "elapsed_time": "1 day, 8:57:14", "remaining_time": "6:52:45", "throughput": 28352.7, "total_tokens": 3363609856} +{"current_steps": 12000, "total_steps": 14493, "loss": 0.7908, "lr": 3.371075928548872e-05, "epoch": 2.4840454439584896, "percentage": 82.8, "elapsed_time": "1 day, 8:58:54", "remaining_time": "6:51:07", "throughput": 28352.82, "total_tokens": 3366449728} +{"current_steps": 12010, "total_steps": 14493, "loss": 0.7909, "lr": 3.37031000121557e-05, "epoch": 2.4861157837530086, "percentage": 82.87, "elapsed_time": "1 
day, 9:00:33", "remaining_time": "6:49:28", "throughput": 28353.07, "total_tokens": 3369282880} +{"current_steps": 12020, "total_steps": 14493, "loss": 0.7922, "lr": 3.369544595714088e-05, "epoch": 2.488186123547527, "percentage": 82.94, "elapsed_time": "1 day, 9:02:13", "remaining_time": "6:47:49", "throughput": 28352.94, "total_tokens": 3372111232} +{"current_steps": 12030, "total_steps": 14493, "loss": 0.793, "lr": 3.368779711452148e-05, "epoch": 2.490256463342046, "percentage": 83.01, "elapsed_time": "1 day, 9:03:48", "remaining_time": "6:46:09", "throughput": 28353.72, "total_tokens": 3374905152} +{"current_steps": 12040, "total_steps": 14493, "loss": 0.7927, "lr": 3.368015347838413e-05, "epoch": 2.4923268031365646, "percentage": 83.07, "elapsed_time": "1 day, 9:05:24", "remaining_time": "6:44:30", "throughput": 28354.4, "total_tokens": 3377715200} +{"current_steps": 12050, "total_steps": 14493, "loss": 0.7919, "lr": 3.3672515042824855e-05, "epoch": 2.4943971429310836, "percentage": 83.14, "elapsed_time": "1 day, 9:06:56", "remaining_time": "6:42:49", "throughput": 28355.49, "total_tokens": 3380448320} +{"current_steps": 12060, "total_steps": 14493, "loss": 0.7786, "lr": 3.366488180194904e-05, "epoch": 2.496467482725602, "percentage": 83.21, "elapsed_time": "1 day, 9:08:32", "remaining_time": "6:41:10", "throughput": 28356.35, "total_tokens": 3383260224} +{"current_steps": 12070, "total_steps": 14493, "loss": 0.7852, "lr": 3.365725374987143e-05, "epoch": 2.498537822520121, "percentage": 83.28, "elapsed_time": "1 day, 9:10:05", "remaining_time": "6:39:30", "throughput": 28357.57, "total_tokens": 3386049856} +{"current_steps": 12080, "total_steps": 14493, "loss": 0.7993, "lr": 3.36496308807161e-05, "epoch": 2.5006081623146397, "percentage": 83.35, "elapsed_time": "1 day, 9:11:45", "remaining_time": "6:37:51", "throughput": 28357.54, "total_tokens": 3388885632} +{"current_steps": 12090, "total_steps": 14493, "loss": 0.7861, "lr": 3.3642013188616426e-05, "epoch": 
2.5026785021091587, "percentage": 83.42, "elapsed_time": "1 day, 9:13:19", "remaining_time": "6:36:11", "throughput": 28358.32, "total_tokens": 3391649600} +{"current_steps": 12100, "total_steps": 14493, "loss": 0.7969, "lr": 3.3634400667715074e-05, "epoch": 2.5047488419036776, "percentage": 83.49, "elapsed_time": "1 day, 9:14:55", "remaining_time": "6:34:32", "throughput": 28359.04, "total_tokens": 3394452544} +{"current_steps": 12110, "total_steps": 14493, "loss": 0.7952, "lr": 3.3626793312164013e-05, "epoch": 2.506819181698196, "percentage": 83.56, "elapsed_time": "1 day, 9:16:32", "remaining_time": "6:32:52", "throughput": 28359.27, "total_tokens": 3397240448} +{"current_steps": 12120, "total_steps": 14493, "loss": 0.7801, "lr": 3.361919111612443e-05, "epoch": 2.508889521492715, "percentage": 83.63, "elapsed_time": "1 day, 9:18:06", "remaining_time": "6:31:12", "throughput": 28360.4, "total_tokens": 3400031296} +{"current_steps": 12130, "total_steps": 14493, "loss": 0.7677, "lr": 3.361159407376678e-05, "epoch": 2.5109598612872337, "percentage": 83.7, "elapsed_time": "1 day, 9:19:44", "remaining_time": "6:29:33", "throughput": 28360.73, "total_tokens": 3402848384} +{"current_steps": 12140, "total_steps": 14493, "loss": 0.7813, "lr": 3.3604002179270685e-05, "epoch": 2.5130302010817527, "percentage": 83.76, "elapsed_time": "1 day, 9:21:20", "remaining_time": "6:27:54", "throughput": 28361.63, "total_tokens": 3405678464} +{"current_steps": 12150, "total_steps": 14493, "loss": 0.7816, "lr": 3.359641542682504e-05, "epoch": 2.515100540876271, "percentage": 83.83, "elapsed_time": "1 day, 9:22:56", "remaining_time": "6:26:14", "throughput": 28362.16, "total_tokens": 3408475712} +{"current_steps": 12160, "total_steps": 14493, "loss": 0.7864, "lr": 3.3588833810627854e-05, "epoch": 2.51717088067079, "percentage": 83.9, "elapsed_time": "1 day, 9:24:36", "remaining_time": "6:24:36", "throughput": 28362.0, "total_tokens": 3411290112} +{"current_steps": 12170, "total_steps": 
14493, "loss": 0.7801, "lr": 3.358125732488632e-05, "epoch": 2.5192412204653087, "percentage": 83.97, "elapsed_time": "1 day, 9:26:10", "remaining_time": "6:22:56", "throughput": 28362.47, "total_tokens": 3414013120} +{"current_steps": 12180, "total_steps": 14493, "loss": 0.7946, "lr": 3.357368596381679e-05, "epoch": 2.5213115602598277, "percentage": 84.04, "elapsed_time": "1 day, 9:27:51", "remaining_time": "6:21:17", "throughput": 28362.58, "total_tokens": 3416869952} +{"current_steps": 12190, "total_steps": 14493, "loss": 0.7871, "lr": 3.356611972164471e-05, "epoch": 2.5233819000543463, "percentage": 84.11, "elapsed_time": "1 day, 9:29:29", "remaining_time": "6:19:38", "throughput": 28363.17, "total_tokens": 3419733248} +{"current_steps": 12200, "total_steps": 14493, "loss": 0.7985, "lr": 3.355855859260466e-05, "epoch": 2.5254522398488652, "percentage": 84.18, "elapsed_time": "1 day, 9:31:04", "remaining_time": "6:17:59", "throughput": 28364.28, "total_tokens": 3422563584} +{"current_steps": 12210, "total_steps": 14493, "loss": 0.8009, "lr": 3.3551002570940285e-05, "epoch": 2.5275225796433842, "percentage": 84.25, "elapsed_time": "1 day, 9:32:42", "remaining_time": "6:16:19", "throughput": 28364.23, "total_tokens": 3425345920} +{"current_steps": 12220, "total_steps": 14493, "loss": 0.7952, "lr": 3.354345165090431e-05, "epoch": 2.5295929194379028, "percentage": 84.32, "elapsed_time": "1 day, 9:34:18", "remaining_time": "6:14:40", "throughput": 28365.01, "total_tokens": 3428145280} +{"current_steps": 12230, "total_steps": 14493, "loss": 0.7819, "lr": 3.3535905826758515e-05, "epoch": 2.5316632592324213, "percentage": 84.39, "elapsed_time": "1 day, 9:35:53", "remaining_time": "6:13:00", "throughput": 28365.68, "total_tokens": 3430941760} +{"current_steps": 12240, "total_steps": 14493, "loss": 0.7785, "lr": 3.352836509277369e-05, "epoch": 2.5337335990269403, "percentage": 84.45, "elapsed_time": "1 day, 9:37:30", "remaining_time": "6:11:21", "throughput": 28366.05, 
"total_tokens": 3433737216} +{"current_steps": 12250, "total_steps": 14493, "loss": 0.7874, "lr": 3.352082944322966e-05, "epoch": 2.5358039388214593, "percentage": 84.52, "elapsed_time": "1 day, 9:39:12", "remaining_time": "6:09:43", "throughput": 28365.69, "total_tokens": 3436571456} +{"current_steps": 12260, "total_steps": 14493, "loss": 0.8012, "lr": 3.351329887241524e-05, "epoch": 2.537874278615978, "percentage": 84.59, "elapsed_time": "1 day, 9:40:46", "remaining_time": "6:08:03", "throughput": 28366.47, "total_tokens": 3439344832} +{"current_steps": 12270, "total_steps": 14493, "loss": 0.7945, "lr": 3.3505773374628225e-05, "epoch": 2.5399446184104963, "percentage": 84.66, "elapsed_time": "1 day, 9:42:20", "remaining_time": "6:06:23", "throughput": 28367.41, "total_tokens": 3442128768} +{"current_steps": 12280, "total_steps": 14493, "loss": 0.7861, "lr": 3.3498252944175354e-05, "epoch": 2.5420149582050153, "percentage": 84.73, "elapsed_time": "1 day, 9:43:59", "remaining_time": "6:04:44", "throughput": 28367.78, "total_tokens": 3444973504} +{"current_steps": 12290, "total_steps": 14493, "loss": 0.7937, "lr": 3.3490737575372326e-05, "epoch": 2.5440852979995343, "percentage": 84.8, "elapsed_time": "1 day, 9:45:34", "remaining_time": "6:03:05", "throughput": 28368.57, "total_tokens": 3447747328} +{"current_steps": 12300, "total_steps": 14493, "loss": 0.7813, "lr": 3.348322726254375e-05, "epoch": 2.546155637794053, "percentage": 84.87, "elapsed_time": "1 day, 9:47:11", "remaining_time": "6:01:26", "throughput": 28368.62, "total_tokens": 3450529728} +{"current_steps": 12310, "total_steps": 14493, "loss": 0.7735, "lr": 3.347572200002315e-05, "epoch": 2.548225977588572, "percentage": 84.94, "elapsed_time": "1 day, 9:48:51", "remaining_time": "5:59:47", "throughput": 28368.74, "total_tokens": 3453382656} +{"current_steps": 12320, "total_steps": 14493, "loss": 0.7911, "lr": 3.3468221782152924e-05, "epoch": 2.5502963173830904, "percentage": 85.01, "elapsed_time": "1 
day, 9:50:29", "remaining_time": "5:58:08", "throughput": 28369.53, "total_tokens": 3456235776} +{"current_steps": 12330, "total_steps": 14493, "loss": 0.7839, "lr": 3.346072660328435e-05, "epoch": 2.5523666571776094, "percentage": 85.08, "elapsed_time": "1 day, 9:52:06", "remaining_time": "5:56:29", "throughput": 28369.7, "total_tokens": 3459013696} +{"current_steps": 12340, "total_steps": 14493, "loss": 0.7814, "lr": 3.345323645777756e-05, "epoch": 2.554436996972128, "percentage": 85.14, "elapsed_time": "1 day, 9:53:44", "remaining_time": "5:54:50", "throughput": 28369.76, "total_tokens": 3461808640} +{"current_steps": 12350, "total_steps": 14493, "loss": 0.7763, "lr": 3.34457513400015e-05, "epoch": 2.556507336766647, "percentage": 85.21, "elapsed_time": "1 day, 9:55:23", "remaining_time": "5:53:11", "throughput": 28369.72, "total_tokens": 3464619200} +{"current_steps": 12360, "total_steps": 14493, "loss": 0.7842, "lr": 3.343827124433396e-05, "epoch": 2.558577676561166, "percentage": 85.28, "elapsed_time": "1 day, 9:56:59", "remaining_time": "5:51:31", "throughput": 28370.53, "total_tokens": 3467430272} +{"current_steps": 12370, "total_steps": 14493, "loss": 0.7929, "lr": 3.343079616516151e-05, "epoch": 2.5606480163556844, "percentage": 85.35, "elapsed_time": "1 day, 9:58:34", "remaining_time": "5:49:52", "throughput": 28371.18, "total_tokens": 3470216000} +{"current_steps": 12380, "total_steps": 14493, "loss": 0.7944, "lr": 3.3423326096879495e-05, "epoch": 2.562718356150203, "percentage": 85.42, "elapsed_time": "1 day, 10:00:10", "remaining_time": "5:48:12", "throughput": 28372.0, "total_tokens": 3473019520} +{"current_steps": 12390, "total_steps": 14493, "loss": 0.7933, "lr": 3.341586103389203e-05, "epoch": 2.564788695944722, "percentage": 85.49, "elapsed_time": "1 day, 10:01:48", "remaining_time": "5:46:33", "throughput": 28372.56, "total_tokens": 3475873536} +{"current_steps": 12400, "total_steps": 14493, "loss": 0.8036, "lr": 3.3408400970611995e-05, "epoch": 
2.566859035739241, "percentage": 85.56, "elapsed_time": "1 day, 10:03:22", "remaining_time": "5:44:54", "throughput": 28373.24, "total_tokens": 3478635456} +{"current_steps": 12410, "total_steps": 14493, "loss": 0.7867, "lr": 3.340094590146095e-05, "epoch": 2.5689293755337594, "percentage": 85.63, "elapsed_time": "1 day, 10:04:58", "remaining_time": "5:43:14", "throughput": 28373.91, "total_tokens": 3481423168} +{"current_steps": 12420, "total_steps": 14493, "loss": 0.7911, "lr": 3.3393495820869215e-05, "epoch": 2.570999715328278, "percentage": 85.7, "elapsed_time": "1 day, 10:06:36", "remaining_time": "5:41:35", "throughput": 28374.13, "total_tokens": 3484248832} +{"current_steps": 12430, "total_steps": 14493, "loss": 0.7808, "lr": 3.338605072327576e-05, "epoch": 2.573070055122797, "percentage": 85.77, "elapsed_time": "1 day, 10:08:09", "remaining_time": "5:39:55", "throughput": 28375.42, "total_tokens": 3487045120} +{"current_steps": 12440, "total_steps": 14493, "loss": 0.7885, "lr": 3.337861060312827e-05, "epoch": 2.575140394917316, "percentage": 85.83, "elapsed_time": "1 day, 10:09:48", "remaining_time": "5:38:17", "throughput": 28375.52, "total_tokens": 3489857472} +{"current_steps": 12450, "total_steps": 14493, "loss": 0.786, "lr": 3.337117545488306e-05, "epoch": 2.5772107347118345, "percentage": 85.9, "elapsed_time": "1 day, 10:11:21", "remaining_time": "5:36:37", "throughput": 28376.66, "total_tokens": 3492644096} +{"current_steps": 12460, "total_steps": 14493, "loss": 0.7978, "lr": 3.33637452730051e-05, "epoch": 2.5792810745063535, "percentage": 85.97, "elapsed_time": "1 day, 10:12:54", "remaining_time": "5:34:57", "throughput": 28377.57, "total_tokens": 3495382336} +{"current_steps": 12470, "total_steps": 14493, "loss": 0.7693, "lr": 3.335632005196796e-05, "epoch": 2.581351414300872, "percentage": 86.04, "elapsed_time": "1 day, 10:14:33", "remaining_time": "5:33:18", "throughput": 28377.77, "total_tokens": 3498239104} +{"current_steps": 12480, 
"total_steps": 14493, "loss": 0.7889, "lr": 3.334889978625383e-05, "epoch": 2.583421754095391, "percentage": 86.11, "elapsed_time": "1 day, 10:16:11", "remaining_time": "5:31:39", "throughput": 28377.67, "total_tokens": 3500992448} +{"current_steps": 12490, "total_steps": 14493, "loss": 0.7845, "lr": 3.3341484470353515e-05, "epoch": 2.5854920938899095, "percentage": 86.18, "elapsed_time": "1 day, 10:17:43", "remaining_time": "5:29:59", "throughput": 28378.67, "total_tokens": 3503736000} +{"current_steps": 12500, "total_steps": 14493, "loss": 0.7794, "lr": 3.333407409876635e-05, "epoch": 2.5875624336844285, "percentage": 86.25, "elapsed_time": "1 day, 10:19:20", "remaining_time": "5:28:20", "throughput": 28379.25, "total_tokens": 3506544640} +{"current_steps": 12510, "total_steps": 14493, "loss": 0.8004, "lr": 3.332666866600024e-05, "epoch": 2.5896327734789475, "percentage": 86.32, "elapsed_time": "1 day, 10:20:56", "remaining_time": "5:26:41", "throughput": 28379.78, "total_tokens": 3509341888} +{"current_steps": 12520, "total_steps": 14493, "loss": 0.791, "lr": 3.331926816657162e-05, "epoch": 2.591703113273466, "percentage": 86.39, "elapsed_time": "1 day, 10:22:34", "remaining_time": "5:25:02", "throughput": 28380.04, "total_tokens": 3512152384} +{"current_steps": 12530, "total_steps": 14493, "loss": 0.786, "lr": 3.331187259500546e-05, "epoch": 2.5937734530679846, "percentage": 86.46, "elapsed_time": "1 day, 10:24:10", "remaining_time": "5:23:22", "throughput": 28380.76, "total_tokens": 3514957888} +{"current_steps": 12540, "total_steps": 14493, "loss": 0.7866, "lr": 3.3304481945835235e-05, "epoch": 2.5958437928625036, "percentage": 86.52, "elapsed_time": "1 day, 10:25:44", "remaining_time": "5:21:43", "throughput": 28381.58, "total_tokens": 3517746816} +{"current_steps": 12550, "total_steps": 14493, "loss": 0.7925, "lr": 3.329709621360288e-05, "epoch": 2.5979141326570225, "percentage": 86.59, "elapsed_time": "1 day, 10:27:19", "remaining_time": "5:20:03", 
"throughput": 28382.54, "total_tokens": 3520561408} +{"current_steps": 12560, "total_steps": 14493, "loss": 0.7888, "lr": 3.328971539285882e-05, "epoch": 2.599984472451541, "percentage": 86.66, "elapsed_time": "1 day, 10:28:53", "remaining_time": "5:18:24", "throughput": 28383.58, "total_tokens": 3523358848} +{"current_steps": 12570, "total_steps": 14493, "loss": 0.7764, "lr": 3.3282339478161935e-05, "epoch": 2.6020548122460596, "percentage": 86.73, "elapsed_time": "1 day, 10:30:30", "remaining_time": "5:16:45", "throughput": 28383.85, "total_tokens": 3526147968} +{"current_steps": 12580, "total_steps": 14493, "loss": 0.7909, "lr": 3.327496846407953e-05, "epoch": 2.6041251520405786, "percentage": 86.8, "elapsed_time": "1 day, 10:32:08", "remaining_time": "5:15:06", "throughput": 28384.3, "total_tokens": 3528972672} +{"current_steps": 12590, "total_steps": 14493, "loss": 0.7961, "lr": 3.3267602345187304e-05, "epoch": 2.6061954918350976, "percentage": 86.87, "elapsed_time": "1 day, 10:33:45", "remaining_time": "5:13:27", "throughput": 28384.71, "total_tokens": 3531774784} +{"current_steps": 12600, "total_steps": 14493, "loss": 0.7885, "lr": 3.326024111606942e-05, "epoch": 2.608265831629616, "percentage": 86.94, "elapsed_time": "1 day, 10:35:24", "remaining_time": "5:11:48", "throughput": 28384.92, "total_tokens": 3534609216} +{"current_steps": 12610, "total_steps": 14493, "loss": 0.7814, "lr": 3.325288477131839e-05, "epoch": 2.610336171424135, "percentage": 87.01, "elapsed_time": "1 day, 10:37:02", "remaining_time": "5:10:09", "throughput": 28385.05, "total_tokens": 3537428352} +{"current_steps": 12620, "total_steps": 14493, "loss": 0.7873, "lr": 3.324553330553507e-05, "epoch": 2.6124065112186536, "percentage": 87.08, "elapsed_time": "1 day, 10:38:41", "remaining_time": "5:08:30", "throughput": 28385.3, "total_tokens": 3540255872} +{"current_steps": 12630, "total_steps": 14493, "loss": 0.7913, "lr": 3.323818671332871e-05, "epoch": 2.6144768510131726, "percentage": 
87.15, "elapsed_time": "1 day, 10:40:22", "remaining_time": "5:06:52", "throughput": 28385.04, "total_tokens": 3543086080} +{"current_steps": 12640, "total_steps": 14493, "loss": 0.7976, "lr": 3.323084498931687e-05, "epoch": 2.616547190807691, "percentage": 87.21, "elapsed_time": "1 day, 10:42:00", "remaining_time": "5:05:13", "throughput": 28385.34, "total_tokens": 3545900864} +{"current_steps": 12650, "total_steps": 14493, "loss": 0.7863, "lr": 3.322350812812545e-05, "epoch": 2.61861753060221, "percentage": 87.28, "elapsed_time": "1 day, 10:43:38", "remaining_time": "5:03:34", "throughput": 28385.48, "total_tokens": 3548714560} +{"current_steps": 12660, "total_steps": 14493, "loss": 0.793, "lr": 3.321617612438862e-05, "epoch": 2.620687870396729, "percentage": 87.35, "elapsed_time": "1 day, 10:45:11", "remaining_time": "5:01:54", "throughput": 28386.45, "total_tokens": 3551470016} +{"current_steps": 12670, "total_steps": 14493, "loss": 0.7972, "lr": 3.320884897274886e-05, "epoch": 2.6227582101912477, "percentage": 87.42, "elapsed_time": "1 day, 10:46:46", "remaining_time": "5:00:15", "throughput": 28387.23, "total_tokens": 3554274432} +{"current_steps": 12680, "total_steps": 14493, "loss": 0.7949, "lr": 3.320152666785692e-05, "epoch": 2.624828549985766, "percentage": 87.49, "elapsed_time": "1 day, 10:48:24", "remaining_time": "4:58:36", "throughput": 28387.62, "total_tokens": 3557094272} +{"current_steps": 12690, "total_steps": 14493, "loss": 0.804, "lr": 3.319420920437179e-05, "epoch": 2.626898889780285, "percentage": 87.56, "elapsed_time": "1 day, 10:50:01", "remaining_time": "4:56:57", "throughput": 28388.26, "total_tokens": 3559929600} +{"current_steps": 12700, "total_steps": 14493, "loss": 0.7893, "lr": 3.31868965769607e-05, "epoch": 2.628969229574804, "percentage": 87.63, "elapsed_time": "1 day, 10:51:36", "remaining_time": "4:55:17", "throughput": 28388.9, "total_tokens": 3562710528} +{"current_steps": 12710, "total_steps": 14493, "loss": 0.789, "lr": 
3.317958878029911e-05, "epoch": 2.6310395693693227, "percentage": 87.7, "elapsed_time": "1 day, 10:53:09", "remaining_time": "4:53:38", "throughput": 28389.98, "total_tokens": 3565493056} +{"current_steps": 12720, "total_steps": 14493, "loss": 0.784, "lr": 3.3172285809070665e-05, "epoch": 2.6331099091638412, "percentage": 87.77, "elapsed_time": "1 day, 10:54:44", "remaining_time": "4:51:58", "throughput": 28390.84, "total_tokens": 3568281920} +{"current_steps": 12730, "total_steps": 14493, "loss": 0.7787, "lr": 3.3164987657967214e-05, "epoch": 2.6351802489583602, "percentage": 87.84, "elapsed_time": "1 day, 10:56:19", "remaining_time": "4:50:19", "throughput": 28391.24, "total_tokens": 3571023552} +{"current_steps": 12740, "total_steps": 14493, "loss": 0.7847, "lr": 3.315769432168877e-05, "epoch": 2.637250588752879, "percentage": 87.9, "elapsed_time": "1 day, 10:57:54", "remaining_time": "4:48:40", "throughput": 28391.72, "total_tokens": 3573782720} +{"current_steps": 12750, "total_steps": 14493, "loss": 0.784, "lr": 3.315040579494349e-05, "epoch": 2.6393209285473977, "percentage": 87.97, "elapsed_time": "1 day, 10:59:31", "remaining_time": "4:47:01", "throughput": 28392.1, "total_tokens": 3576592768} +{"current_steps": 12760, "total_steps": 14493, "loss": 0.7892, "lr": 3.31431220724477e-05, "epoch": 2.6413912683419167, "percentage": 88.04, "elapsed_time": "1 day, 11:01:08", "remaining_time": "4:45:22", "throughput": 28392.53, "total_tokens": 3579410304} +{"current_steps": 12770, "total_steps": 14493, "loss": 0.7754, "lr": 3.3135843148925834e-05, "epoch": 2.6434616081364353, "percentage": 88.11, "elapsed_time": "1 day, 11:02:43", "remaining_time": "4:43:42", "throughput": 28393.38, "total_tokens": 3582203200} +{"current_steps": 12780, "total_steps": 14493, "loss": 0.7886, "lr": 3.3128569019110414e-05, "epoch": 2.6455319479309543, "percentage": 88.18, "elapsed_time": "1 day, 11:04:21", "remaining_time": "4:42:03", "throughput": 28393.55, "total_tokens": 3585011328} 
+{"current_steps": 12790, "total_steps": 14493, "loss": 0.7868, "lr": 3.312129967774207e-05, "epoch": 2.647602287725473, "percentage": 88.25, "elapsed_time": "1 day, 11:05:55", "remaining_time": "4:40:24", "throughput": 28394.27, "total_tokens": 3587786176} +{"current_steps": 12800, "total_steps": 14493, "loss": 0.793, "lr": 3.311403511956952e-05, "epoch": 2.6496726275199918, "percentage": 88.32, "elapsed_time": "1 day, 11:07:33", "remaining_time": "4:38:45", "throughput": 28394.57, "total_tokens": 3590589952} +{"current_steps": 12810, "total_steps": 14493, "loss": 0.7836, "lr": 3.310677533934952e-05, "epoch": 2.6517429673145108, "percentage": 88.39, "elapsed_time": "1 day, 11:09:09", "remaining_time": "4:37:06", "throughput": 28395.12, "total_tokens": 3593374464} +{"current_steps": 12820, "total_steps": 14493, "loss": 0.7855, "lr": 3.309952033184686e-05, "epoch": 2.6538133071090293, "percentage": 88.46, "elapsed_time": "1 day, 11:10:41", "remaining_time": "4:35:26", "throughput": 28395.98, "total_tokens": 3596117888} +{"current_steps": 12830, "total_steps": 14493, "loss": 0.7915, "lr": 3.309227009183439e-05, "epoch": 2.655883646903548, "percentage": 88.53, "elapsed_time": "1 day, 11:12:16", "remaining_time": "4:33:47", "throughput": 28396.43, "total_tokens": 3598862592} +{"current_steps": 12840, "total_steps": 14493, "loss": 0.7977, "lr": 3.308502461409295e-05, "epoch": 2.657953986698067, "percentage": 88.59, "elapsed_time": "1 day, 11:13:52", "remaining_time": "4:32:08", "throughput": 28396.87, "total_tokens": 3601648384} +{"current_steps": 12850, "total_steps": 14493, "loss": 0.789, "lr": 3.3077783893411386e-05, "epoch": 2.660024326492586, "percentage": 88.66, "elapsed_time": "1 day, 11:15:29", "remaining_time": "4:30:29", "throughput": 28397.27, "total_tokens": 3604442944} +{"current_steps": 12860, "total_steps": 14493, "loss": 0.7945, "lr": 3.30705479245865e-05, "epoch": 2.6620946662871043, "percentage": 88.73, "elapsed_time": "1 day, 11:17:04", 
"remaining_time": "4:28:49", "throughput": 28397.89, "total_tokens": 3607226432} +{"current_steps": 12870, "total_steps": 14493, "loss": 0.7949, "lr": 3.3063316702423094e-05, "epoch": 2.664165006081623, "percentage": 88.8, "elapsed_time": "1 day, 11:18:39", "remaining_time": "4:27:10", "throughput": 28398.98, "total_tokens": 3610052032} +{"current_steps": 12880, "total_steps": 14493, "loss": 0.7812, "lr": 3.305609022173388e-05, "epoch": 2.666235345876142, "percentage": 88.87, "elapsed_time": "1 day, 11:20:14", "remaining_time": "4:25:31", "throughput": 28399.77, "total_tokens": 3612871296} +{"current_steps": 12890, "total_steps": 14493, "loss": 0.7808, "lr": 3.304886847733954e-05, "epoch": 2.668305685670661, "percentage": 88.94, "elapsed_time": "1 day, 11:21:51", "remaining_time": "4:23:52", "throughput": 28400.46, "total_tokens": 3615704128} +{"current_steps": 12900, "total_steps": 14493, "loss": 0.7775, "lr": 3.304165146406865e-05, "epoch": 2.6703760254651794, "percentage": 89.01, "elapsed_time": "1 day, 11:23:26", "remaining_time": "4:22:13", "throughput": 28400.99, "total_tokens": 3618473856} +{"current_steps": 12910, "total_steps": 14493, "loss": 0.7765, "lr": 3.30344391767577e-05, "epoch": 2.6724463652596984, "percentage": 89.08, "elapsed_time": "1 day, 11:25:11", "remaining_time": "4:20:35", "throughput": 28400.51, "total_tokens": 3621384256} +{"current_steps": 12920, "total_steps": 14493, "loss": 0.7872, "lr": 3.302723161025104e-05, "epoch": 2.674516705054217, "percentage": 89.15, "elapsed_time": "1 day, 11:26:51", "remaining_time": "4:18:56", "throughput": 28400.85, "total_tokens": 3624266432} +{"current_steps": 12930, "total_steps": 14493, "loss": 0.7795, "lr": 3.302002875940093e-05, "epoch": 2.676587044848736, "percentage": 89.22, "elapsed_time": "1 day, 11:28:25", "remaining_time": "4:17:17", "throughput": 28401.77, "total_tokens": 3627063616} +{"current_steps": 12940, "total_steps": 14493, "loss": 0.7812, "lr": 3.3012830619067466e-05, "epoch": 
2.6786573846432544, "percentage": 89.28, "elapsed_time": "1 day, 11:30:06", "remaining_time": "4:15:38", "throughput": 28401.66, "total_tokens": 3629906688} +{"current_steps": 12950, "total_steps": 14493, "loss": 0.7811, "lr": 3.300563718411857e-05, "epoch": 2.6807277244377734, "percentage": 89.35, "elapsed_time": "1 day, 11:31:43", "remaining_time": "4:13:59", "throughput": 28401.66, "total_tokens": 3632682048} +{"current_steps": 12960, "total_steps": 14493, "loss": 0.7958, "lr": 3.299844844943e-05, "epoch": 2.6827980642322924, "percentage": 89.42, "elapsed_time": "1 day, 11:33:22", "remaining_time": "4:12:21", "throughput": 28401.57, "total_tokens": 3635481664} +{"current_steps": 12970, "total_steps": 14493, "loss": 0.7997, "lr": 3.299126440988535e-05, "epoch": 2.684868404026811, "percentage": 89.49, "elapsed_time": "1 day, 11:35:00", "remaining_time": "4:10:42", "throughput": 28402.05, "total_tokens": 3638328128} +{"current_steps": 12980, "total_steps": 14493, "loss": 0.7832, "lr": 3.298408506037596e-05, "epoch": 2.6869387438213295, "percentage": 89.56, "elapsed_time": "1 day, 11:36:39", "remaining_time": "4:09:03", "throughput": 28402.28, "total_tokens": 3641149760} +{"current_steps": 12990, "total_steps": 14493, "loss": 0.7756, "lr": 3.297691039580097e-05, "epoch": 2.6890090836158484, "percentage": 89.63, "elapsed_time": "1 day, 11:38:14", "remaining_time": "4:07:24", "throughput": 28403.2, "total_tokens": 3643971072} +{"current_steps": 13000, "total_steps": 14493, "loss": 0.7967, "lr": 3.29697404110673e-05, "epoch": 2.6910794234103674, "percentage": 89.7, "elapsed_time": "1 day, 11:39:51", "remaining_time": "4:05:45", "throughput": 28403.66, "total_tokens": 3646790336} +{"current_steps": 13010, "total_steps": 14493, "loss": 0.7798, "lr": 3.2962575101089594e-05, "epoch": 2.693149763204886, "percentage": 89.77, "elapsed_time": "1 day, 11:41:31", "remaining_time": "4:04:06", "throughput": 28403.59, "total_tokens": 3649619456} +{"current_steps": 13020, 
"total_steps": 14493, "loss": 0.7683, "lr": 3.295541446079024e-05, "epoch": 2.6952201029994045, "percentage": 89.84, "elapsed_time": "1 day, 11:43:08", "remaining_time": "4:02:27", "throughput": 28403.65, "total_tokens": 3652390464} +{"current_steps": 13030, "total_steps": 14493, "loss": 0.7997, "lr": 3.2948258485099336e-05, "epoch": 2.6972904427939235, "percentage": 89.91, "elapsed_time": "1 day, 11:44:47", "remaining_time": "4:00:48", "throughput": 28403.94, "total_tokens": 3655237632} +{"current_steps": 13040, "total_steps": 14493, "loss": 0.7904, "lr": 3.29411071689547e-05, "epoch": 2.6993607825884425, "percentage": 89.97, "elapsed_time": "1 day, 11:46:25", "remaining_time": "3:59:10", "throughput": 28404.22, "total_tokens": 3658062848} +{"current_steps": 13050, "total_steps": 14493, "loss": 0.7772, "lr": 3.2933960507301826e-05, "epoch": 2.701431122382961, "percentage": 90.04, "elapsed_time": "1 day, 11:48:01", "remaining_time": "3:57:31", "throughput": 28405.05, "total_tokens": 3660899328} +{"current_steps": 13060, "total_steps": 14493, "loss": 0.7851, "lr": 3.292681849509387e-05, "epoch": 2.70350146217748, "percentage": 90.11, "elapsed_time": "1 day, 11:49:39", "remaining_time": "3:55:52", "throughput": 28405.02, "total_tokens": 3663659776} +{"current_steps": 13070, "total_steps": 14493, "loss": 0.7957, "lr": 3.291968112729166e-05, "epoch": 2.7055718019719985, "percentage": 90.18, "elapsed_time": "1 day, 11:51:19", "remaining_time": "3:54:13", "throughput": 28405.23, "total_tokens": 3666539840} +{"current_steps": 13080, "total_steps": 14493, "loss": 0.7884, "lr": 3.291254839886367e-05, "epoch": 2.7076421417665175, "percentage": 90.25, "elapsed_time": "1 day, 11:52:56", "remaining_time": "3:52:34", "throughput": 28405.82, "total_tokens": 3669369472} +{"current_steps": 13090, "total_steps": 14493, "loss": 0.7819, "lr": 3.2905420304785995e-05, "epoch": 2.709712481561036, "percentage": 90.32, "elapsed_time": "1 day, 11:54:38", "remaining_time": "3:50:56", 
"throughput": 28405.84, "total_tokens": 3672262528} +{"current_steps": 13100, "total_steps": 14493, "loss": 0.7922, "lr": 3.289829684004235e-05, "epoch": 2.711782821355555, "percentage": 90.39, "elapsed_time": "1 day, 11:56:13", "remaining_time": "3:49:17", "throughput": 28406.66, "total_tokens": 3675072384} +{"current_steps": 13110, "total_steps": 14493, "loss": 0.7883, "lr": 3.289117799962402e-05, "epoch": 2.713853161150074, "percentage": 90.46, "elapsed_time": "1 day, 11:57:53", "remaining_time": "3:47:38", "throughput": 28406.58, "total_tokens": 3677899008} +{"current_steps": 13120, "total_steps": 14493, "loss": 0.7943, "lr": 3.2884063778529914e-05, "epoch": 2.7159235009445926, "percentage": 90.53, "elapsed_time": "1 day, 11:59:28", "remaining_time": "3:45:59", "throughput": 28407.43, "total_tokens": 3680700032} +{"current_steps": 13130, "total_steps": 14493, "loss": 0.789, "lr": 3.28769541717665e-05, "epoch": 2.717993840739111, "percentage": 90.6, "elapsed_time": "1 day, 12:01:04", "remaining_time": "3:44:20", "throughput": 28408.44, "total_tokens": 3683554624} +{"current_steps": 13140, "total_steps": 14493, "loss": 0.7858, "lr": 3.2869849174347775e-05, "epoch": 2.72006418053363, "percentage": 90.66, "elapsed_time": "1 day, 12:02:38", "remaining_time": "3:42:40", "throughput": 28409.38, "total_tokens": 3686345792} +{"current_steps": 13150, "total_steps": 14493, "loss": 0.7955, "lr": 3.2862748781295294e-05, "epoch": 2.722134520328149, "percentage": 90.73, "elapsed_time": "1 day, 12:04:14", "remaining_time": "3:41:01", "throughput": 28410.08, "total_tokens": 3689183744} +{"current_steps": 13160, "total_steps": 14493, "loss": 0.7951, "lr": 3.2855652987638146e-05, "epoch": 2.7242048601226676, "percentage": 90.8, "elapsed_time": "1 day, 12:05:51", "remaining_time": "3:39:23", "throughput": 28410.48, "total_tokens": 3691981760} +{"current_steps": 13170, "total_steps": 14493, "loss": 0.7799, "lr": 3.284856178841291e-05, "epoch": 2.726275199917186, "percentage": 
90.87, "elapsed_time": "1 day, 12:07:27", "remaining_time": "3:37:44", "throughput": 28410.72, "total_tokens": 3694757568} +{"current_steps": 13180, "total_steps": 14493, "loss": 0.7963, "lr": 3.284147517866367e-05, "epoch": 2.728345539711705, "percentage": 90.94, "elapsed_time": "1 day, 12:09:07", "remaining_time": "3:36:05", "throughput": 28410.76, "total_tokens": 3697577344} +{"current_steps": 13190, "total_steps": 14493, "loss": 0.7827, "lr": 3.2834393153441976e-05, "epoch": 2.730415879506224, "percentage": 91.01, "elapsed_time": "1 day, 12:10:45", "remaining_time": "3:34:26", "throughput": 28411.08, "total_tokens": 3700409472} +{"current_steps": 13200, "total_steps": 14493, "loss": 0.785, "lr": 3.282731570780689e-05, "epoch": 2.7324862193007426, "percentage": 91.08, "elapsed_time": "1 day, 12:12:20", "remaining_time": "3:32:47", "throughput": 28411.7, "total_tokens": 3703199040} +{"current_steps": 13210, "total_steps": 14493, "loss": 0.7834, "lr": 3.2820242836824875e-05, "epoch": 2.7345565590952616, "percentage": 91.15, "elapsed_time": "1 day, 12:13:55", "remaining_time": "3:31:08", "throughput": 28412.38, "total_tokens": 3705988800} +{"current_steps": 13220, "total_steps": 14493, "loss": 0.7687, "lr": 3.2813174535569854e-05, "epoch": 2.73662689888978, "percentage": 91.22, "elapsed_time": "1 day, 12:15:32", "remaining_time": "3:29:29", "throughput": 28412.72, "total_tokens": 3708791872} +{"current_steps": 13230, "total_steps": 14493, "loss": 0.7964, "lr": 3.280611079912318e-05, "epoch": 2.738697238684299, "percentage": 91.29, "elapsed_time": "1 day, 12:17:08", "remaining_time": "3:27:50", "throughput": 28413.53, "total_tokens": 3711615488} +{"current_steps": 13240, "total_steps": 14493, "loss": 0.8026, "lr": 3.279905162257358e-05, "epoch": 2.7407675784788177, "percentage": 91.35, "elapsed_time": "1 day, 12:18:44", "remaining_time": "3:26:11", "throughput": 28413.96, "total_tokens": 3714402432} +{"current_steps": 13250, "total_steps": 14493, "loss": 0.7828, 
"lr": 3.279199700101723e-05, "epoch": 2.7428379182733367, "percentage": 91.42, "elapsed_time": "1 day, 12:20:18", "remaining_time": "3:24:32", "throughput": 28415.02, "total_tokens": 3717197312} +{"current_steps": 13260, "total_steps": 14493, "loss": 0.7926, "lr": 3.2784946929557644e-05, "epoch": 2.7449082580678557, "percentage": 91.49, "elapsed_time": "1 day, 12:21:54", "remaining_time": "3:22:53", "throughput": 28415.44, "total_tokens": 3720005184} +{"current_steps": 13270, "total_steps": 14493, "loss": 0.8034, "lr": 3.277790140330571e-05, "epoch": 2.746978597862374, "percentage": 91.56, "elapsed_time": "1 day, 12:23:29", "remaining_time": "3:21:14", "throughput": 28416.14, "total_tokens": 3722788800} +{"current_steps": 13280, "total_steps": 14493, "loss": 0.7849, "lr": 3.277086041737968e-05, "epoch": 2.7490489376568927, "percentage": 91.63, "elapsed_time": "1 day, 12:25:04", "remaining_time": "3:19:35", "throughput": 28416.85, "total_tokens": 3725573760} +{"current_steps": 13290, "total_steps": 14493, "loss": 0.7779, "lr": 3.276382396690513e-05, "epoch": 2.7511192774514117, "percentage": 91.7, "elapsed_time": "1 day, 12:26:41", "remaining_time": "3:17:56", "throughput": 28416.9, "total_tokens": 3728350400} +{"current_steps": 13300, "total_steps": 14493, "loss": 0.7954, "lr": 3.275679204701496e-05, "epoch": 2.7531896172459307, "percentage": 91.77, "elapsed_time": "1 day, 12:28:19", "remaining_time": "3:16:17", "throughput": 28417.53, "total_tokens": 3731194496} +{"current_steps": 13310, "total_steps": 14493, "loss": 0.7964, "lr": 3.274976465284939e-05, "epoch": 2.7552599570404492, "percentage": 91.84, "elapsed_time": "1 day, 12:29:54", "remaining_time": "3:14:38", "throughput": 28418.21, "total_tokens": 3734003840} +{"current_steps": 13320, "total_steps": 14493, "loss": 0.7919, "lr": 3.274274177955593e-05, "epoch": 2.7573302968349678, "percentage": 91.91, "elapsed_time": "1 day, 12:31:30", "remaining_time": "3:12:59", "throughput": 28418.62, "total_tokens": 
3736779648} +{"current_steps": 13330, "total_steps": 14493, "loss": 0.7873, "lr": 3.273572342228937e-05, "epoch": 2.7594006366294868, "percentage": 91.98, "elapsed_time": "1 day, 12:33:02", "remaining_time": "3:11:20", "throughput": 28419.54, "total_tokens": 3739506368} +{"current_steps": 13340, "total_steps": 14493, "loss": 0.7839, "lr": 3.272870957621176e-05, "epoch": 2.7614709764240057, "percentage": 92.04, "elapsed_time": "1 day, 12:34:39", "remaining_time": "3:09:41", "throughput": 28419.93, "total_tokens": 3742317632} +{"current_steps": 13350, "total_steps": 14493, "loss": 0.7913, "lr": 3.2721700236492414e-05, "epoch": 2.7635413162185243, "percentage": 92.11, "elapsed_time": "1 day, 12:36:17", "remaining_time": "3:08:02", "throughput": 28420.11, "total_tokens": 3745123136} +{"current_steps": 13360, "total_steps": 14493, "loss": 0.7771, "lr": 3.271469539830788e-05, "epoch": 2.7656116560130433, "percentage": 92.18, "elapsed_time": "1 day, 12:37:49", "remaining_time": "3:06:23", "throughput": 28421.13, "total_tokens": 3747876480} +{"current_steps": 13370, "total_steps": 14493, "loss": 0.7919, "lr": 3.270769505684193e-05, "epoch": 2.767681995807562, "percentage": 92.25, "elapsed_time": "1 day, 12:39:25", "remaining_time": "3:04:44", "throughput": 28421.72, "total_tokens": 3750677824} +{"current_steps": 13380, "total_steps": 14493, "loss": 0.7939, "lr": 3.2700699207285544e-05, "epoch": 2.769752335602081, "percentage": 92.32, "elapsed_time": "1 day, 12:40:58", "remaining_time": "3:03:05", "throughput": 28422.7, "total_tokens": 3753459136} +{"current_steps": 13390, "total_steps": 14493, "loss": 0.7994, "lr": 3.269370784483691e-05, "epoch": 2.7718226753965993, "percentage": 92.39, "elapsed_time": "1 day, 12:42:33", "remaining_time": "3:01:26", "throughput": 28423.54, "total_tokens": 3756277632} +{"current_steps": 13400, "total_steps": 14493, "loss": 0.7823, "lr": 3.268672096470138e-05, "epoch": 2.7738930151911183, "percentage": 92.46, "elapsed_time": "1 day, 
12:44:13", "remaining_time": "2:59:47", "throughput": 28423.45, "total_tokens": 3759090176} +{"current_steps": 13410, "total_steps": 14493, "loss": 0.7745, "lr": 3.2679738562091506e-05, "epoch": 2.7759633549856373, "percentage": 92.53, "elapsed_time": "1 day, 12:45:53", "remaining_time": "2:58:08", "throughput": 28423.26, "total_tokens": 3761928448} +{"current_steps": 13420, "total_steps": 14493, "loss": 0.7997, "lr": 3.2672760632226964e-05, "epoch": 2.778033694780156, "percentage": 92.6, "elapsed_time": "1 day, 12:47:29", "remaining_time": "2:56:30", "throughput": 28424.09, "total_tokens": 3764748544} +{"current_steps": 13430, "total_steps": 14493, "loss": 0.8033, "lr": 3.266578717033458e-05, "epoch": 2.7801040345746744, "percentage": 92.67, "elapsed_time": "1 day, 12:49:07", "remaining_time": "2:54:51", "throughput": 28424.36, "total_tokens": 3767567680} +{"current_steps": 13440, "total_steps": 14493, "loss": 0.7874, "lr": 3.265881817164833e-05, "epoch": 2.7821743743691933, "percentage": 92.73, "elapsed_time": "1 day, 12:50:43", "remaining_time": "2:53:12", "throughput": 28424.82, "total_tokens": 3770357440} +{"current_steps": 13450, "total_steps": 14493, "loss": 0.7893, "lr": 3.265185363140928e-05, "epoch": 2.7842447141637123, "percentage": 92.8, "elapsed_time": "1 day, 12:52:16", "remaining_time": "2:51:33", "throughput": 28425.71, "total_tokens": 3773137600} +{"current_steps": 13460, "total_steps": 14493, "loss": 0.7863, "lr": 3.26448935448656e-05, "epoch": 2.786315053958231, "percentage": 92.87, "elapsed_time": "1 day, 12:53:56", "remaining_time": "2:49:54", "throughput": 28425.65, "total_tokens": 3775970816} +{"current_steps": 13470, "total_steps": 14493, "loss": 0.7813, "lr": 3.263793790727256e-05, "epoch": 2.7883853937527494, "percentage": 92.94, "elapsed_time": "1 day, 12:55:32", "remaining_time": "2:48:15", "throughput": 28426.04, "total_tokens": 3778748224} +{"current_steps": 13480, "total_steps": 14493, "loss": 0.7753, "lr": 3.2630986713892495e-05, 
"epoch": 2.7904557335472684, "percentage": 93.01, "elapsed_time": "1 day, 12:57:12", "remaining_time": "2:46:37", "throughput": 28425.92, "total_tokens": 3781563200} +{"current_steps": 13490, "total_steps": 14493, "loss": 0.7835, "lr": 3.26240399599948e-05, "epoch": 2.7925260733417874, "percentage": 93.08, "elapsed_time": "1 day, 12:58:47", "remaining_time": "2:44:58", "throughput": 28426.56, "total_tokens": 3784346112} +{"current_steps": 13500, "total_steps": 14493, "loss": 0.7805, "lr": 3.2617097640855914e-05, "epoch": 2.794596413136306, "percentage": 93.15, "elapsed_time": "1 day, 13:00:24", "remaining_time": "2:43:19", "throughput": 28427.13, "total_tokens": 3787176320} +{"current_steps": 13510, "total_steps": 14493, "loss": 0.7898, "lr": 3.2610159751759314e-05, "epoch": 2.796666752930825, "percentage": 93.22, "elapsed_time": "1 day, 13:02:00", "remaining_time": "2:41:40", "throughput": 28427.59, "total_tokens": 3789985728} +{"current_steps": 13520, "total_steps": 14493, "loss": 0.7986, "lr": 3.26032262879955e-05, "epoch": 2.7987370927253434, "percentage": 93.29, "elapsed_time": "1 day, 13:03:37", "remaining_time": "2:40:01", "throughput": 28427.86, "total_tokens": 3792787712} +{"current_steps": 13530, "total_steps": 14493, "loss": 0.7871, "lr": 3.259629724486198e-05, "epoch": 2.8008074325198624, "percentage": 93.36, "elapsed_time": "1 day, 13:05:14", "remaining_time": "2:38:22", "throughput": 28428.35, "total_tokens": 3795605056} +{"current_steps": 13540, "total_steps": 14493, "loss": 0.7854, "lr": 3.258937261766323e-05, "epoch": 2.802877772314381, "percentage": 93.42, "elapsed_time": "1 day, 13:06:49", "remaining_time": "2:36:43", "throughput": 28429.03, "total_tokens": 3798385344} +{"current_steps": 13550, "total_steps": 14493, "loss": 0.7774, "lr": 3.258245240171074e-05, "epoch": 2.8049481121089, "percentage": 93.49, "elapsed_time": "1 day, 13:08:24", "remaining_time": "2:35:05", "throughput": 28429.8, "total_tokens": 3801189952} +{"current_steps": 13560, 
"total_steps": 14493, "loss": 0.7877, "lr": 3.2575536592322935e-05, "epoch": 2.807018451903419, "percentage": 93.56, "elapsed_time": "1 day, 13:10:01", "remaining_time": "2:33:26", "throughput": 28430.19, "total_tokens": 3804011008} +{"current_steps": 13570, "total_steps": 14493, "loss": 0.778, "lr": 3.256862518482523e-05, "epoch": 2.8090887916979375, "percentage": 93.63, "elapsed_time": "1 day, 13:11:40", "remaining_time": "2:31:47", "throughput": 28430.59, "total_tokens": 3806870848} +{"current_steps": 13580, "total_steps": 14493, "loss": 0.7788, "lr": 3.256171817454994e-05, "epoch": 2.811159131492456, "percentage": 93.7, "elapsed_time": "1 day, 13:13:16", "remaining_time": "2:30:08", "throughput": 28431.32, "total_tokens": 3809692160} +{"current_steps": 13590, "total_steps": 14493, "loss": 0.7777, "lr": 3.255481555683633e-05, "epoch": 2.813229471286975, "percentage": 93.77, "elapsed_time": "1 day, 13:14:50", "remaining_time": "2:28:29", "throughput": 28432.02, "total_tokens": 3812467648} +{"current_steps": 13600, "total_steps": 14493, "loss": 0.7953, "lr": 3.254791732703057e-05, "epoch": 2.815299811081494, "percentage": 93.84, "elapsed_time": "1 day, 13:16:29", "remaining_time": "2:26:51", "throughput": 28432.04, "total_tokens": 3815280448} +{"current_steps": 13610, "total_steps": 14493, "loss": 0.7864, "lr": 3.254102348048575e-05, "epoch": 2.8173701508760125, "percentage": 93.91, "elapsed_time": "1 day, 13:18:05", "remaining_time": "2:25:12", "throughput": 28432.8, "total_tokens": 3818111680} +{"current_steps": 13620, "total_steps": 14493, "loss": 0.7955, "lr": 3.25341340125618e-05, "epoch": 2.819440490670531, "percentage": 93.98, "elapsed_time": "1 day, 13:19:46", "remaining_time": "2:23:33", "throughput": 28432.54, "total_tokens": 3820959872} +{"current_steps": 13630, "total_steps": 14493, "loss": 0.7739, "lr": 3.2527248918625575e-05, "epoch": 2.82151083046505, "percentage": 94.05, "elapsed_time": "1 day, 13:21:20", "remaining_time": "2:21:54", "throughput": 
28433.2, "total_tokens": 3823706432} +{"current_steps": 13640, "total_steps": 14493, "loss": 0.7909, "lr": 3.252036819405075e-05, "epoch": 2.823581170259569, "percentage": 94.11, "elapsed_time": "1 day, 13:22:53", "remaining_time": "2:20:15", "throughput": 28434.18, "total_tokens": 3826478976} +{"current_steps": 13650, "total_steps": 14493, "loss": 0.7805, "lr": 3.251349183421788e-05, "epoch": 2.8256515100540875, "percentage": 94.18, "elapsed_time": "1 day, 13:24:28", "remaining_time": "2:18:36", "throughput": 28434.73, "total_tokens": 3829254656} +{"current_steps": 13660, "total_steps": 14493, "loss": 0.7901, "lr": 3.250661983451434e-05, "epoch": 2.8277218498486065, "percentage": 94.25, "elapsed_time": "1 day, 13:26:03", "remaining_time": "2:16:57", "throughput": 28435.32, "total_tokens": 3832033088} +{"current_steps": 13670, "total_steps": 14493, "loss": 0.7806, "lr": 3.2499752190334326e-05, "epoch": 2.829792189643125, "percentage": 94.32, "elapsed_time": "1 day, 13:27:39", "remaining_time": "2:15:19", "throughput": 28435.89, "total_tokens": 3834843136} +{"current_steps": 13680, "total_steps": 14493, "loss": 0.7634, "lr": 3.2492888897078834e-05, "epoch": 2.831862529437644, "percentage": 94.39, "elapsed_time": "1 day, 13:29:16", "remaining_time": "2:13:40", "throughput": 28436.02, "total_tokens": 3837622080} +{"current_steps": 13690, "total_steps": 14493, "loss": 0.7895, "lr": 3.248602995015567e-05, "epoch": 2.8339328692321626, "percentage": 94.46, "elapsed_time": "1 day, 13:30:54", "remaining_time": "2:12:01", "throughput": 28436.35, "total_tokens": 3840463744} +{"current_steps": 13700, "total_steps": 14493, "loss": 0.7974, "lr": 3.247917534497943e-05, "epoch": 2.8360032090266816, "percentage": 94.53, "elapsed_time": "1 day, 13:32:32", "remaining_time": "2:10:23", "throughput": 28436.56, "total_tokens": 3843269760} +{"current_steps": 13710, "total_steps": 14493, "loss": 0.7919, "lr": 3.247232507697145e-05, "epoch": 2.8380735488212006, "percentage": 94.6, 
"elapsed_time": "1 day, 13:34:09", "remaining_time": "2:08:44", "throughput": 28437.55, "total_tokens": 3846155008} +{"current_steps": 13720, "total_steps": 14493, "loss": 0.7777, "lr": 3.246547914155985e-05, "epoch": 2.840143888615719, "percentage": 94.67, "elapsed_time": "1 day, 13:35:45", "remaining_time": "2:07:05", "throughput": 28438.14, "total_tokens": 3848987904} +{"current_steps": 13730, "total_steps": 14493, "loss": 0.7861, "lr": 3.245863753417949e-05, "epoch": 2.8422142284102376, "percentage": 94.74, "elapsed_time": "1 day, 13:37:22", "remaining_time": "2:05:26", "throughput": 28438.29, "total_tokens": 3851764352} +{"current_steps": 13740, "total_steps": 14493, "loss": 0.7817, "lr": 3.2451800250271944e-05, "epoch": 2.8442845682047566, "percentage": 94.8, "elapsed_time": "1 day, 13:39:01", "remaining_time": "2:03:48", "throughput": 28438.39, "total_tokens": 3854577984} +{"current_steps": 13750, "total_steps": 14493, "loss": 0.8005, "lr": 3.244496728528553e-05, "epoch": 2.8463549079992756, "percentage": 94.87, "elapsed_time": "1 day, 13:40:42", "remaining_time": "2:02:09", "throughput": 28438.27, "total_tokens": 3857424384} +{"current_steps": 13760, "total_steps": 14493, "loss": 0.7835, "lr": 3.243813863467525e-05, "epoch": 2.848425247793794, "percentage": 94.94, "elapsed_time": "1 day, 13:42:19", "remaining_time": "2:00:30", "throughput": 28438.78, "total_tokens": 3860255616} +{"current_steps": 13770, "total_steps": 14493, "loss": 0.7763, "lr": 3.243131429390281e-05, "epoch": 2.8504955875883127, "percentage": 95.01, "elapsed_time": "1 day, 13:43:54", "remaining_time": "1:58:52", "throughput": 28439.55, "total_tokens": 3863066624} +{"current_steps": 13780, "total_steps": 14493, "loss": 0.7999, "lr": 3.2424494258436594e-05, "epoch": 2.8525659273828317, "percentage": 95.08, "elapsed_time": "1 day, 13:45:33", "remaining_time": "1:57:13", "throughput": 28439.47, "total_tokens": 3865873216} +{"current_steps": 13790, "total_steps": 14493, "loss": 0.7874, "lr": 
3.241767852375166e-05, "epoch": 2.8546362671773506, "percentage": 95.15, "elapsed_time": "1 day, 13:47:15", "remaining_time": "1:55:34", "throughput": 28439.14, "total_tokens": 3868731712} +{"current_steps": 13800, "total_steps": 14493, "loss": 0.7986, "lr": 3.241086708532971e-05, "epoch": 2.856706606971869, "percentage": 95.22, "elapsed_time": "1 day, 13:48:54", "remaining_time": "1:53:56", "throughput": 28439.16, "total_tokens": 3871553088} +{"current_steps": 13810, "total_steps": 14493, "loss": 0.7985, "lr": 3.24040599386591e-05, "epoch": 2.858776946766388, "percentage": 95.29, "elapsed_time": "1 day, 13:50:32", "remaining_time": "1:52:17", "throughput": 28439.39, "total_tokens": 3874366720} +{"current_steps": 13820, "total_steps": 14493, "loss": 0.7718, "lr": 3.23972570792348e-05, "epoch": 2.8608472865609067, "percentage": 95.36, "elapsed_time": "1 day, 13:52:05", "remaining_time": "1:50:38", "throughput": 28440.1, "total_tokens": 3877113408} +{"current_steps": 13830, "total_steps": 14493, "loss": 0.7775, "lr": 3.239045850255842e-05, "epoch": 2.8629176263554257, "percentage": 95.43, "elapsed_time": "1 day, 13:53:45", "remaining_time": "1:49:00", "throughput": 28440.05, "total_tokens": 3879942528} +{"current_steps": 13840, "total_steps": 14493, "loss": 0.7851, "lr": 3.238366420413817e-05, "epoch": 2.864987966149944, "percentage": 95.49, "elapsed_time": "1 day, 13:55:22", "remaining_time": "1:47:21", "throughput": 28440.59, "total_tokens": 3882770368} +{"current_steps": 13850, "total_steps": 14493, "loss": 0.7883, "lr": 3.237687417948882e-05, "epoch": 2.867058305944463, "percentage": 95.56, "elapsed_time": "1 day, 13:56:59", "remaining_time": "1:45:42", "throughput": 28440.89, "total_tokens": 3885575104} +{"current_steps": 13860, "total_steps": 14493, "loss": 0.8084, "lr": 3.2370088424131776e-05, "epoch": 2.869128645738982, "percentage": 95.63, "elapsed_time": "1 day, 13:58:39", "remaining_time": "1:44:04", "throughput": 28440.7, "total_tokens": 3888410816} 
+{"current_steps": 13870, "total_steps": 14493, "loss": 0.7956, "lr": 3.236330693359497e-05, "epoch": 2.8711989855335007, "percentage": 95.7, "elapsed_time": "1 day, 14:00:14", "remaining_time": "1:42:25", "throughput": 28441.35, "total_tokens": 3891190848} +{"current_steps": 13880, "total_steps": 14493, "loss": 0.7854, "lr": 3.2356529703412894e-05, "epoch": 2.8732693253280193, "percentage": 95.77, "elapsed_time": "1 day, 14:01:53", "remaining_time": "1:40:46", "throughput": 28441.4, "total_tokens": 3894009024} +{"current_steps": 13890, "total_steps": 14493, "loss": 0.7862, "lr": 3.234975672912661e-05, "epoch": 2.8753396651225382, "percentage": 95.84, "elapsed_time": "1 day, 14:03:32", "remaining_time": "1:39:08", "throughput": 28441.78, "total_tokens": 3896887040} +{"current_steps": 13900, "total_steps": 14493, "loss": 0.7898, "lr": 3.234298800628368e-05, "epoch": 2.8774100049170572, "percentage": 95.91, "elapsed_time": "1 day, 14:05:13", "remaining_time": "1:37:29", "throughput": 28441.74, "total_tokens": 3899760000} +{"current_steps": 13910, "total_steps": 14493, "loss": 0.7864, "lr": 3.2336223530438195e-05, "epoch": 2.8794803447115758, "percentage": 95.98, "elapsed_time": "1 day, 14:06:47", "remaining_time": "1:35:50", "throughput": 28442.66, "total_tokens": 3902548928} +{"current_steps": 13920, "total_steps": 14493, "loss": 0.7831, "lr": 3.232946329715076e-05, "epoch": 2.8815506845060943, "percentage": 96.05, "elapsed_time": "1 day, 14:08:23", "remaining_time": "1:34:11", "throughput": 28443.13, "total_tokens": 3905355328} +{"current_steps": 13930, "total_steps": 14493, "loss": 0.7858, "lr": 3.2322707301988456e-05, "epoch": 2.8836210243006133, "percentage": 96.12, "elapsed_time": "1 day, 14:10:00", "remaining_time": "1:32:33", "throughput": 28443.64, "total_tokens": 3908170368} +{"current_steps": 13940, "total_steps": 14493, "loss": 0.7884, "lr": 3.231595554052488e-05, "epoch": 2.8856913640951323, "percentage": 96.18, "elapsed_time": "1 day, 14:11:36", 
"remaining_time": "1:30:54", "throughput": 28444.02, "total_tokens": 3910967360} +{"current_steps": 13950, "total_steps": 14493, "loss": 0.7781, "lr": 3.230920800834005e-05, "epoch": 2.887761703889651, "percentage": 96.25, "elapsed_time": "1 day, 14:13:15", "remaining_time": "1:29:15", "throughput": 28444.23, "total_tokens": 3913806400} +{"current_steps": 13960, "total_steps": 14493, "loss": 0.7839, "lr": 3.2302464701020486e-05, "epoch": 2.88983204368417, "percentage": 96.32, "elapsed_time": "1 day, 14:14:52", "remaining_time": "1:27:37", "throughput": 28444.78, "total_tokens": 3916640192} +{"current_steps": 13970, "total_steps": 14493, "loss": 0.7973, "lr": 3.2295725614159126e-05, "epoch": 2.8919023834786883, "percentage": 96.39, "elapsed_time": "1 day, 14:16:29", "remaining_time": "1:25:58", "throughput": 28445.03, "total_tokens": 3919435520} +{"current_steps": 13980, "total_steps": 14493, "loss": 0.7907, "lr": 3.228899074335536e-05, "epoch": 2.8939727232732073, "percentage": 96.46, "elapsed_time": "1 day, 14:18:06", "remaining_time": "1:24:19", "throughput": 28445.33, "total_tokens": 3922229504} +{"current_steps": 13990, "total_steps": 14493, "loss": 0.7874, "lr": 3.228226008421498e-05, "epoch": 2.896043063067726, "percentage": 96.53, "elapsed_time": "1 day, 14:19:39", "remaining_time": "1:22:40", "throughput": 28446.28, "total_tokens": 3924997120} +{"current_steps": 14000, "total_steps": 14493, "loss": 0.7811, "lr": 3.2275533632350193e-05, "epoch": 2.898113402862245, "percentage": 96.6, "elapsed_time": "1 day, 14:21:12", "remaining_time": "1:21:02", "throughput": 28447.26, "total_tokens": 3927780864} +{"current_steps": 14010, "total_steps": 14493, "loss": 0.7781, "lr": 3.226881138337963e-05, "epoch": 2.9001837426567634, "percentage": 96.67, "elapsed_time": "1 day, 14:22:50", "remaining_time": "1:19:23", "throughput": 28447.28, "total_tokens": 3930570624} +{"current_steps": 14020, "total_steps": 14493, "loss": 0.7812, "lr": 3.2262093332928256e-05, "epoch": 
2.9022540824512824, "percentage": 96.74, "elapsed_time": "1 day, 14:24:30", "remaining_time": "1:17:44", "throughput": 28447.05, "total_tokens": 3933384768} +{"current_steps": 14030, "total_steps": 14493, "loss": 0.7905, "lr": 3.225537947662746e-05, "epoch": 2.904324422245801, "percentage": 96.81, "elapsed_time": "1 day, 14:26:08", "remaining_time": "1:16:06", "throughput": 28447.18, "total_tokens": 3936198336} +{"current_steps": 14040, "total_steps": 14493, "loss": 0.7751, "lr": 3.224866981011494e-05, "epoch": 2.90639476204032, "percentage": 96.87, "elapsed_time": "1 day, 14:27:45", "remaining_time": "1:14:27", "throughput": 28447.55, "total_tokens": 3939010880} +{"current_steps": 14050, "total_steps": 14493, "loss": 0.7878, "lr": 3.22419643290348e-05, "epoch": 2.908465101834839, "percentage": 96.94, "elapsed_time": "1 day, 14:29:25", "remaining_time": "1:12:49", "throughput": 28447.73, "total_tokens": 3941880064} +{"current_steps": 14060, "total_steps": 14493, "loss": 0.7764, "lr": 3.2235263029037446e-05, "epoch": 2.9105354416293574, "percentage": 97.01, "elapsed_time": "1 day, 14:31:02", "remaining_time": "1:11:10", "throughput": 28447.99, "total_tokens": 3944666304} +{"current_steps": 14070, "total_steps": 14493, "loss": 0.8046, "lr": 3.222856590577962e-05, "epoch": 2.912605781423876, "percentage": 97.08, "elapsed_time": "1 day, 14:32:42", "remaining_time": "1:09:31", "throughput": 28447.86, "total_tokens": 3947510400} +{"current_steps": 14080, "total_steps": 14493, "loss": 0.7958, "lr": 3.222187295492436e-05, "epoch": 2.914676121218395, "percentage": 97.15, "elapsed_time": "1 day, 14:34:14", "remaining_time": "1:07:52", "throughput": 28448.88, "total_tokens": 3950246912} +{"current_steps": 14090, "total_steps": 14493, "loss": 0.8, "lr": 3.221518417214104e-05, "epoch": 2.916746461012914, "percentage": 97.22, "elapsed_time": "1 day, 14:35:47", "remaining_time": "1:06:14", "throughput": 28449.69, "total_tokens": 3953026944} +{"current_steps": 14100, 
"total_steps": 14493, "loss": 0.7921, "lr": 3.22084995531053e-05, "epoch": 2.9188168008074324, "percentage": 97.29, "elapsed_time": "1 day, 14:37:24", "remaining_time": "1:04:35", "throughput": 28450.24, "total_tokens": 3955842048} +{"current_steps": 14110, "total_steps": 14493, "loss": 0.7742, "lr": 3.220181909349907e-05, "epoch": 2.9208871406019514, "percentage": 97.36, "elapsed_time": "1 day, 14:39:02", "remaining_time": "1:02:56", "throughput": 28450.39, "total_tokens": 3958662528} +{"current_steps": 14120, "total_steps": 14493, "loss": 0.782, "lr": 3.219514278901053e-05, "epoch": 2.92295748039647, "percentage": 97.43, "elapsed_time": "1 day, 14:40:39", "remaining_time": "1:01:18", "throughput": 28450.78, "total_tokens": 3961486080} +{"current_steps": 14130, "total_steps": 14493, "loss": 0.7795, "lr": 3.218847063533413e-05, "epoch": 2.925027820190989, "percentage": 97.5, "elapsed_time": "1 day, 14:42:17", "remaining_time": "0:59:39", "throughput": 28451.07, "total_tokens": 3964293056} +{"current_steps": 14140, "total_steps": 14493, "loss": 0.7854, "lr": 3.218180262817055e-05, "epoch": 2.9270981599855075, "percentage": 97.56, "elapsed_time": "1 day, 14:43:54", "remaining_time": "0:58:00", "throughput": 28451.31, "total_tokens": 3967083136} +{"current_steps": 14150, "total_steps": 14493, "loss": 0.7933, "lr": 3.217513876322674e-05, "epoch": 2.9291684997800265, "percentage": 97.63, "elapsed_time": "1 day, 14:45:31", "remaining_time": "0:56:22", "throughput": 28451.69, "total_tokens": 3969902528} +{"current_steps": 14160, "total_steps": 14493, "loss": 0.7845, "lr": 3.216847903621581e-05, "epoch": 2.931238839574545, "percentage": 97.7, "elapsed_time": "1 day, 14:47:09", "remaining_time": "0:54:43", "throughput": 28451.88, "total_tokens": 3972708608} +{"current_steps": 14170, "total_steps": 14493, "loss": 0.8016, "lr": 3.216182344285713e-05, "epoch": 2.933309179369064, "percentage": 97.77, "elapsed_time": "1 day, 14:48:42", "remaining_time": "0:53:04", "throughput": 
28452.44, "total_tokens": 3975451136} +{"current_steps": 14180, "total_steps": 14493, "loss": 0.7998, "lr": 3.215517197887625e-05, "epoch": 2.9353795191635825, "percentage": 97.84, "elapsed_time": "1 day, 14:50:23", "remaining_time": "0:51:26", "throughput": 28452.19, "total_tokens": 3978279552} +{"current_steps": 14190, "total_steps": 14493, "loss": 0.782, "lr": 3.214852464000488e-05, "epoch": 2.9374498589581015, "percentage": 97.91, "elapsed_time": "1 day, 14:51:58", "remaining_time": "0:49:47", "throughput": 28452.65, "total_tokens": 3981056768} +{"current_steps": 14200, "total_steps": 14493, "loss": 0.8057, "lr": 3.2141881421980945e-05, "epoch": 2.9395201987526205, "percentage": 97.98, "elapsed_time": "1 day, 14:53:37", "remaining_time": "0:48:09", "throughput": 28452.7, "total_tokens": 3983873280} +{"current_steps": 14210, "total_steps": 14493, "loss": 0.7761, "lr": 3.213524232054851e-05, "epoch": 2.941590538547139, "percentage": 98.05, "elapsed_time": "1 day, 14:55:13", "remaining_time": "0:46:30", "throughput": 28453.09, "total_tokens": 3986660032} +{"current_steps": 14220, "total_steps": 14493, "loss": 0.8024, "lr": 3.21286073314578e-05, "epoch": 2.9436608783416576, "percentage": 98.12, "elapsed_time": "1 day, 14:56:50", "remaining_time": "0:44:51", "throughput": 28453.35, "total_tokens": 3989445632} +{"current_steps": 14230, "total_steps": 14493, "loss": 0.7854, "lr": 3.2121976450465155e-05, "epoch": 2.9457312181361766, "percentage": 98.19, "elapsed_time": "1 day, 14:58:29", "remaining_time": "0:43:13", "throughput": 28453.25, "total_tokens": 3992270144} +{"current_steps": 14240, "total_steps": 14493, "loss": 0.7991, "lr": 3.211534967333308e-05, "epoch": 2.9478015579306955, "percentage": 98.25, "elapsed_time": "1 day, 15:00:02", "remaining_time": "0:41:34", "throughput": 28454.3, "total_tokens": 3995047808} +{"current_steps": 14250, "total_steps": 14493, "loss": 0.7858, "lr": 3.210872699583019e-05, "epoch": 2.949871897725214, "percentage": 98.32, 
"elapsed_time": "1 day, 15:01:38", "remaining_time": "0:39:55", "throughput": 28454.72, "total_tokens": 3997856768} +{"current_steps": 14260, "total_steps": 14493, "loss": 0.7762, "lr": 3.210210841373118e-05, "epoch": 2.951942237519733, "percentage": 98.39, "elapsed_time": "1 day, 15:03:15", "remaining_time": "0:38:17", "throughput": 28455.05, "total_tokens": 4000650880} +{"current_steps": 14270, "total_steps": 14493, "loss": 0.7756, "lr": 3.2095493922816855e-05, "epoch": 2.9540125773142516, "percentage": 98.46, "elapsed_time": "1 day, 15:04:50", "remaining_time": "0:36:38", "throughput": 28455.26, "total_tokens": 4003396480} +{"current_steps": 14280, "total_steps": 14493, "loss": 0.7901, "lr": 3.2088883518874105e-05, "epoch": 2.9560829171087706, "percentage": 98.53, "elapsed_time": "1 day, 15:06:25", "remaining_time": "0:34:59", "throughput": 28455.92, "total_tokens": 4006188352} +{"current_steps": 14290, "total_steps": 14493, "loss": 0.7768, "lr": 3.208227719769589e-05, "epoch": 2.958153256903289, "percentage": 98.6, "elapsed_time": "1 day, 15:08:06", "remaining_time": "0:33:21", "throughput": 28455.75, "total_tokens": 4009035648} +{"current_steps": 14300, "total_steps": 14493, "loss": 0.7816, "lr": 3.207567495508124e-05, "epoch": 2.960223596697808, "percentage": 98.67, "elapsed_time": "1 day, 15:09:39", "remaining_time": "0:31:42", "throughput": 28456.67, "total_tokens": 4011813760} +{"current_steps": 14310, "total_steps": 14493, "loss": 0.7866, "lr": 3.2069076786835205e-05, "epoch": 2.9622939364923266, "percentage": 98.74, "elapsed_time": "1 day, 15:11:14", "remaining_time": "0:30:04", "throughput": 28457.36, "total_tokens": 4014617216} +{"current_steps": 14320, "total_steps": 14493, "loss": 0.7851, "lr": 3.2062482688768904e-05, "epoch": 2.9643642762868456, "percentage": 98.81, "elapsed_time": "1 day, 15:12:48", "remaining_time": "0:28:25", "throughput": 28458.13, "total_tokens": 4017398336} +{"current_steps": 14330, "total_steps": 14493, "loss": 0.7759, "lr": 
3.205589265669947e-05, "epoch": 2.966434616081364, "percentage": 98.88, "elapsed_time": "1 day, 15:14:24", "remaining_time": "0:26:46", "throughput": 28458.5, "total_tokens": 4020185536} +{"current_steps": 14340, "total_steps": 14493, "loss": 0.8, "lr": 3.204930668645005e-05, "epoch": 2.968504955875883, "percentage": 98.94, "elapsed_time": "1 day, 15:16:03", "remaining_time": "0:25:08", "throughput": 28458.85, "total_tokens": 4023037760} +{"current_steps": 14350, "total_steps": 14493, "loss": 0.7751, "lr": 3.20427247738498e-05, "epoch": 2.970575295670402, "percentage": 99.01, "elapsed_time": "1 day, 15:17:38", "remaining_time": "0:23:29", "throughput": 28459.36, "total_tokens": 4025812992} +{"current_steps": 14360, "total_steps": 14493, "loss": 0.7845, "lr": 3.2036146914733854e-05, "epoch": 2.9726456354649207, "percentage": 99.08, "elapsed_time": "1 day, 15:19:11", "remaining_time": "0:21:51", "throughput": 28459.88, "total_tokens": 4028547584} +{"current_steps": 14370, "total_steps": 14493, "loss": 0.7798, "lr": 3.202957310494336e-05, "epoch": 2.974715975259439, "percentage": 99.15, "elapsed_time": "1 day, 15:20:47", "remaining_time": "0:20:12", "throughput": 28460.11, "total_tokens": 4031313280} +{"current_steps": 14380, "total_steps": 14493, "loss": 0.7934, "lr": 3.202300334032542e-05, "epoch": 2.976786315053958, "percentage": 99.22, "elapsed_time": "1 day, 15:22:25", "remaining_time": "0:18:33", "throughput": 28460.38, "total_tokens": 4034130816} +{"current_steps": 14390, "total_steps": 14493, "loss": 0.7833, "lr": 3.201643761673308e-05, "epoch": 2.978856654848477, "percentage": 99.29, "elapsed_time": "1 day, 15:23:59", "remaining_time": "0:16:55", "throughput": 28461.01, "total_tokens": 4036899648} +{"current_steps": 14400, "total_steps": 14493, "loss": 0.7863, "lr": 3.200987593002536e-05, "epoch": 2.9809269946429957, "percentage": 99.36, "elapsed_time": "1 day, 15:25:32", "remaining_time": "0:15:16", "throughput": 28461.68, "total_tokens": 4039630912} 
+{"current_steps": 14410, "total_steps": 14493, "loss": 0.7952, "lr": 3.200331827606721e-05, "epoch": 2.9829973344375147, "percentage": 99.43, "elapsed_time": "1 day, 15:27:06", "remaining_time": "0:13:38", "throughput": 28462.27, "total_tokens": 4042402816} +{"current_steps": 14420, "total_steps": 14493, "loss": 0.7677, "lr": 3.199676465072951e-05, "epoch": 2.9850676742320332, "percentage": 99.5, "elapsed_time": "1 day, 15:28:44", "remaining_time": "0:11:59", "throughput": 28462.29, "total_tokens": 4045200640} +{"current_steps": 14430, "total_steps": 14493, "loss": 0.7749, "lr": 3.1990215049889046e-05, "epoch": 2.987138014026552, "percentage": 99.57, "elapsed_time": "1 day, 15:30:19", "remaining_time": "0:10:20", "throughput": 28462.53, "total_tokens": 4047939136} +{"current_steps": 14440, "total_steps": 14493, "loss": 0.7751, "lr": 3.198366946942851e-05, "epoch": 2.9892083538210708, "percentage": 99.63, "elapsed_time": "1 day, 15:31:59", "remaining_time": "0:08:42", "throughput": 28462.63, "total_tokens": 4050787008} +{"current_steps": 14450, "total_steps": 14493, "loss": 0.7919, "lr": 3.1977127905236514e-05, "epoch": 2.9912786936155897, "percentage": 99.7, "elapsed_time": "1 day, 15:33:33", "remaining_time": "0:07:03", "throughput": 28463.55, "total_tokens": 4053599360} +{"current_steps": 14460, "total_steps": 14493, "loss": 0.7728, "lr": 3.197059035320752e-05, "epoch": 2.9933490334101083, "percentage": 99.77, "elapsed_time": "1 day, 15:35:13", "remaining_time": "0:05:25", "throughput": 28463.51, "total_tokens": 4056425280} +{"current_steps": 14470, "total_steps": 14493, "loss": 0.7812, "lr": 3.196405680924189e-05, "epoch": 2.9954193732046273, "percentage": 99.84, "elapsed_time": "1 day, 15:36:54", "remaining_time": "0:03:46", "throughput": 28463.29, "total_tokens": 4059264704} +{"current_steps": 14480, "total_steps": 14493, "loss": 0.7936, "lr": 3.195752726924582e-05, "epoch": 2.997489712999146, "percentage": 99.91, "elapsed_time": "1 day, 15:38:32", 
"remaining_time": "0:02:08", "throughput": 28463.33, "total_tokens": 4062074176} +{"current_steps": 14490, "total_steps": 14493, "loss": 0.782, "lr": 3.195100172913139e-05, "epoch": 2.9995600527936648, "percentage": 99.98, "elapsed_time": "1 day, 15:40:08", "remaining_time": "0:00:29", "throughput": 28464.01, "total_tokens": 4064895552} +{"current_steps": 14493, "total_steps": 14493, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1 day, 15:42:02", "remaining_time": "0:00:00", "throughput": 28445.48, "total_tokens": 4065497216}